Skip to content
Snippets Groups Projects
alignementseq.py 4.98 KiB
Newer Older
Ariane Delrocq's avatar
Ariane Delrocq committed
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 15 15:02:16 2019

@author: ariane.delrocq
"""

import Bio.SubsMat.MatrixInfo
import Bio.SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

seqs=[]
handle = open("balibase/RV11.unaligned/BBS11001.fasta")
for seq in Bio.SeqIO.parse(handle, "fasta"):
    seqs.append(seq)
handle.close()
Ariane Delrocq's avatar
Ariane Delrocq committed

mat = Bio.SubsMat.MatrixInfo.blosum62


def score(a, b, gap=-8, id=1, mut=-1):
    if not a or not b:
        return gap
    else:
        a = a.upper()
        b = b.upper()
        try:
            return mat[a,b]
        except KeyError:
            return mat[b,a]
    
#    elif a==b:
#        return id
#    else:
#        return mut

def align(seq1, seq2):
    x = seq1.seq
    y = seq2.seq
Ariane Delrocq's avatar
Ariane Delrocq committed
    n=len(x)
    m=len(y)
    mm = [[0 for _ in range(m+1)] for _ in range(n+1)]   # scores matrix
Ariane Delrocq's avatar
Ariane Delrocq committed
    path = [[0 for _ in range(m+1)] for _ in range(n+1)]   # last step for optimal score
    
    # first line and col
    for i in range(1, n+1):
        mm[i][0] = mm[i-1][0] + score('', x[i-1])
Ariane Delrocq's avatar
Ariane Delrocq committed
        path[i][0] = (i-1, 0)
    for j in range(1, m+1):
        mm[0][j] = mm[j-1][0] + score('', y[j-1])
Ariane Delrocq's avatar
Ariane Delrocq committed
        path[0][j] = (0, j-1)
    
    # fill table
    for i in range(1, n+1):
        for j in range(1, m+1):
            a = x[i-1]
            b = y[j-1]
            s1 = mm[i-1][j-1] + score(a,b)
            s2 = mm[i-1][j] + score('', a)
            s3 = mm[i][j-1] + score('', b)
Ariane Delrocq's avatar
Ariane Delrocq committed
            if s1 >= s2:
                if s1 >= s3:
                    mm[i][j] = s1
Ariane Delrocq's avatar
Ariane Delrocq committed
                    path[i][j] = (i-1, j-1)
                else:
                    mm[i][j] = s3
Ariane Delrocq's avatar
Ariane Delrocq committed
                    path[i][j] = (i, j-1)
            else:   # s2 > s1
                if s2 >= s3:
                    mm[i][j] = s2
Ariane Delrocq's avatar
Ariane Delrocq committed
                    path[i][j] = (i-1, j)
                else:
                    mm[i][j] = s3
Ariane Delrocq's avatar
Ariane Delrocq committed
                    path[i][j] = (i, j-1)
    
    bestScore = mm[n][m]
    alx = ""
    aly = ""
    i, j = n, m
    while i > 0 and j > 0:
        i2, j2 = path[i][j]
        if i==i2:
            alx+='-'
        else:
            alx+=x[i-1]
        if j==j2:
            aly+='-'
        else:
            aly+=y[j-1]
        i,j = i2, j2
    while i>0:
        aly+='-'
        alx+=x[i-1]
        i -=1
    while j>0:
        alx+='-'
        aly+=y[j-1]
        j -=1
    alx = alx[::-1]
    aly = aly[::-1]
    seq1.seq = Seq(alx)
    seq2.seq = Seq(aly)
    return bestScore, seq1, seq2


def align2steps(seq1, seq2, d=8, e=4):
    x = seq1.seq
    y = seq2.seq
    n=len(x)
    m=len(y)
    mm = [[0 for _ in range(m+1)] for _ in range(n+1)]   # scores matrix
    mx = [[0 for _ in range(m+1)] for _ in range(n+1)]
    my = [[0 for _ in range(m+1)] for _ in range(n+1)]
    path = [[0 for _ in range(m+1)] for _ in range(n+1)]   # last step for optimal score
    
    # first line and col
    for i in range(1, n+1):
        mx[i][1] = my[i][1] = -d -(i-1)*e
        path[i][0] = (i-1, 0)
    for j in range(1, m+1):
        mx[1][j] = my[1][i] = -d - (j-1)*e
        path[0][j] = (0, j-1)
    
    # fill table
    for i in range(2, n+1):
        for j in range(2, m+1):
            a = x[i-1]
            b = y[j-1]
            
            # find max for M
            s1 = mm[i-1][j-1] + score(a,b)
            s2 = mx[i-1][j-1] + score(a,b)
            s3 = my[i][j-1] + score(a,b)
            
            if s1 >= s2:
                if s1 >= s3:
                    mm[i][j] = s1
                    path[i][j] = (i-1, j-1)
                else:
                    mm[i][j] = s3
                    path[i][j] = (i, j-1)
            else:   # s2 > s1
                if s2 >= s3:
                    mm[i][j] = s2
                    path[i][j] = (i-1, j)
                else:
                    mm[i][j] = s3
                    path[i][j] = (i, j-1)
            
            # find max for I_x
            s4 = mm[i-1][j] - d
            s5 = mx[i-1][j] - e
            if s4 >= s5:
                mx[i][j] = s4
            else:
                mx[i][j] = s5
            
            # find max for I_y
            s6 = mm[i][j-1] - d
            s7 = my[i][j-1] - e
            if s6 >= s7:
                my[i][j] = s6
            else:
                my[i][j] = s7
    
    bestScore = mm[n][m]
Ariane Delrocq's avatar
Ariane Delrocq committed
    alx = ""
    aly = ""
    i, j = n, m
    while i > 0 and j > 0:
        i2, j2 = path[i][j]
        if i==i2:
            alx+='-'
        else:
            alx+=x[i-1]
        if j==j2:
            aly+='-'
        else:
            aly+=y[j-1]
        i,j = i2, j2
    while i>0:
        aly+='-'
        alx+=x[i-1]
        i -=1
    while j>0:
        alx+='-'
        aly+=y[j-1]
        j -=1
    alx = alx[::-1]
    aly = aly[::-1]
    seq1.seq = Seq(alx)
    seq2.seq = Seq(aly)
    return bestScore, seq1, seq2
Ariane Delrocq's avatar
Ariane Delrocq committed

def test():
#    print(align("chat", "cat"))
#    print(align("chat", "cgat"))
#    print(align("chat", "at"))
    s, x, y = align(seqs[0], seqs[1])
    print(s,x,y)
    with open("balibase/us/1.fasta", "w") as fd:
        Bio.SeqIO.write((x, y), fd, "fasta")
test()