diff --git a/alignementseq.py b/alignementseq.py index 5142a33caf00c863a04333d2fc2cc7b2c6fc7c10..2afa73ae0d46da4614fec6473d0ad871f78378ec 100644 --- a/alignementseq.py +++ b/alignementseq.py @@ -41,15 +41,15 @@ def align(seq1, seq2): y = seq2.seq n=len(x) m=len(y) - mat = [[0 for _ in range(m+1)] for _ in range(n+1)] # scores matrix + mm = [[0 for _ in range(m+1)] for _ in range(n+1)] # scores matrix path = [[0 for _ in range(m+1)] for _ in range(n+1)] # last step for optimal score # first line and col for i in range(1, n+1): - mat[i][0] = mat[i-1][0] + score('', x[i-1]) + mm[i][0] = mm[i-1][0] + score('', x[i-1]) path[i][0] = (i-1, 0) for j in range(1, m+1): - mat[0][j] = mat[j-1][0] + score('', y[j-1]) + mm[0][j] = mm[j-1][0] + score('', y[j-1]) path[0][j] = (0, j-1) # fill table @@ -57,25 +57,115 @@ def align(seq1, seq2): for j in range(1, m+1): a = x[i-1] b = y[j-1] - s1 = mat[i-1][j-1] + score(a,b) - s2 = mat[i-1][j] + score('', a) - s3 = mat[i][j-1] + score('', b) + s1 = mm[i-1][j-1] + score(a,b) + s2 = mm[i-1][j] + score('', a) + s3 = mm[i][j-1] + score('', b) if s1 >= s2: if s1 >= s3: - mat[i][j] = s1 + mm[i][j] = s1 path[i][j] = (i-1, j-1) else: - mat[i][j] = s3 + mm[i][j] = s3 path[i][j] = (i, j-1) else: # s2 > s1 if s2 >= s3: - mat[i][j] = s2 + mm[i][j] = s2 path[i][j] = (i-1, j) else: - mat[i][j] = s3 + mm[i][j] = s3 path[i][j] = (i, j-1) - bestScore = mat[n][m] + bestScore = mm[n][m] + alx = "" + aly = "" + i, j = n, m + while i > 0 and j > 0: + i2, j2 = path[i][j] + if i==i2: + alx+='-' + else: + alx+=x[i-1] + if j==j2: + aly+='-' + else: + aly+=y[j-1] + i,j = i2, j2 + while i>0: + aly+='-' + alx+=x[i-1] + i -=1 + while j>0: + alx+='-' + aly+=y[j-1] + j -=1 + alx = alx[::-1] + aly = aly[::-1] + seq1.seq = Seq(alx) + seq2.seq = Seq(aly) + return bestScore, seq1, seq2 + + +def align2steps(seq1, seq2, d=8, e=4): + x = seq1.seq + y = seq2.seq + n=len(x) + m=len(y) + mm = [[0 for _ in range(m+1)] for _ in range(n+1)] # scores matrix + mx = [[0 for _ in range(m+1)] for _ in range(n+1)] + my = [[0 for _ in range(m+1)] for _ in range(n+1)] + path = [[0 for _ in range(m+1)] for _ in range(n+1)] # last step for optimal score + + # first line and col + for i in range(1, n+1): + mx[i][1] = my[i][1] = -d -(i-1)*e + path[i][0] = (i-1, 0) + for j in range(1, m+1): + mx[1][j] = my[1][i] = -d - (j-1)*e + path[0][j] = (0, j-1) + + # fill table + for i in range(2, n+1): + for j in range(2, m+1): + a = x[i-1] + b = y[j-1] + + # find max for M + s1 = mm[i-1][j-1] + score(a,b) + s2 = mx[i-1][j-1] + score(a,b) + s3 = my[i][j-1] + score(a,b) + + if s1 >= s2: + if s1 >= s3: + mm[i][j] = s1 + path[i][j] = (i-1, j-1) + else: + mm[i][j] = s3 + path[i][j] = (i, j-1) + else: # s2 > s1 + if s2 >= s3: + mm[i][j] = s2 + path[i][j] = (i-1, j) + else: + mm[i][j] = s3 + path[i][j] = (i, j-1) + + # find max for I_x + s4 = mm[i-1][j] - d + s5 = mx[i-1][j] - e + if s4 >= s5: + mx[i][j] = s4 + else: + mx[i][j] = s5 + + # find max for I_y + s6 = mm[i][j-1] - d + s7 = my[i][j-1] - e + if s6 >= s7: + my[i][j] = s6 + else: + my[i][j] = s7 + + bestScore = mm[n][m] alx = "" aly = "" i, j = n, m