Skip to content
Snippets Groups Projects
Verified Commit 4dbdda67 authored by Etienne MORICE's avatar Etienne MORICE
Browse files

Added more serious unit tests, found two errors.

 * Forgotten an in-place modification of arguments in `align`
 * Wrong initialisation of first column in `align` (index confusion)
parent 17c0571f
No related branches found
No related tags found
No related merge requests found
......@@ -13,11 +13,11 @@ from Bio.Seq import Seq
import Bio.pairwise2
seqs=[]
handle = open("balibase/RV11.unaligned/BBS11001.fasta")
for seq in Bio.SeqIO.parse(handle, "fasta"):
seqs.append(seq)
handle.close()
#seqs=[]
#handle = open("balibase/RV11.unaligned/BBS11001.fasta")
#for seq in Bio.SeqIO.parse(handle, "fasta"):
# seqs.append(seq)
#handle.close()
mat = Bio.SubsMat.MatrixInfo.blosum62
......@@ -51,7 +51,7 @@ def align(seq1, seq2, score=score):
mm[i][0] = mm[i-1][0] + score('', x[i-1])
path[i][0] = (i-1, 0)
for j in range(1, m+1):
mm[0][j] = mm[j-1][0] + score('', y[j-1])
mm[0][j] = mm[0][j-1] + score('', y[j-1])
path[0][j] = (0, j-1)
# fill table
......@@ -100,11 +100,11 @@ def align(seq1, seq2, score=score):
alx+='-'
aly+=y[j-1]
j -=1
alx = alx[::-1]
aly = aly[::-1]
seq1.seq = Seq(alx)
seq2.seq = Seq(aly)
return bestScore, seq1, seq2
aseq1 = SeqRecord(seq1)
aseq1.seq = Seq(alx[::-1])
aseq2 = SeqRecord(seq2)
aseq2.seq = Seq(aly[::-1])
return bestScore, aseq1, aseq2
def align2steps(seq1, seq2, d=8, e=4):
......
#!/usr/bin/env python3
import unittest
import urllib.request
import os
import zipfile
import Bio.SeqIO
import Bio.pairwise2
from Bio.SeqRecord import SeqRecord
class AlignmentSeqTestCase(unittest.TestCase):
def setUp(self):
balibase_zippath = "balibase.zip"
balibase_path = "balibase"
self.balibase_path = "balibase"
testfile_path = os.path.join(
balibase_path,
self.balibase_path,
"RV11.unaligned", "BBS11001.fasta")
if not os.path.isdir(balibase_path):
os.mkdir(balibase_path)
if not os.path.isdir(self.balibase_path):
os.mkdir(self.balibase_path)
if not os.path.isfile(testfile_path):
if not os.path.isfile(balibase_zippath):
print("Fetching balibase archive from moodle...")
......@@ -24,17 +28,65 @@ class AlignmentSeqTestCase(unittest.TestCase):
with zipfile.ZipFile(balibase_zippath) as balibase_zip:
balibase_zip.extractall()
# Generator function to iterate over the first two sequences of each
# unaligned fasta file.
def get_dataset_heads(self):
dataset_dir = os.path.join(self.balibase_path, "RV11.unaligned")
for filename in os.listdir(dataset_dir):
records = Bio.SeqIO.parse(
os.path.join(dataset_dir, filename),
"fasta")
seq1 = next(records)
seq2 = next(records)
yield seq1, seq2, filename
# Strip strings of their '-' before comparing them
def assertSameResidues(self, str1, str2):
return self.assertEqual(
str(str1).translate({ord('-'):None}),
str(str2).translate({ord('-'):None})
)
# Test alignments with the simplest metric. As there can be a huge number of
# solutions, we check only that we got the right score, and one valid
# alignment.
def test_simple_align(self):
from alignementseq import align
score_fn = lambda a,b : -2 if a == '' or b == '' else -1 if a != b else 1
for s1, s2, filename in self.get_dataset_heads():
score, r1, r2 = align(s1, s2, score_fn)
(ex_r1, ex_r2, exp_score, *_) = Bio.pairwise2.align.globalms(
s1.seq, s2.seq, 1, -1, -2, -2)[0]
try:
self.assertSameResidues(r1.seq, s1.seq)
self.assertSameResidues(r2.seq, s2.seq)
self.assertEqual(score, sum(
-2 if a == '-' or b == '-' else
-1 if a != b else
1
for a,b in zip(r1.seq, r2.seq)))
self.assertEqual(score, exp_score)
except AssertionError:
print('from', filename, ':')
print(r1.seq)
print(r2.seq)
print('')
print(ex_r1)
print(ex_r2)
raise
def test_blosum_align(self):
from alignementseq import align
cases = [
("CHAT", "CAT", 1, "CHAT", "C-AT"),
("CHAT", "CGAT", 2, "CHAT", "CGAT"),
("CHAT", "AT", -2, "CHAT", "--AT")
("CHAT", "CAT", 10, "CHAT", "C-AT"),
("CHAT", "CGAT", 16, "CHAT", "CGAT"),
("CHAT", "AT", -7, "CHAT", "--AT")
]
for s1, s2, exp_score, exp_r1, exp_r2 in cases:
score, r1, r2 = align(SeqRecord(s1), SeqRecord(s2), score_fn)
score, r1, r2 = align(SeqRecord(s1), SeqRecord(s2))
self.assertEqual(score, exp_score)
self.assertEqual(r1.seq, exp_r1)
self.assertEqual(r2.seq, exp_r2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment