Skip to content
Snippets Groups Projects
Verified Commit 5e447d8a authored by Etienne MORICE's avatar Etienne MORICE
Browse files

Added performance evaluations.

 * Raw timings for the pairwise alignment of the first two sequences of
   each balibase file
 * Timings for alignment of mouse and human titin, with a timeout
   controlled by `UNIT_TIMEOUT`
parent e2463bc9
No related branches found
No related tags found
No related merge requests found
......@@ -2,3 +2,4 @@
balibase/
__pycache__/
balibase.zip
atlas
......@@ -6,4 +6,8 @@ cache:
- balibase/
test:
script:
- python3 -m unittest alignementseq_tests.py
- python3 -m unittest alignementseq_tests
test_performance:
script:
- python3 -m unittest alignementseq_tests.PerformanceTestCase
......@@ -4,14 +4,23 @@ import unittest
import urllib.request
import os
import zipfile
import warnings
import timeit
import multiprocessing
import pandas as pd
import Bio.SeqIO
import Bio.pairwise2
import Bio.Entrez
from Bio.SeqRecord import SeqRecord
from Bio.SubsMat.MatrixInfo import blosum62
class AlignmentSeqTestCase(unittest.TestCase):
def setUp(self):
self.unit_timeout = int(os.environ.get("UNIT_TIMEOUT") or 60)
balibase_zippath = "balibase.zip"
self.balibase_path = "balibase"
testfile_path = os.path.join(
......@@ -51,7 +60,9 @@ class AlignmentSeqTestCase(unittest.TestCase):
)
def test_simple_align(self):
"""Test alignments with the simplest metric. As there can be a huge number of
"""Test alignments with the simplest metric.
As there can be a huge number of
solutions, we check only that we got the right score, and one valid
alignment.
"""
......@@ -82,6 +93,9 @@ class AlignmentSeqTestCase(unittest.TestCase):
raise
def test_blosum_align(self):
"""Tests alignments with blosum but no gap extension.
"""
from alignementseq import align, vec_align
for s1, s2, filename in self.get_dataset_heads():
......@@ -107,6 +121,63 @@ class AlignmentSeqTestCase(unittest.TestCase):
print(ex_r2)
raise
def _time_and_report(method, seq1, seq2):
    """Time one call of ``method(seq1, seq2)`` and print the result.

    Defined at module level (rather than as a closure inside the test) so it
    can be used as a ``multiprocessing.Process`` target under every start
    method, including ``spawn`` and ``forkserver`` which must pickle the
    target and its arguments.
    """
    elapsed = timeit.Timer(lambda: method(seq1, seq2)).timeit(1)
    print("{}: {}".format(method.__name__, elapsed))


class PerformanceTestCase(AlignmentSeqTestCase):
    """Performance tests, slow.

    Excluded from default test suite, run it with
    ``python -m unittest alignementseq_tests.PerformanceTestCase``
    """

    def setUp(self):
        """Run the parent set-up, then load the human and mouse titin records.

        The two records are fetched with ``Bio.Entrez.efetch`` and parsed as
        GenBank; exactly two records are expected, otherwise the unpacking
        raises ``ValueError``.
        """
        super().setUp()
        with warnings.catch_warnings():
            # Warnings raised while fetching/parsing are deliberately silenced
            # (presumably the Entrez "no email set" notice -- TODO confirm).
            warnings.simplefilter("ignore")
            handle = Bio.Entrez.efetch(
                db="protein",
                id=["CAA62188", "EDL27217"],
                rettype="gp",
                retmode="text",
            )
            try:
                # Unpacking consumes the parser generator, so the handle can
                # be closed safely right afterwards.
                self.titin_human, self.titin_mouse = Bio.SeqIO.parse(
                    handle, "genbank"
                )
            finally:
                handle.close()

    def test_performance(self):
        """Time each alignment method once per dataset pair and print a table.

        Nothing is asserted; the collected timings are only printed.
        """
        from alignementseq import align, vec_align

        methods = (vec_align, align)
        times = pd.DataFrame(columns=("length1", "length2", "method", "time"))
        for s1, s2, filename in self.get_dataset_heads():
            for method in methods:
                # The lambda is invoked immediately by ``timeit``, so it sees
                # the current loop values of ``method``, ``s1`` and ``s2``.
                t = timeit.Timer(lambda: method(s1, s2)).timeit(1)
                times.loc[len(times)] = (
                    len(s1.seq), len(s2.seq), method.__name__, t
                )
        print(times)

    def test_titin(self):
        """Time each method on the titin pair, bounded by ``self.unit_timeout``.

        Each method runs in its own process so that it can be killed when it
        exceeds the timeout. A timeout is reported on stdout, not as a test
        failure.
        """
        from alignementseq import align, vec_align

        for method in (vec_align, align):
            p = multiprocessing.Process(
                target=_time_and_report,
                args=(method, self.titin_human, self.titin_mouse),
            )
            p.start()
            p.join(self.unit_timeout)
            # Note: if the task eats up your memory, it can take a while for
            # it to terminate if it times out.
            if p.is_alive():
                p.terminate()
                p.join()
                print("{}: timeout".format(method.__name__))
def load_tests(loader, standard_tests, pattern):
    """``unittest`` ``load_tests`` hook limiting the module's default suite.

    Only the tests of ``AlignmentSeqTestCase`` are returned; the ``loader``,
    ``standard_tests`` and ``pattern`` arguments are not used.
    """
    default_loader = unittest.defaultTestLoader
    suite = default_loader.loadTestsFromTestCase(AlignmentSeqTestCase)
    return suite
if __name__ == '__main__':
unittest.main()
unittest.main(verbosity=2)
biopython==1.73
pandas==0.23.3
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment