Skip to content
Snippets Groups Projects
Verified Commit 1b346096 authored by Etienne MORICE
Browse files

Benchmark for ClustalO

parent c0e1e97e
No related branches found
No related tags found
No related merge requests found
......@@ -6,3 +6,4 @@ timings.png
balibase.zip
*.fasta
data/
balibase_results/
......@@ -8,6 +8,9 @@ cache:
test:
script:
- python3 -m unittest alignementseq_tests test_alignementseq_multiple
artifacts:
paths:
- balibase_results/
test_performance:
script:
......
......@@ -30,7 +30,7 @@ import os
import sys
import io
spec, m = None, None
bali_spec, bali_m = None, None
def bali_score(ref_fasta_path, test_fasta_path):
"""Portable wrapper for the bali_score.py script.
......@@ -39,16 +39,16 @@ def bali_score(ref_fasta_path, test_fasta_path):
Reproduces the results of the bali_score C program.
"""
path = os.path.join("balibase", "bali_score.py")
global spec, m
if spec is None:
spec = importlib.util.spec_from_file_location("baliscore", path)
m = importlib.util.module_from_spec(spec)
global bali_spec, bali_m
if bali_spec is None:
bali_spec = importlib.util.spec_from_file_location("baliscore", path)
bali_m = importlib.util.module_from_spec(bali_spec)
tmp_argv = sys.argv
tmp_stdout = sys.stdout
buf = sys.stdout = io.StringIO()
sys.argv = [path, ref_fasta_path, test_fasta_path]
try:
spec.loader.exec_module(m)
bali_spec.loader.exec_module(bali_m)
finally:
sys.argv = tmp_argv
sys.stdout = tmp_stdout
......@@ -162,8 +162,6 @@ class BalibaseTestCase(unittest.TestCase):
item.append(name)
yield item
class AlignmentSeqTestCase(BalibaseTestCase):
def get_dataset_records(self):
"""Generator function to iterate over the record generator of each
unaligned fasta file.
......@@ -175,6 +173,8 @@ class AlignmentSeqTestCase(BalibaseTestCase):
"fasta")
yield records, filename
class AlignmentSeqTestCase(BalibaseTestCase):
def assertSameResidues(self, str1, str2):
"""Strip strings of their '-' before comparing them
"""
......@@ -288,9 +288,13 @@ class AlignmentSeqTestCase(BalibaseTestCase):
print(ex_r2)
raise
def test_benchmark_multiple_align(self):
"""Tests the multiple_align function (using blosum and gap extension)."""
from Bio import AlignIO
def create_clustalo_alignments(self):
"""
Submit alignment jobs to the EMBL pipeline to get reference ClustalO
alignments.
A rate limiting is applied, typically 5 jobs/second.
"""
import clustalo as cl
dataset_dir = os.path.join(self.balibase_path, "RV11.unaligned")
......@@ -315,6 +319,29 @@ class AlignmentSeqTestCase(BalibaseTestCase):
jobs.append(j)
for j in jobs:
j.join()
def test_benchmark_multiple_align(self):
    """Benchmark the ClustalO alignments against the BAliBASE references.

    Makes sure the ``balibase_results`` directory exists, calls
    ``create_clustalo_alignments()``, then scores every file listed in
    ``RV11.aligned`` against the file of the same name in ``clustalo``
    using ``bali_score``. The collected scores are written to
    ``balibase_results/clustalo.csv``.
    """
    res_dir = "balibase_results"
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(res_dir, exist_ok=True)

    # Original note here was "Cached" -- presumably alignments that already
    # exist are not recomputed; confirm in create_clustalo_alignments().
    self.create_clustalo_alignments()

    ref_dataset_dir = os.path.join(self.balibase_path, "RV11.aligned")
    clustalo_dir = os.path.join(self.balibase_path, "clustalo")

    # os.listdir() returns names in arbitrary order; sorting keeps the rows
    # of the CSV in the same order on every run.
    scores = [
        bali_score(
            os.path.join(ref_dataset_dir, filename),
            os.path.join(clustalo_dir, filename),
        )
        for filename in sorted(os.listdir(ref_dataset_dir))
    ]
    pd.DataFrame(scores).to_csv(os.path.join(res_dir, "clustalo.csv"))
def test_align_dihedrals(self):
"""Iterates over the balibase dataset, match the chains from the mmcif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment