Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
BIOINF588
Manage
Activity
Members
Plan
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Etienne MORICE
BIOINF588
Commits
1b346096
Verified
Commit
1b346096
authored
6 years ago
by
Etienne MORICE
Browse files
Options
Downloads
Patches
Plain Diff
Benchmark for ClustalO
parent
c0e1e97e
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
.gitlab-ci.yml
+3
-0
3 additions, 0 deletions
.gitlab-ci.yml
alignementseq_tests.py
+38
-11
38 additions, 11 deletions
alignementseq_tests.py
with
42 additions
and
11 deletions
.gitignore
+
1
−
0
View file @
1b346096
...
...
@@ -6,3 +6,4 @@ timings.png
balibase.zip
*.fasta
data/
balibase_results/
This diff is collapsed.
Click to expand it.
.gitlab-ci.yml
+
3
−
0
View file @
1b346096
...
...
@@ -8,6 +8,9 @@ cache:
test
:
script
:
-
python3 -m unittest alignementseq_tests test_alignementseq_multiple
artifacts
:
paths
:
-
balibase_results/
test_performance
:
script
:
...
...
This diff is collapsed.
Click to expand it.
alignementseq_tests.py
+
38
−
11
View file @
1b346096
...
...
@@ -30,7 +30,7 @@ import os
import
sys
import
io
spec
,
m
=
None
,
None
bali_
spec
,
bali_
m
=
None
,
None
def
bali_score
(
ref_fasta_path
,
test_fasta_path
):
"""
Portable wrapper for the bali_score.py script.
...
...
@@ -39,16 +39,16 @@ def bali_score(ref_fasta_path, test_fasta_path):
Reproduces the results of the bali_score C program.
"""
path
=
os
.
path
.
join
(
"
balibase
"
,
"
bali_score.py
"
)
global
spec
,
m
if
spec
is
None
:
spec
=
importlib
.
util
.
spec_from_file_location
(
"
baliscore
"
,
path
)
m
=
importlib
.
util
.
module_from_spec
(
spec
)
global
bali_
spec
,
bali_
m
if
bali_
spec
is
None
:
bali_
spec
=
importlib
.
util
.
spec_from_file_location
(
"
baliscore
"
,
path
)
bali_
m
=
importlib
.
util
.
module_from_spec
(
bali_
spec
)
tmp_argv
=
sys
.
argv
tmp_stdout
=
sys
.
stdout
buf
=
sys
.
stdout
=
io
.
StringIO
()
sys
.
argv
=
[
path
,
ref_fasta_path
,
test_fasta_path
]
try
:
spec
.
loader
.
exec_module
(
m
)
bali_
spec
.
loader
.
exec_module
(
bali_
m
)
finally
:
sys
.
argv
=
tmp_argv
sys
.
stdout
=
tmp_stdout
...
...
@@ -162,8 +162,6 @@ class BalibaseTestCase(unittest.TestCase):
item
.
append
(
name
)
yield
item
class
AlignmentSeqTestCase
(
BalibaseTestCase
):
def
get_dataset_records
(
self
):
"""
Generator function to iterate over the record generator of each
unaligned fasta file.
...
...
@@ -175,6 +173,8 @@ class AlignmentSeqTestCase(BalibaseTestCase):
"
fasta
"
)
yield
records
,
filename
class
AlignmentSeqTestCase
(
BalibaseTestCase
):
def
assertSameResidues
(
self
,
str1
,
str2
):
"""
Strip strings of their
'
-
'
before comparing them
"""
...
...
@@ -288,9 +288,13 @@ class AlignmentSeqTestCase(BalibaseTestCase):
print
(
ex_r2
)
raise
def
test_benchmark_multiple_align
(
self
):
"""
Tests the multiple_align function (using blosum and gap extension).
"""
from
Bio
import
AlignIO
def
create_clustalo_alignments
(
self
):
"""
Submit alignment jobs to the EMBL pipeline to get reference ClustalO
alignments.
A rate limiting is applied, typically 5 jobs/second.
"""
import
clustalo
as
cl
dataset_dir
=
os
.
path
.
join
(
self
.
balibase_path
,
"
RV11.unaligned
"
)
...
...
@@ -315,6 +319,29 @@ class AlignmentSeqTestCase(BalibaseTestCase):
jobs
.
append
(
j
)
for
j
in
jobs
:
j
.
join
()
def
test_benchmark_multiple_align
(
self
):
"""
Tests the multiple_align function (using blosum and gap extension).
"""
res_dir
=
"
balibase_results
"
if
not
os
.
path
.
isdir
(
res_dir
):
os
.
mkdir
(
res_dir
)
# Cached
self
.
create_clustalo_alignments
()
ref_dataset_dir
=
os
.
path
.
join
(
self
.
balibase_path
,
"
RV11.aligned
"
)
clustalo_dir
=
os
.
path
.
join
(
self
.
balibase_path
,
"
clustalo
"
)
scores
=
[]
for
filename
in
os
.
listdir
(
ref_dataset_dir
):
scores
.
append
(
bali_score
(
os
.
path
.
join
(
ref_dataset_dir
,
filename
),
os
.
path
.
join
(
clustalo_dir
,
filename
)
))
scores
=
pd
.
DataFrame
(
scores
)
scores
.
to_csv
(
os
.
path
.
join
(
res_dir
,
"
clustalo.csv
"
))
def
test_align_dihedrals
(
self
):
"""
Iterates over the balibase dataset, match the chains from the mmcif
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment