Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
BIOINF588
Manage
Activity
Members
Plan
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Container Registry
Model registry
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Etienne MORICE
BIOINF588
Commits
c04c6c35
Verified
Commit
c04c6c35
authored
6 years ago
by
Etienne MORICE
Browse files
Options
Downloads
Patches
Plain Diff
Basis for structural pairwise alignment with dihedral costs.
Still a draft.
parent
764c5637
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitlab-ci.yml
+1
-0
1 addition, 0 deletions
.gitlab-ci.yml
alignementseq.py
+35
-8
35 additions, 8 deletions
alignementseq.py
alignementseq_tests.py
+42
-6
42 additions, 6 deletions
alignementseq_tests.py
with
78 additions
and
14 deletions
.gitlab-ci.yml
+
1
−
0
View file @
c04c6c35
...
@@ -4,6 +4,7 @@ before_script:
...
@@ -4,6 +4,7 @@ before_script:
cache
:
cache
:
paths
:
paths
:
-
balibase/
-
balibase/
-
data/
test
:
test
:
script
:
script
:
-
python3 -m unittest alignementseq_tests test_alignementseq_multiple -v
-
python3 -m unittest alignementseq_tests test_alignementseq_multiple -v
...
...
This diff is collapsed.
Click to expand it.
alignementseq.py
+
35
−
8
View file @
c04c6c35
...
@@ -36,6 +36,23 @@ def score(a, b, gap=-8):
...
@@ -36,6 +36,23 @@ def score(a, b, gap=-8):
except
KeyError
:
except
KeyError
:
return
mat
[
b
,
a
]
return
mat
[
b
,
a
]
def
dihedral_score
(
angles1
,
angles2
,
phi0
=
1.
,
psi0
=
1.
):
"""
Tentative dihedral scoring component.
"""
phi1
,
psi1
=
angles1
phi2
,
psi2
=
angles2
s
=
0
if
phi1
is
not
None
and
phi2
is
not
None
:
err
=
(
phi2
-
phi1
)
/
phi0
err2
=
err
*
err
s
-=
err2
/
(
1
+
err2
)
if
psi1
is
not
None
and
psi2
is
not
None
:
err
=
(
psi2
-
psi1
)
/
psi0
err2
=
err
*
err
s
-=
err2
/
(
1
+
err2
)
return
s
def
align
(
seq1
,
seq2
,
score
=
score
):
def
align
(
seq1
,
seq2
,
score
=
score
):
"""
Main implementation of align, with blosum scoring and no specific gap
"""
Main implementation of align, with blosum scoring and no specific gap
extension penalties.
extension penalties.
...
@@ -43,6 +60,10 @@ def align(seq1, seq2, score=score):
...
@@ -43,6 +60,10 @@ def align(seq1, seq2, score=score):
Args:
Args:
seq1: A SeqRecord object
seq1: A SeqRecord object
seq2: Same.
seq2: Same.
Returns:
A (score, aseq1, aseq2) tuple, holding alignment score and aligned
sequences with
'
-
'
inserted.
"""
"""
x
=
seq1
.
seq
x
=
seq1
.
seq
...
@@ -217,7 +238,7 @@ def vec_align(seq1, seq2, mat=dict_to_weight(Bio.SubsMat.MatrixInfo.blosum62), g
...
@@ -217,7 +238,7 @@ def vec_align(seq1, seq2, mat=dict_to_weight(Bio.SubsMat.MatrixInfo.blosum62), g
aseq2
.
seq
=
Seq
(
aly
[::
-
1
])
aseq2
.
seq
=
Seq
(
aly
[::
-
1
])
return
S
[
n
+
m
][
0
],
aseq1
,
aseq2
return
S
[
n
+
m
][
0
],
aseq1
,
aseq2
def
align_dihedrals
(
seq1
,
seq2
,
struct1
,
struct
2
):
def
align_dihedrals
(
seq1
,
seq2
,
angles1
,
angles
2
):
"""
"""
Align two sequences, by using a simple cost function using dihedral angles
Align two sequences, by using a simple cost function using dihedral angles
as well as amino acids names.
as well as amino acids names.
...
@@ -225,14 +246,16 @@ def align_dihedrals(seq1, seq2, struct1, struct2):
...
@@ -225,14 +246,16 @@ def align_dihedrals(seq1, seq2, struct1, struct2):
Arguments:
Arguments:
seq1: First sequence as SeqRecord
seq1: First sequence as SeqRecord
seq2:
seq2:
struct1: First structure as Bio.PDB objec
t
angles1: Dihedral angles for seq1, as a zipped phi-psi lis
t
struct
2:
angles
2:
"""
"""
print
(
"
Not implemented
"
)
align2steps
(
seq1
,
seq2
,
d
=
8
,
e
=
4
,
angles
=
True
,
angles1
=
angles1
,
angles2
=
angles2
)
def
align2steps
(
seq1
,
seq2
,
d
=
8
,
e
=
4
):
def
align2steps
(
seq1
,
seq2
,
d
=
8
,
e
=
4
,
angles
=
False
,
angles1
=
None
,
angles2
=
None
):
"""
Main implementation of align, handling also gap extensions.
"""
Main implementation of align, handling also gap extensions.
Adatped to handle dihedral costs.
"""
"""
x
=
seq1
.
seq
x
=
seq1
.
seq
y
=
seq2
.
seq
y
=
seq2
.
seq
...
@@ -270,10 +293,14 @@ def align2steps(seq1, seq2, d=8, e=4):
...
@@ -270,10 +293,14 @@ def align2steps(seq1, seq2, d=8, e=4):
a
=
x
[
i
-
1
]
a
=
x
[
i
-
1
]
b
=
y
[
j
-
1
]
b
=
y
[
j
-
1
]
s
=
score
(
a
,
b
)
if
(
angles
):
s
+=
10
*
dihedral_score
(
angles1
[
i
-
1
],
angles2
[
j
-
1
])
# find max for M
# find max for M
s1
=
mm
[
i
-
1
][
j
-
1
]
+
s
core
(
a
,
b
)
s1
=
mm
[
i
-
1
][
j
-
1
]
+
s
s2
=
mx
[
i
-
1
][
j
-
1
]
+
s
core
(
a
,
b
)
s2
=
mx
[
i
-
1
][
j
-
1
]
+
s
s3
=
my
[
i
-
1
][
j
-
1
]
+
s
core
(
a
,
b
)
s3
=
my
[
i
-
1
][
j
-
1
]
+
s
if
s1
>=
s2
:
if
s1
>=
s2
:
if
s1
>=
s3
:
if
s1
>=
s3
:
...
...
This diff is collapsed.
Click to expand it.
alignementseq_tests.py
+
42
−
6
View file @
c04c6c35
...
@@ -9,6 +9,7 @@ import zipfile
...
@@ -9,6 +9,7 @@ import zipfile
import
warnings
import
warnings
import
timeit
import
timeit
import
multiprocessing
import
multiprocessing
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
import
matplotlib
import
matplotlib
matplotlib
.
use
(
'
Agg
'
)
matplotlib
.
use
(
'
Agg
'
)
...
@@ -249,20 +250,55 @@ class AlignmentSeqTestCase(BalibaseTestCase):
...
@@ -249,20 +250,55 @@ class AlignmentSeqTestCase(BalibaseTestCase):
raise
raise
def
test_align_dihedrals
(
self
):
def
test_align_dihedrals
(
self
):
"""
Iterates over the balibase dataset, match the chains from the mmcif
and align with dihedral angle scores
"""
from
alignementseq
import
vec_align
,
align_dihedrals
score_fn
=
lambda
a
,
b
:
0
if
a
==
''
or
b
==
''
else
0
if
a
!=
b
else
1
for
seq1
,
seq2
,
oseq1
,
oseq2
,
struct1
,
struct2
,
name
in
self
.
get_datasets_heads_with_struct
():
for
seq1
,
seq2
,
oseq1
,
oseq2
,
struct1
,
struct2
,
name
in
self
.
get_datasets_heads_with_struct
():
skip
=
False
skip
=
False
for
seq
,
struct
in
((
oseq1
,
struct1
),
(
oseq2
,
struct2
)):
angles1
=
[]
try
:
angles2
=
[]
chain
=
next
(
c
for
c
in
struct
.
get_chains
()
if
for
seq
,
struct
,
angles
in
((
oseq1
,
struct1
,
angles1
),
(
oseq2
,
struct2
,
angles2
)):
str
(
Bio
.
PDB
.
Polypeptide
.
Polypeptide
(
c
).
get_sequence
()).
startswith
(
str
(
seq
.
seq
))
chain
,
match
,
achain
,
aseq
=
max
(((
c
,
*
vec_align
(
SeqRecord
(
Bio
.
PDB
.
Polypeptide
.
Polypeptide
(
c
).
get_sequence
()),
seq
,
mat
=
np
.
eye
(
26
),
gap
=
0
)
)
for
c
in
struct
.
get_chains
()),
key
=
(
lambda
t
:
t
[
1
])
)
)
except
StopIteration
:
ident
=
match
/
len
(
seq
.
seq
)
warnings
.
warn
(
"
No suitable chain found for seq id
"
+
seq
.
id
+
"
in structure
"
+
struct
.
id
+
"
(from
"
+
name
+
"
), skipping file.
"
)
if
(
ident
<
0.95
):
warnings
.
warn
(
"
No chain found with more than 0.95 identity
"
,
"
(best was
"
,
ident
,
"
), skipping this file.
"
)
skip
=
True
skip
=
True
break
break
#print("Matched chain with", match*100/len(seq.seq), "identity.")
polyp
=
Bio
.
PDB
.
Polypeptide
.
Polypeptide
(
chain
)
polyp
=
Bio
.
PDB
.
Polypeptide
.
Polypeptide
(
chain
)
all_angles
=
polyp
.
get_phi_psi_list
()
it_angles
,
it_aseq
,
it_achain
=
iter
(
all_angles
),
iter
(
aseq
),
iter
(
achain
)
try
:
while
True
:
ref_aa
=
next
(
it_aseq
)
if
ref_aa
==
'
-
'
:
next
(
it_achain
)
next
(
it_angles
)
elif
next
(
it_achain
)
==
'
-
'
:
angles
.
append
((
None
,
None
))
else
:
angles
.
append
(
next
(
it_angles
))
except
StopIteration
:
pass
if
skip
:
if
skip
:
continue
continue
align_dihedrals
(
oseq1
,
oseq2
,
angles1
,
angles2
)
# =============================================================================
# =============================================================================
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment