Skip to content
Snippets Groups Projects
Verified Commit c04c6c35 authored by Etienne MORICE's avatar Etienne MORICE
Browse files

Basis for structural pairwise alignment with dihedral costs.

Still a draft.
parent 764c5637
No related branches found
No related tags found
No related merge requests found
...@@ -4,6 +4,7 @@ before_script: ...@@ -4,6 +4,7 @@ before_script:
cache: cache:
paths: paths:
- balibase/ - balibase/
- data/
test: test:
script: script:
- python3 -m unittest alignementseq_tests test_alignementseq_multiple -v - python3 -m unittest alignementseq_tests test_alignementseq_multiple -v
......
...@@ -36,6 +36,23 @@ def score(a, b, gap=-8): ...@@ -36,6 +36,23 @@ def score(a, b, gap=-8):
except KeyError: except KeyError:
return mat[b,a] return mat[b,a]
def dihedral_score(angles1, angles2, phi0=1., psi0=1.):
"""
Tentative dihedral scoring component.
"""
phi1, psi1 = angles1
phi2, psi2 = angles2
s = 0
if phi1 is not None and phi2 is not None:
err = (phi2 - phi1) / phi0
err2 = err*err
s -= err2 / (1 + err2)
if psi1 is not None and psi2 is not None:
err = (psi2 - psi1) / psi0
err2 = err*err
s -= err2 / (1 + err2)
return s
def align(seq1, seq2, score=score): def align(seq1, seq2, score=score):
"""Main implementation of align, with blosum scoring and no specific gap """Main implementation of align, with blosum scoring and no specific gap
extension penalties. extension penalties.
...@@ -43,6 +60,10 @@ def align(seq1, seq2, score=score): ...@@ -43,6 +60,10 @@ def align(seq1, seq2, score=score):
Args: Args:
seq1: A SeqRecord object seq1: A SeqRecord object
seq2: Same. seq2: Same.
Returns:
A (score, aseq1, aseq2) tuple, holding alignment score and aligned
sequences with '-' inserted.
""" """
x = seq1.seq x = seq1.seq
...@@ -217,7 +238,7 @@ def vec_align(seq1, seq2, mat=dict_to_weight(Bio.SubsMat.MatrixInfo.blosum62), g ...@@ -217,7 +238,7 @@ def vec_align(seq1, seq2, mat=dict_to_weight(Bio.SubsMat.MatrixInfo.blosum62), g
aseq2.seq = Seq(aly[::-1]) aseq2.seq = Seq(aly[::-1])
return S[n+m][0], aseq1, aseq2 return S[n+m][0], aseq1, aseq2
def align_dihedrals(seq1, seq2, struct1, struct2): def align_dihedrals(seq1, seq2, angles1, angles2):
""" """
Align two sequences, by using a simple cost function using dihedral angles Align two sequences, by using a simple cost function using dihedral angles
as well as amino acids names. as well as amino acids names.
...@@ -225,14 +246,16 @@ def align_dihedrals(seq1, seq2, struct1, struct2): ...@@ -225,14 +246,16 @@ def align_dihedrals(seq1, seq2, struct1, struct2):
Arguments: Arguments:
seq1: First sequence as SeqRecord seq1: First sequence as SeqRecord
seq2: seq2:
struct1: First structure as Bio.PDB object angles1: Dihedral angles for seq1, as a zipped phi-psi list
struct2: angles2:
""" """
print("Not implemented") align2steps(seq1, seq2, d=8, e=4, angles=True, angles1=angles1, angles2=angles2)
def align2steps(seq1, seq2, d=8, e=4): def align2steps(seq1, seq2, d=8, e=4, angles=False, angles1=None, angles2=None):
"""Main implementation of align, handling also gap extensions. """Main implementation of align, handling also gap extensions.
Adatped to handle dihedral costs.
""" """
x = seq1.seq x = seq1.seq
y = seq2.seq y = seq2.seq
...@@ -270,10 +293,14 @@ def align2steps(seq1, seq2, d=8, e=4): ...@@ -270,10 +293,14 @@ def align2steps(seq1, seq2, d=8, e=4):
a = x[i-1] a = x[i-1]
b = y[j-1] b = y[j-1]
s = score(a,b)
if(angles):
s += 10 * dihedral_score(angles1[i-1], angles2[j-1])
# find max for M # find max for M
s1 = mm[i-1][j-1] + score(a,b) s1 = mm[i-1][j-1] + s
s2 = mx[i-1][j-1] + score(a,b) s2 = mx[i-1][j-1] + s
s3 = my[i-1][j-1] + score(a,b) s3 = my[i-1][j-1] + s
if s1 >= s2: if s1 >= s2:
if s1 >= s3: if s1 >= s3:
......
...@@ -9,6 +9,7 @@ import zipfile ...@@ -9,6 +9,7 @@ import zipfile
import warnings import warnings
import timeit import timeit
import multiprocessing import multiprocessing
import numpy as np
import pandas as pd import pandas as pd
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
...@@ -249,20 +250,55 @@ class AlignmentSeqTestCase(BalibaseTestCase): ...@@ -249,20 +250,55 @@ class AlignmentSeqTestCase(BalibaseTestCase):
raise raise
def test_align_dihedrals(self): def test_align_dihedrals(self):
"""Iterates over the balibase dataset, match the chains from the mmcif
and align with dihedral angle scores
"""
from alignementseq import vec_align, align_dihedrals
score_fn = lambda a,b : 0 if a == '' or b == '' else 0 if a != b else 1
for seq1, seq2, oseq1, oseq2, struct1, struct2, name in self.get_datasets_heads_with_struct(): for seq1, seq2, oseq1, oseq2, struct1, struct2, name in self.get_datasets_heads_with_struct():
skip = False skip = False
for seq, struct in ((oseq1, struct1), (oseq2, struct2)): angles1 = []
try: angles2 = []
chain = next(c for c in struct.get_chains() if for seq, struct, angles in ((oseq1, struct1, angles1), (oseq2, struct2, angles2)):
str(Bio.PDB.Polypeptide.Polypeptide(c).get_sequence()).startswith(str(seq.seq)) chain, match, achain, aseq = max(((
c,
*vec_align(
SeqRecord(Bio.PDB.Polypeptide.Polypeptide(c).get_sequence()),
seq,
mat=np.eye(26), gap=0)
) for c in struct.get_chains()),
key=(lambda t : t[1])
) )
except StopIteration: ident = match/len(seq.seq)
warnings.warn("No suitable chain found for seq id "+seq.id+" in structure "+struct.id+" (from "+name+"), skipping file.") if(ident < 0.95):
warnings.warn("No chain found with more than 0.95 identity",
"(best was", ident, "), skipping this file.")
skip = True skip = True
break break
#print("Matched chain with", match*100/len(seq.seq), "identity.")
polyp = Bio.PDB.Polypeptide.Polypeptide(chain) polyp = Bio.PDB.Polypeptide.Polypeptide(chain)
all_angles = polyp.get_phi_psi_list()
it_angles, it_aseq, it_achain = iter(all_angles), iter(aseq), iter(achain)
try:
while True:
ref_aa = next(it_aseq)
if ref_aa == '-':
next(it_achain)
next(it_angles)
elif next(it_achain) == '-':
angles.append((None,None))
else:
angles.append(next(it_angles))
except StopIteration:
pass
if skip: if skip:
continue continue
align_dihedrals(oseq1, oseq2, angles1, angles2)
# ============================================================================= # =============================================================================
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment