Skip to content
Snippets Groups Projects
Verified Commit 764c5637 authored by Etienne MORICE
Browse files

Handle missing structural data more nicely.

parent 42c12aa1
No related branches found
No related tags found
No related merge requests found
...@@ -28,7 +28,9 @@ class BalibaseTestCase(unittest.TestCase): ...@@ -28,7 +28,9 @@ class BalibaseTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
balibase_zippath = "balibase.zip" balibase_zippath = "balibase.zip"
self.balibase_path = "balibase" self.balibase_path = "balibase"
self.exclude_fastas = ["BBS11037.fasta"] # This one does not contain correct self.exclude_fastas = [] # For problematic files that won't be handled.
# Try to handle them, this is a temporary or
# last resort solution.
# pdb codes as ids # pdb codes as ids
testfile_path = os.path.join( testfile_path = os.path.join(
self.balibase_path, self.balibase_path,
...@@ -88,18 +90,35 @@ class BalibaseTestCase(unittest.TestCase): ...@@ -88,18 +90,35 @@ class BalibaseTestCase(unittest.TestCase):
warnings.warn("Exluding file "+name+" according to exclusion list") warnings.warn("Exluding file "+name+" according to exclusion list")
continue continue
skip = False
item = [s1, s2, os1, os2] item = [s1, s2, os1, os2]
for s in s1, s2: for s in s1, s2:
fname = Bio.PDB.PDBList().retrieve_pdb_file( code = s.id[0:4]
s.id[0:4], fname = os.path.join('data', code+'.cif')
file_format="mmCif",
pdir="data" # Attempt download
) if not os.path.isfile(fname):
fname = Bio.PDB.PDBList().retrieve_pdb_file(
code,
file_format="mmCif",
pdir="data"
)
# Skip if still missing
if not os.path.isfile(fname):
warnings.warn("Could not retrieve PDB "+code+" (from "+name+"), skipping")
skip = True
break
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
struct = parser.get_structure(s.id[0:4], fname) struct = parser.get_structure(code, fname)
print(fname, s.id[0:4])
item.append(struct) item.append(struct)
if skip:
continue
item.append(name) item.append(name)
yield item yield item
...@@ -238,7 +257,7 @@ class AlignmentSeqTestCase(BalibaseTestCase): ...@@ -238,7 +257,7 @@ class AlignmentSeqTestCase(BalibaseTestCase):
str(Bio.PDB.Polypeptide.Polypeptide(c).get_sequence()).startswith(str(seq.seq)) str(Bio.PDB.Polypeptide.Polypeptide(c).get_sequence()).startswith(str(seq.seq))
) )
except StopIteration: except StopIteration:
warnings.warn("No suitable chain found for seq id "+seq.id+" in structure "+struct.id+", skipping file.") warnings.warn("No suitable chain found for seq id "+seq.id+" in structure "+struct.id+" (from "+name+"), skipping file.")
skip = True skip = True
break break
polyp = Bio.PDB.Polypeptide.Polypeptide(chain) polyp = Bio.PDB.Polypeptide.Polypeptide(chain)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment