Python源码示例:Bio.SeqFeature.FeatureLocation()
示例1
def add_point_feature(self, resnum, feat_type=None, feat_id=None, qualifiers=None):
    """Append a feature covering exactly one residue to ``self.features``.

    Args:
        resnum (int): Protein sequence residue number. The feature spans
            ``resnum - 1`` to ``resnum``.
        feat_type (str, optional): Description of the feature type
            (e.g. 'catalytic residue'). A generic description is used when
            this is empty or None.
        feat_id (str, optional): ID of the feature (e.g. 'TM1').
        qualifiers (optional): Forwarded unchanged to ``SeqFeature``.

    Raises:
        ValueError: If ``self.feature_file`` is set.
    """
    if self.feature_file:
        raise ValueError('Feature file associated with sequence, please remove file association to append '
                         'additional features.')

    description = feat_type or 'Manually added protein sequence single residue feature'
    span = FeatureLocation(ExactPosition(resnum - 1), ExactPosition(resnum))
    self.features.append(
        SeqFeature(location=span, type=description, id=feat_id, qualifiers=qualifiers)
    )
示例2
def add_region_feature(self, start_resnum, end_resnum, feat_type=None, feat_id=None, qualifiers=None):
    """Append a feature covering a range of residues to ``self.features``.

    Args:
        start_resnum (int): Start residue number of the feature. The location
            starts at ``start_resnum - 1``.
        end_resnum (int): End residue number of the feature, used as the
            location end.
        feat_type (str, optional): Description of the feature type
            (e.g. 'binding domain'). A generic description is used when this
            is empty or None.
        feat_id (str, optional): ID of the feature (e.g. 'TM1').
        qualifiers (optional): Forwarded unchanged to ``SeqFeature``.

    Raises:
        ValueError: If ``self.feature_file`` is set.
    """
    if self.feature_file:
        raise ValueError('Feature file associated with sequence, please remove file association to append '
                         'additional features.')

    description = feat_type or 'Manually added protein sequence region feature'
    span = FeatureLocation(start_resnum - 1, end_resnum)
    self.features.append(
        SeqFeature(location=span, type=description, id=feat_id, qualifiers=qualifiers)
    )
示例3
def get_residue_annotations(self, start_resnum, end_resnum=None):
    """Return the letter annotations of one residue or of a residue range.

    Args:
        start_resnum (int): Residue number (first residue of the range).
        end_resnum (int, optional): Last residue number of the range. When
            omitted (or falsy) only ``start_resnum`` is returned.

    Returns:
        dict: Letter annotations of the extracted residue(s).
    """
    last_resnum = end_resnum if end_resnum else start_resnum
    # A throw-away feature is used only to slice this record.
    window = SeqFeature(FeatureLocation(start_resnum - 1, last_resnum))
    return window.extract(self).letter_annotations
示例4
def test_translate_feature(self):
    '''
    Check that translate_feature turns a dict of nucleotide sequences into a
    dict of amino-acid strings for the given feature.
    '''
    # Codon -> amino acid table: https://en.wikipedia.org/wiki/DNA_codon_table
    nucleotides = {
        'seq1': "TTTCTTATGGTCGTA",
        'seq2': "TCTTCAACTGCTACA",
        'seq3': "CATAATGAATATAAT",
    }
    aln = {name: Seq(bases) for name, bases in nucleotides.items()}
    # The feature covers all 15 bases, i.e. five codons per sequence.
    feature = SeqFeature(FeatureLocation(0, 15), type="domain")

    expected_translations = {
        'seq1': 'FLMVV',
        'seq2': 'SSTAT',
        'seq3': 'HNEYN',
    }
    assert translate.translate_feature(aln, feature) == expected_translations
# TODO: test_vcf_feature, assign_aa_vcf, assign_aa_fasta
# Unclear how to emulate inputs (TreeTime dict, tree)
示例5
def create_faux_record_from_proteins(proteins, id):
    """Build a placeholder SeqRecord whose CDS features stand in for proteins.

    The record has an empty sequence. Each protein becomes a forward-strand
    CDS feature of length ``3 * len(protein.seq)``, and the features are laid
    out back to back starting at position 0.

    Args:
        proteins: Iterable of objects exposing ``.id`` and ``.seq``.
        id: Identifier given to the returned record.

    Returns:
        SeqRecord: Record with one CDS feature per protein, in input order.
    """
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    max_protein_id_len = 45
    record = SeqRecord(seq=Seq(''), id=id)

    offset = 0
    for protein in proteins:
        cds_end = offset + len(protein.seq) * 3
        record.features.append(SeqFeature(
            location=FeatureLocation(offset, cds_end, strand=1),
            type="CDS",
            qualifiers={
                # Protein ids are truncated to max_protein_id_len characters.
                'protein_id': [protein.id[:max_protein_id_len]],
                'translation': [str(protein.seq)],
            },
        ))
        # The next CDS starts where this one ended.
        offset = cds_end
    return record
示例6
def fetch_source_feature(self, gb_record):
    """Return the record's "source" feature, creating one if it is missing.

    The first feature whose ``type`` is "source" is returned. If there is
    none, a new source feature spanning the whole sequence is inserted at the
    front of ``gb_record.features`` and returned.
    """
    existing = next((feat for feat in gb_record.features if feat.type == "source"), None)
    if existing is not None:
        return existing

    # Add a source feature covering the full sequence.
    whole_sequence = FeatureLocation(
        SeqFeature.ExactPosition(0),
        SeqFeature.ExactPosition(len(gb_record.seq)),
    )
    created = SeqFeature.SeqFeature(whole_sequence, type="source")
    gb_record.features.insert(0, created)
    return created
示例7
def drawFig(self):
    """Draw every (name, start, stop) entry as an arrow on one linear track
    and write the figure to ``<exportPath>/linear.pdf``.

    Reads ``self.list_name_start_stop`` (iterable of name/start/stop triples)
    and the rendering options stored in ``self.dict_args``.

    Raises:
        ValueError: If there are no entries to draw.
    """
    entries = list(self.list_name_start_stop)
    if not entries:
        # Without entries there is no sensible end coordinate for the diagram.
        raise ValueError("No features to draw: list_name_start_stop is empty")

    args = self.dict_args
    gdd = GenomeDiagram.Diagram('linear figure')
    gdt_features = gdd.new_track(1, greytrack=False, scale=0, height=0.4)
    gds_features = gdt_features.new_set()

    # NOTE(review): an earlier version computed a per-gene colour (COX/NAD/ATP/
    # CYTB/RRN) but never passed it on; arrows are coloured by
    # dict_args["Label_color"]. Confirm which colouring is intended.
    for name, start, stop in entries:
        # Strand belongs to the location; every gene is drawn on the forward strand.
        feature = SeqFeature(FeatureLocation(int(start), int(stop), strand=1))
        gds_features.add_feature(feature, name=name, label=True,
                                 label_size=args["label_size"], label_angle=args["Label_angle"],
                                 color=args["Label_color"], label_position=args["Label_position"],
                                 sigil="BIGARROW", arrowshaft_height=0.5,
                                 arrowhead_length=0.5)

    # Use the largest stop so the drawn range covers all features even when
    # the entries are not sorted by position.
    diagram_end = max(int(stop) for _, _, stop in entries)
    gdd.draw(format='linear', pagesize=(args["fig_width"] * cm, args["fig_height"] * cm), fragments=1,
             start=0, end=diagram_end)
    gdd.write(args["exportPath"] + os.sep + "linear.pdf", "pdf")
示例8
def test_add_results_to_record(self):
    """Gene ontologies added to a record must match the results object,
    including for Pfam domains that differ only in version number."""
    fake_record = set_dummy_with_pfams({
        'PF00015.2': FeatureLocation(0, 3),
        'PF00351.1': FeatureLocation(0, 3),
        'PF00015.27': FeatureLocation(3, 6),
    })
    duplicate = DummyPFAMDomain(identifier="PF00015.2")
    fake_record.add_pfam_domain(duplicate)
    assert duplicate in fake_record.get_pfam_domains()

    gos = pfam2go.get_gos_for_pfams(fake_record)
    results = pfam2go.Pfam2GoResults(fake_record.id, gos)
    results.add_to_record(fake_record)
    assert duplicate.full_identifier == 'PF00015.2'

    for domain in fake_record.get_pfam_domains():
        assert sorted(domain.gene_ontologies.ids) == sorted(results.get_all_gos(domain))
        # identical pfams (with different version numbers) must all carry the
        # same gene ontologies
        if domain.identifier == "PF00015":
            assert domain.version in [2, 27]
            assert sorted(domain.gene_ontologies.ids) == sorted(results.get_all_gos(duplicate))
示例9
def test_to_json(self):
    """The JSON form of the results must carry the record id, schema
    version 1 and the expected GO terms for each Pfam."""
    shared_location = FeatureLocation(0, 12)
    fake_record = set_dummy_with_pfams({'PF00015': shared_location, 'PF00351': shared_location})
    gos = pfam2go.get_gos_for_pfams(fake_record)
    result_json = pfam2go.Pfam2GoResults(fake_record.id, gos).to_json()

    oxidoreductase = ("oxidoreductase activity, acting on paired donors, "
                      "with incorporation or reduction of molecular oxygen, "
                      "reduced pteridine as one donor, and incorporation of "
                      "one atom of oxygen")
    expected_pfams = {
        "PF00015": {"GO:0007165": "signal transduction",
                    "GO:0016020": "membrane"},
        "PF00351": {"GO:0016714": oxidoreductase,
                    "GO:0055114": "oxidation-reduction process"},
    }

    assert result_json["record_id"] == fake_record.id
    assert result_json["schema_version"] == 1
    for identifier, expected_gos in expected_pfams.items():
        assert expected_gos == result_json["pfams"][identifier]
示例10
def test_from_wrong_schema(self):
    """Loading JSON with an unexpected schema version must log a mismatch
    message and yield a falsy result."""
    shared_location = FeatureLocation(0, 12)
    fake_record = set_dummy_with_pfams({
        'PF00015': shared_location,
        'PF00351': shared_location,
        'PF05147': shared_location,
    })

    oxidoreductase = ("oxidoreductase activity, acting on paired donors, "
                      "with incorporation or reduction of molecular oxygen, "
                      "reduced pteridine as one donor, and incorporation of "
                      "one atom of oxygen")
    broken_json = {
        "pfams": {
            "PF00015": {"GO:0004871": "signal transducer activity",
                        "GO:0007165": "signal transduction",
                        "GO:0016020": "membrane"},
            "PF00351": {"GO:0016714": oxidoreductase,
                        "GO:0055114": "oxidation-reduction process"},
        },
        "record_id": fake_record.id,
        # schema version 2 is the deliberately wrong part of this payload
        "schema_version": 2,
    }

    with self.assertLogs() as captured:
        loaded = pfam2go.Pfam2GoResults.from_json(broken_json, fake_record)
        assert "Schema version mismatch, discarding Pfam2GO results" in str(captured.output)
        assert not loaded
示例11
def test_first_gene_forward(self, patched_enumerate):
    """Promoter of the first (forward) gene, whatever strand the following
    gene is on."""
    # ensure coverage only considers this gene of interest
    gene_of_interest = self.add_gene("A", 10, 20, 1)
    patched_enumerate.return_value = [(0, gene_of_interest)]
    other_gene = self.add_gene("B", 30, 40, 1)

    # (arguments to get_promoters) -> (expected promoter start, end)
    cases = [
        ((5, 75), (5, 20)),
        ((25, 75), (0, 20)),
        ((5, 5), (5, 15)),
        ((25, 5), (0, 15)),
    ]
    for strand in (1, -1):
        other_gene.location = FeatureLocation(30, 40, strand)
        print(other_gene.location)
        for args, (start, end) in cases:
            promoters = self.get_promoters(*args)
            assert len(promoters) == 1
            self.check_single_promoter(promoters[0], "A", start, end)
示例12
def test_last_gene_forward(self, patched_enumerate):
    """Promoter of the last (forward) gene, whatever strand the preceding
    gene is on."""
    other_gene = self.add_gene("A", 10, 20, 1)
    # ensure coverage only considers this gene of interest
    gene_of_interest = self.add_gene("B", 30, 40, 1)
    patched_enumerate.return_value = [(1, gene_of_interest)]

    # (arguments to get_promoters) -> (expected promoter start, end)
    cases = [
        ((5, 75), (25, 40)),
        ((25, 75), (21, 40)),
        ((5, 5), (25, 35)),
        ((25, 5), (21, 35)),
    ]
    for strand in (1, -1):
        other_gene.location = FeatureLocation(10, 20, strand)
        print(other_gene.location)
        for args, (start, end) in cases:
            promoters = self.get_promoters(*args)
            assert len(promoters) == 1
            self.check_single_promoter(promoters[0], "B", start, end)
示例13
def test_normal_case_forward(self, patched_enumerate):
    """Promoter of a forward gene that has neighbours on both sides, for
    either strand of the preceding gene."""
    other = self.add_gene("A", 10, 20, 1)
    gene_of_interest = self.add_gene("B", 40, 60, 1)
    self.add_gene("C", 70, 80, 1)
    patched_enumerate.return_value = [(1, gene_of_interest)]

    # (arguments to get_promoters) -> (expected promoter start, end)
    cases = [
        ((5, 5), (35, 45)),
        ((5, 25), (35, 60)),
        ((25, 5), (21, 45)),
        ((25, 25), (21, 60)),
    ]
    for strand in (-1, 1):
        other.location = FeatureLocation(other.location.start, other.location.end, strand)
        for args, (start, end) in cases:
            promoters = self.get_promoters(*args)
            assert len(promoters) == 1
            self.check_single_promoter(promoters[0], "B", start, end)
示例14
def test_normal_case_reverse(self, patched_enumerate):
    """Promoter of a reverse-strand gene that has neighbours on both sides."""
    self.add_gene("A", 10, 20, 1)
    gene_of_interest = self.add_gene("B", 40, 60, -1)
    other = self.add_gene("C", 70, 80, -1)
    patched_enumerate.return_value = [(1, gene_of_interest)]

    # (arguments to get_promoters) -> (expected promoter start, end)
    cases = [
        ((5, 5), (55, 65)),
        ((5, 25), (40, 65)),
        ((25, 5), (55, 69)),
        ((25, 25), (40, 69)),
    ]
    # Only the reverse strand is exercised for the following gene.
    for strand in (-1,):
        other.location = FeatureLocation(other.location.start, other.location.end, strand)
        for args, (start, end) in cases:
            promoters = self.get_promoters(*args)
            assert len(promoters) == 1
            self.check_single_promoter(promoters[0], "B", start, end)
示例15
def get_aa_translation_from_location(self, location: FeatureLocation,
                                     transl_table: Union[str, int] = None) -> Seq:
    """ Obtain the amino-acid translation of the sequence at a location.

        Arguments:
            location: the location to extract from this record's sequence
            transl_table: the translation table to use, defaults to the
                          record's own table when None

        Returns:
            a protein Seq in which stop and ambiguous symbols (*BJOUZ) are
            replaced by X and gap characters are removed

        Raises:
            ValueError: if the location ends beyond the record's sequence
    """
    if location.end > len(self.seq):
        raise ValueError("location outside available sequence")
    table = self._transl_table if transl_table is None else transl_table

    nucleotides = location.extract(self.seq).ungap('-')
    # drop any trailing partial codon
    overhang = len(nucleotides) % 3
    if overhang:
        nucleotides = nucleotides[:-overhang]

    protein = nucleotides.translate(to_stop=True, table=table)
    if not protein:
        # go past stop codons and hope for something to work with
        protein = nucleotides.translate(table=table)

    # one pass: ambiguous/stop symbols become an explicit unknown, gaps are dropped
    cleanup = str.maketrans("*BJOUZ", "XXXXXX", "-")
    return Seq(str(protein).translate(cleanup), Alphabet.generic_protein)
示例16
def create_feature_from_location(record: Record, location: FeatureLocation,
                                 label: Optional[str] = None) -> CDSFeature:
    """ Build a CDS feature that covers the given location.

        Arguments:
            record: the Record the CDSFeature will belong to, used to generate
                    the feature translation
            location: the FeatureLocation the new CDSFeature should cover
            label: used as locus tag, protein id and gene name of the new
                   CDSFeature; generated from the location when omitted

        Returns:
            the newly created CDSFeature
    """
    if label is None:
        # zero-pad both coordinates to the number of digits in the record length
        width = len(str(len(record)))
        template = 'allorf_{start:0{digits}}_{end:0{digits}}'
        label = template.format(digits=width, start=(location.start + 1), end=location.end)

    translation = str(record.get_aa_translation_from_location(location))
    feature = CDSFeature(location, translation,
                         locus_tag=label, protein_id=label, gene=label)
    feature.created_by_antismash = True
    return feature
示例17
def _set_before(self, location):
    """
    Mark a location as not including the start codon.

    Forward strand (strand >= 0): the start of the location (of its first
    part, for multi-part locations) becomes a "BeforePosition".
    Reverse strand: the end of the location (of its last part) becomes an
    "AfterPosition".

    A multi-part location is modified in place and returned; a single-part
    location is replaced by a new FeatureLocation.

    NOTE(review): a strand of None would fail the ">= 0" comparison on
    Python 3 - confirm callers always pass a stranded location.
    """
    forward = location.strand >= 0  # forward strand
    multipart = len(location.parts) > 1

    if multipart:
        index = 0 if forward else -1
        target = location.parts[index]
    else:
        target = location

    if forward:
        fuzzy = FeatureLocation(BeforePosition(target.start), target.end, strand=target.strand)
    else:
        fuzzy = FeatureLocation(target.start, AfterPosition(target.end), strand=target.strand)

    if not multipart:
        return fuzzy
    location.parts[index] = fuzzy
    return location
示例18
def _set_after(self, location):
    """
    Mark a location as not including the stop codon.

    Forward strand (strand >= 0): the end of the location (of its last part,
    for multi-part locations) becomes an "AfterPosition".
    Reverse strand: the start of the location (of its first part) becomes a
    "BeforePosition".

    A multi-part location is modified in place and returned; a single-part
    location is replaced by a new FeatureLocation.

    NOTE(review): a strand of None would fail the ">= 0" comparison on
    Python 3 - confirm callers always pass a stranded location.
    """
    forward = location.strand >= 0  # forward strand
    multipart = len(location.parts) > 1

    if multipart:
        index = -1 if forward else 0
        target = location.parts[index]
    else:
        target = location

    if forward:
        fuzzy = FeatureLocation(target.start, AfterPosition(target.end), strand=target.strand)
    else:
        fuzzy = FeatureLocation(BeforePosition(target.start), target.end, strand=target.strand)

    if not multipart:
        return fuzzy
    location.parts[index] = fuzzy
    return location
示例19
def get_subsequence(self, resnums, new_id=None, copy_letter_annotations=True):
    """Build a new SeqProp from the listed residues of ``self.seq_record``.

    Args:
        resnums: Iterable of residue numbers; each one selects the span
            ``resnum - 1`` to ``resnum``.
        new_id (str, optional): ID of the new SeqProp. Defaults to
            ``'<self.id>_subseq'``.
        copy_letter_annotations (bool): Copy the letter annotations of the
            extracted subsequence onto the new SeqProp.

    Returns:
        SeqProp, or None when ``resnums`` is empty.

    Raises:
        ValueError: If no sequence record is stored.
    """
    # XTODO: documentation
    if not self.seq_record:
        raise ValueError('No chain sequence stored')

    residue_spans = [FeatureLocation(resnum - 1, resnum) for resnum in resnums]
    if not residue_spans:
        log.info('Zero length subsequences')
        return None

    if len(residue_spans) == 1:
        log.debug('Subsequence only one residue long')
        sub_feature_location = residue_spans[0]
    else:
        sub_feature_location = CompoundLocation(residue_spans)

    extracted = sub_feature_location.extract(self.seq_record)
    subseq_id = new_id if new_id else '{}_subseq'.format(self.id)
    new_sp = SeqProp(id=subseq_id, seq=extracted)
    if copy_letter_annotations:
        new_sp.letter_annotations = extracted.letter_annotations
    return new_sp
示例20
def get_subsequence(self, resnums, new_id=None, copy_letter_annotations=True):
    """Build a new SeqProp from the listed residues of this object.

    Args:
        resnums: Iterable of residue numbers; each one selects the span
            ``resnum - 1`` to ``resnum``.
        new_id (str, optional): ID of the new SeqProp. Defaults to
            ``'<self.id>_subseq'``.
        copy_letter_annotations (bool): Copy the letter annotations of the
            extracted subsequence onto the new SeqProp.

    Returns:
        SeqProp, or None when ``resnums`` is empty or extraction raises a
        TypeError (which is logged as critical).
    """
    # XTODO: documentation
    # XTODO can be sped up by separating into ranges based on continuous resnums
    residue_spans = [FeatureLocation(resnum - 1, resnum) for resnum in resnums]
    if not residue_spans:
        log.debug('Zero length subsequence')
        return None

    if len(residue_spans) == 1:
        log.debug('Subsequence only one residue long')
        sub_feature_location = residue_spans[0]
    else:
        sub_feature_location = CompoundLocation(residue_spans)

    try:
        extracted = sub_feature_location.extract(self)
    except TypeError:
        log.critical('SeqProp {}: unknown error when trying to get subsequence - please investigate! '
                     'Try using a feature to extract a subsequence from the SeqProp'.format(self.id))
        return None

    subseq_id = new_id if new_id else '{}_subseq'.format(self.id)
    new_sp = SeqProp(id=subseq_id, seq=extracted.seq)
    if copy_letter_annotations:
        new_sp.letter_annotations = extracted.letter_annotations
    return new_sp
示例21
def test_merge_split(self):
    """
    Test the merge and split function

    Paracoccus_yeei_TT13.gb has the following compound locations:
    PYTT13_06780: join{[1366920:1367182](-), [1365992:1366921](-)}
                  [1365992:1367182](-)
    PYTT13_11395: join{[2283890:2284152](+), [2284151:2285080](+)}
                  [2283890:2285080](+)
    PYTT13_11465: join{[2301567:2301817](+), [2301816:2302655](+)}
                  [2301567:2302655](+)
    PYTT13_12460: join{[2495319:2495581](+), [2495580:2496509](+)}
                  [2495319:2496509](+)
    PYTT13_16505: join{[3331106:3331356](+), [3331355:3332194](+)}
                  [3331106:3332194](+)

    Each of these CDS features is expected to come back as a single
    FeatureLocation with the merged start and end listed below.

    :return: None
    """
    correct_locations = {
        'PYTT13_06780': (1365992, 1367182),
        'PYTT13_11395': (2283890, 2285080),
        'PYTT13_11465': (2301567, 2302655),
        'PYTT13_12460': (2495319, 2496509),
        'PYTT13_16505': (3331106, 3332194)
    }
    testgbk = "test_genbank_files/Paracoccus_yeei_TT13.gb.gz"
    # The context manager closes the gzip handle even if parsing raises.
    # As before, the handle is closed before the records are iterated.
    with gzip.open(testgbk, 'rt') as handle:
        record = SeqioFilter(SeqIO.parse(handle, "genbank"))

    for s in record:
        for f in s.get_features("CDS"):
            if 'locus_tag' not in f.qualifiers:
                continue
            lt = f.qualifiers['locus_tag'][0]
            if lt not in correct_locations:
                continue
            expected_start, expected_end = correct_locations[lt]
            self.assertIsInstance(f.location, FeatureLocation)
            self.assertEqual(f.location.start, expected_start)
            self.assertEqual(f.location.end, expected_end)
示例22
def write_genbank(self):
    """
    Write prophages and their potential attachment sites in updated input GenBank file.

    Each prophage in ``self.pp`` is added to its contig as a forward-strand
    'misc_feature'. When attachment sites are present, a two-part
    'repeat_region' feature is added as well. The record is then written to
    ``<output_dir>/<file_prefix><basename of infile>``, gzip-compressed when
    the input file was.

    :param self: the data object
    :return: None
    """
    log_and_message("Writing GenBank output file", c="GREEN", stderr=True, quiet=self.quiet)
    prophage_feature_type = 'misc_feature'  # / prophage_region
    outfile = os.path.join(self.output_dir, self.file_prefix + os.path.basename(self.infile))

    for i in self.pp:
        prophage = self.pp[i]
        contig = self.record.get_entry(prophage['contig'])
        # Strand is set on the location rather than via the SeqFeature
        # initializer, which newer Biopython releases no longer accept.
        contig.append_feature(SeqFeature(
            location=FeatureLocation(prophage['start'], prophage['stop'], strand=1),
            type=prophage_feature_type,
            qualifiers=OrderedDict(
                {'note': f'prophage region pp{i} identified with PhiSpy v{version.__version__}'}
            )))
        # NOTE(review): presence is tested with the 'atts' key but the
        # coordinates are read from 'att' - confirm both keys are always set together.
        if 'atts' in prophage:
            att = prophage['att']
            contig.append_feature(SeqFeature(
                location=FeatureLocation(int(att[0]), int(att[1]), strand=1) +
                         FeatureLocation(int(att[2]), int(att[3]), strand=1),
                type='repeat_region',
                qualifiers=OrderedDict({'note': f'prophage region pp{i} potential attachment sites'})))

    # are we writing a gzip file
    if is_gzip_file(self.infile):
        handle = gzip.open(outfile, 'wt')
    else:
        handle = open(outfile, 'w')
    # Close the handle so buffered (and compressed) data is actually flushed.
    with handle:
        SeqIO.write(self.record, handle, 'genbank')
示例23
def _get_pfam_loc(self, query_start, query_end, feature):
if feature.strand == 1:
start = feature.location.start + 3 * query_start
end = feature.location.start + 3 * query_end
elif feature.strand == -1:
end = feature.location.end - 3 * query_start
start = feature.location.end - 3 * query_end
else:
raise ValueError('Invalid strand for feature: {}'.format(feature))
return FeatureLocation(start, end, strand=feature.strand)
示例24
def processed_record(detector_name='deepbgc', detector_label='deepbgc', score_threshold=0.5):
    """Build a small SeqRecord fixture carrying detector metadata plus CDS,
    Pfam and cluster features.

    The record gets a structured comment describing the detector, three CDS
    features (locus tags A, B, C), one scored Pfam feature per CDS, one
    cluster for the given detector and one 'annotated' cluster.
    """
    comment_key = util.format_detector_meta_key(detector_label)
    record = SeqRecord(Seq('ACTGCTCGACTGATT', alphabet=generic_dna))
    record.annotations['structured_comment'] = collections.OrderedDict()
    record.annotations['structured_comment'][comment_key] = collections.OrderedDict(
        name=detector_name,
        label=detector_label,
        score_threshold=score_threshold
    )

    def add(start, end, feature_type, qualifiers):
        record.features.append(SeqFeature(FeatureLocation(start, end), type=feature_type, qualifiers=qualifiers))

    # Add protein features
    for locus_tag, start, end in (('A', 0, 2), ('B', 2, 5), ('C', 5, 8)):
        add(start, end, 'CDS', {'locus_tag': [locus_tag]})

    # Add pfam features
    score_column = util.format_bgc_score_column(detector_name)
    pfam_rows = (
        ('A', 0, 2, 0.4, 'PF00001'),
        ('B', 2, 5, 0.7, 'PF00002'),
        ('C', 5, 8, 0.6, 'PF00003'),
    )
    for locus_tag, start, end, score, pfam_id in pfam_rows:
        add(start, end, util.PFAM_FEATURE,
            {score_column: [score], 'db_xref': [pfam_id], 'locus_tag': [locus_tag], 'database': [PFAM_DB_VERSION]})

    # Add BGC features
    add(0, 5, 'cluster',
        {score_column: ['0.6'], 'detector': [detector_name], 'detector_label': [detector_label]})
    add(2, 8, 'cluster',
        {'detector': ['annotated'], 'detector_label': ['annotated']})
    return record
示例25
def test_feature_creation(self):
    """new_feature_from_other must place the new feature correctly on both
    the forward and the reverse strand."""
    results = tta.tta.TTAResults('dummy', gc_content=1, threshold=0.65)

    # (strand of the source CDS, expected start, expected end)
    cases = (
        (1, 222, 225),
        (-1, 285, 288),
    )
    for strand, expected_start, expected_end in cases:
        cds = SeqFeature(FeatureLocation(210, 300, strand=strand), type='CDS')
        ret = results.new_feature_from_other(cds, 12)
        self.assertEqual(ret.strand, strand)
        self.assertEqual(ret.location.start, expected_start)
        self.assertEqual(ret.location.end, expected_end)
示例26
def set_dummy_with_pfams(pfam_ids: Dict[str, FeatureLocation]) -> DummyRecord:
    """ Build a DummyRecord holding one DummyPFAMDomain per entry.

        Arguments:
            pfam_ids: a dictionary mapping Pfam identifier to the location
                      the dummy domain should have

        Returns:
            a DummyRecord whose features are the created domains, in the
            dictionary's iteration order
    """
    domains = [
        DummyPFAMDomain(location=location, protein_start=0, protein_end=5,
                        identifier=identifier,
                        # domain id is "<identifier>.<start>.<end>"
                        domain_id='%s.%d.%d' % (identifier, location.start, location.end))
        for identifier, location in pfam_ids.items()
    ]
    return DummyRecord(features=domains)
示例27
def test_get_gos(self):
    """Every GO id found for a Pfam must be one of that Pfam's known
    connections."""
    fake_record = set_dummy_with_pfams({
        'PF00015': FeatureLocation(0, 3),
        'PF00351.42': FeatureLocation(6, 12),
    })
    found = pfam2go.get_gos_for_pfams(fake_record)
    for all_ontologies in found.values():
        for ontologies in all_ontologies:
            for go_entry in ontologies.go_entries:
                assert str(go_entry) in self.known_connections[ontologies.pfam]
示例28
def test_results(self):
    """A results object must keep the record id and the mapping it was
    built from, with each ontology tied to its Pfam identifier."""
    fake_record = set_dummy_with_pfams({'PF00015': FeatureLocation(0, 3)})
    gos = pfam2go.get_gos_for_pfams(fake_record)
    results = pfam2go.Pfam2GoResults(fake_record.id, gos)

    assert gos == results.pfam_domains_with_gos
    assert fake_record.id == results.record_id
    for domain, all_ontologies in results.pfam_domains_with_gos.items():
        for ontologies in all_ontologies:
            assert ontologies.pfam == domain.identifier
示例29
def test_from_json(self):
    """Results must survive a to_json / from_json round trip unchanged."""
    shared_location = FeatureLocation(0, 12)
    fake_record = set_dummy_with_pfams({
        'PF00015': shared_location,
        'PF00351': shared_location,
        'PF05147': shared_location,
    })
    gos = pfam2go.get_gos_for_pfams(fake_record)
    original_json = pfam2go.Pfam2GoResults(fake_record.id, gos).to_json()
    reloaded = pfam2go.Pfam2GoResults.from_json(original_json, fake_record)

    # PF05147 is expected to be absent from the serialised results
    assert 'PF05147' not in original_json["pfams"]
    for domain in reloaded.pfam_domains_with_gos:
        assert domain.identifier in original_json["pfams"]

    round_tripped = reloaded.to_json()
    assert original_json == round_tripped
    assert round_tripped["schema_version"] == 1
示例30
def test_result_vec_to_feature(self):
    "Test thiopeptides.result_vec_to_features()"
    expected_leader = "HEADHEADHEAD"
    expected_core = 'SCTSSCTSS'
    loc = FeatureLocation(0, 66, strand=1)

    orig_feature = DummyCDS(0, 66, locus_tag='FAKE0001')
    vec = Thiopeptide(23, 42, 51)
    vec.thio_type = 'Type III'
    vec.core = expected_core
    vec.leader = expected_leader
    orig_feature.translation = expected_core + vec.leader

    motif = result_vec_to_feature(orig_feature, vec)
    leader, core = motif.to_biopython()

    # the leader starts with the CDS and spans 12 residues (36 bases)
    assert loc.start == leader.location.start
    assert loc.start + (12 * 3) == leader.location.end
    assert loc.strand == leader.location.strand

    assert motif.type == 'CDS_motif'
    assert motif.peptide_class == "thiopeptide"
    assert motif.peptide_subclass == "Type III"
    assert orig_feature.locus_tag == motif.locus_tag
    assert motif.detailed_information.rodeo_score == 51
    assert motif.score == 42
    self.assertAlmostEqual(motif.molecular_weight, 861.9, places=1)
    assert motif.leader == "HEADHEADHEAD"

    # the core follows the leader directly and runs to the end of the CDS
    assert leader.location.end == core.location.start
    assert loc.end == core.location.end
    assert loc.strand == core.location.strand

    self.assertAlmostEqual(motif.monoisotopic_mass, 861.3, places=1)
    assert len(motif.alternative_weights) == 7
    expected_weights = [879.9, 897.9, 916.0,
                        934.0, 952.0, 970.0,
                        988.0]
    for calculated, expected in zip(motif.alternative_weights, expected_weights):
        self.assertAlmostEqual(calculated, expected, places=1)

    assert not motif.detailed_information.amidation
    assert not motif.detailed_information.macrocycle
    assert not motif.tail
    assert motif.detailed_information.core_features == "Central ring: pyridine trisubstituted"
    assert motif.core == "SCTSSCTSS"