Python源码示例:Bio.SeqFeature.SeqFeature()
示例1
def add_point_feature(self, resnum, feat_type=None, feat_id=None, qualifiers=None):
    """Append a feature covering exactly one residue to the features list.

    Args:
        resnum (int): Protein sequence residue number; the stored location runs
            from ``resnum - 1`` to ``resnum``
        feat_type (str, optional): Description of the feature type (ie. 'catalytic residue');
            a generic description is substituted when this is empty
        feat_id (str, optional): ID of the feature type (ie. 'TM1')
        qualifiers (optional): Handed to the new SeqFeature unchanged

    Raises:
        ValueError: If a feature file is associated with the sequence
    """
    if self.feature_file:
        raise ValueError('Feature file associated with sequence, please remove file association to append '
                         'additional features.')

    description = feat_type or 'Manually added protein sequence single residue feature'
    residue_span = FeatureLocation(ExactPosition(resnum - 1), ExactPosition(resnum))
    point_feature = SeqFeature(location=residue_span,
                               type=description,
                               id=feat_id,
                               qualifiers=qualifiers)
    self.features.append(point_feature)
示例2
def add_region_feature(self, start_resnum, end_resnum, feat_type=None, feat_id=None, qualifiers=None):
    """Append a feature covering a stretch of residues to the features list.

    Args:
        start_resnum (int): Start residue number of the protein sequence feature
        end_resnum (int): End residue number of the protein sequence feature
        feat_type (str, optional): Description of the feature type (ie. 'binding domain');
            a generic description is substituted when this is empty
        feat_id (str, optional): ID of the feature type (ie. 'TM1')
        qualifiers (optional): Handed to the new SeqFeature unchanged

    Raises:
        ValueError: If a feature file is associated with the sequence
    """
    if self.feature_file:
        raise ValueError('Feature file associated with sequence, please remove file association to append '
                         'additional features.')

    description = feat_type or 'Manually added protein sequence region feature'
    # the location starts one below the start residue number
    region_span = FeatureLocation(start_resnum - 1, end_resnum)
    region_feature = SeqFeature(location=region_span,
                                type=description,
                                id=feat_id,
                                qualifiers=qualifiers)
    self.features.append(region_feature)
示例3
def get_residue_annotations(self, start_resnum, end_resnum=None):
    """Look up the letter annotations of one residue or of a residue range.

    Args:
        start_resnum (int): Residue number
        end_resnum (int): Optional residue number, specify if a range is desired;
            when omitted only ``start_resnum`` is covered

    Returns:
        dict: Letter annotations for this residue or residues
    """
    last_resnum = end_resnum or start_resnum

    # A throwaway feature over the requested span does the slicing for us
    window = SeqFeature(FeatureLocation(start_resnum - 1, last_resnum))
    extracted = window.extract(self)
    return extracted.letter_annotations
示例4
def test_translate_feature(self):
    '''
    translate_feature should map a dict of nucleotide sequences to a dict of
    amino acid translations keyed by the same names
    '''
    # nucleotides -> amino acids, per https://en.wikipedia.org/wiki/DNA_codon_table
    cases = {
        'seq1': ("TTTCTTATGGTCGTA", 'FLMVV'),
        'seq2': ("TCTTCAACTGCTACA", 'SSTAT'),
        'seq3': ("CATAATGAATATAAT", 'HNEYN'),
    }
    aln = {name: Seq(nucleotides) for name, (nucleotides, _) in cases.items()}
    expected_translations = {name: peptide for name, (_, peptide) in cases.items()}

    # the feature spans all 15 nucleotides of every sequence
    feature = SeqFeature(FeatureLocation(0, 15), type="domain")

    assert translate.translate_feature(aln, feature) == expected_translations
# TODO: test_vcf_feature, assign_aa_vcf, assign_aa_fasta
# Unclear how to emulate inputs (TreeTime dict, tree)
示例5
def create_faux_record_from_proteins(proteins, id):
    """Build a SeqRecord with an empty sequence and one CDS feature per protein.

    The CDS features are laid end to end on the forward strand, each one
    three positions long per residue of its protein.

    Args:
        proteins: iterable of objects exposing ``id`` and ``seq``
        id: identifier given to the returned record

    Returns:
        SeqRecord: record whose features carry 'protein_id' (cut to 45
        characters) and 'translation' qualifiers
    """
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    max_protein_id_len = 45
    record = SeqRecord(seq=Seq(''), id=id)

    offset = 0  # where the next CDS begins
    for protein in proteins:
        cds_end = offset + len(protein.seq) * 3
        cds = SeqFeature(
            location=FeatureLocation(offset, cds_end, strand=1),
            type="CDS",
            qualifiers={
                'protein_id': [protein.id[:max_protein_id_len]],
                'translation': [str(protein.seq)],
            },
        )
        record.features.append(cds)
        offset = cds_end
    return record
示例6
def fetch_source_feature(self, gb_record):
    """Return the first "source" feature of a record, creating it if missing.

    When the record has no feature of type "source", a new one spanning the
    whole sequence is inserted at the front of ``gb_record.features`` and
    returned.
    """
    for candidate in gb_record.features:
        if candidate.type == "source":
            return candidate

    # No source feature present: add one covering the full sequence
    whole_sequence = FeatureLocation(SeqFeature.ExactPosition(0),
                                     SeqFeature.ExactPosition(len(gb_record.seq)))
    source_feature = SeqFeature.SeqFeature(whole_sequence, type="source")
    gb_record.features.insert(0, source_feature)
    return source_feature
示例7
def drawFig(self):
    """Draw every (name, start, stop) entry as an arrow on one linear track.

    Each entry of ``self.list_name_start_stop`` becomes a forward-strand
    BIGARROW feature, styled from ``self.dict_args``. The diagram is written
    to ``linear.pdf`` under ``self.dict_args["exportPath"]``.
    """
    # first matching keyword group wins, in this order
    palette = (
        (("COX",), "#81CEEA"),
        (("NAD",), "#F9C997"),
        (("ATP",), "#E97E8D"),
        (("CYTB", "COB"), "#E2E796"),
        (("RRN",), "#94F2DB"),
    )
    diagram = GenomeDiagram.Diagram('linear figure')
    track = diagram.new_track(1, greytrack=False, scale=0, height=0.4)
    feature_set = track.new_set()
    for name, start, stop in self.list_name_start_stop:
        upper_name = name.upper()
        for keywords, shade in palette:
            if any(keyword in upper_name for keyword in keywords):
                # NOTE(review): this per-gene colour is computed but never
                # handed to add_feature below - confirm whether intended
                color = shade
                break
        # strand = -1 if name in ["nad1", "cytb", "nad4", "nad4L", "rrnL"] else 1
        arrow = SeqFeature(FeatureLocation(int(start), int(stop)), strand=1)
        feature_set.add_feature(
            arrow, name=name, label=True,
            label_size=self.dict_args["label_size"],
            label_angle=self.dict_args["Label_angle"],
            color=self.dict_args["Label_color"],
            label_position=self.dict_args["Label_position"],
            sigil="BIGARROW", arrowshaft_height=0.5,
            arrowhead_length=0.5,
        )
    page = (self.dict_args["fig_width"] * cm, self.dict_args["fig_height"] * cm)
    # NOTE(review): `stop` is whatever the final loop iteration left behind,
    # so the drawn range ends at the last listed entry
    diagram.draw(format='linear', pagesize=page, fragments=1,
                 start=0, end=int(stop))
    diagram.write(self.dict_args["exportPath"] + os.sep + "linear.pdf", "pdf")
示例8
def store_promoters(promoters: Iterable[Promoter], record: Record) -> None:
    """ Adds one "promoter" feature per promoter to the given record.

        Each feature carries the promoter's gene names as "locus_tag" and its
        sequence as "seq"; combined promoters also get a "bidirectional
        promoter" note. Features are marked as created by antiSMASH.
    """
    for promoter in promoters:
        # shift the start down by one for the 0-indexed location, never below zero
        span = FeatureLocation(max(0, promoter.start - 1), promoter.end)
        bio_feature = SeqFeature(span, type="promoter")
        bio_feature.qualifiers = {
            "locus_tag": promoter.get_gene_names(),  # already a list with one or two elements
            "seq": [str(promoter.seq)],
        }
        if isinstance(promoter, CombinedPromoter):
            bio_feature.qualifiers["note"] = ["bidirectional promoter"]

        converted = Feature.from_biopython(bio_feature)
        converted.created_by_antismash = True
        record.add_feature(converted)
示例9
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
    """ Builds an instance of the class from a biopython SeqFeature.

        The "aSTool" qualifier is mandatory. A missing "locus_tag" falls back
        to "(unknown)". The provided `feature` argument is not consulted, a
        new instance is always constructed.
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    # mandatory qualifiers needed for construction
    tool = leftovers.pop("aSTool")[0]
    protein_location = generate_protein_location_from_qualifiers(leftovers, record)
    # locus tag is special, antismash versions <= 5.0 didn't require it, but > 5.0 do
    locus_tag = leftovers.pop("locus_tag", ["(unknown)"])[0]
    feature = cls(bio_feature.location, tool, protein_location, locus_tag)

    # optional qualifiers, an empty subtype is stored as None
    subtype = leftovers.pop("domain_subtype", [""])[0]
    feature.domain_subtype = subtype or None
    feature.specificity = leftovers.pop("specificity", [])

    # let the parent class consume whatever it recognises
    super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
    return feature
示例10
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Optional[Dict] = None, record: Any = None) -> T:
    """ Builds an instance of the class from a biopython SeqFeature, unless
        an existing `feature` is supplied to be filled in.

        The tool is read from the mandatory "aStool" qualifier (spelled with
        a lowercase 't' in this block). "probability" is optional and the
        label falls back to the older "anchor" qualifier.
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    tool = leftovers.pop("aStool")[0]
    probability = float(leftovers.pop("probability")[0]) if "probability" in leftovers else None
    # "anchor" is only consulted, for backwards compatibility, when no label exists
    label = leftovers.pop("label", [""])[0] or leftovers.pop("anchor", [""])[0]
    if not feature:
        feature = cls(bio_feature.location, tool, probability, label)

    # remove the subregion_number, as it's not relevant
    leftovers.pop("subregion_number", "")

    # let the parent class consume whatever it recognises
    super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
    return feature
示例11
def to_biopython(self, qualifiers: Dict[str, List[str]] = None) -> SeqFeature:
    """ Collects this feature's own qualifiers and hands them to the parent
        conversion.

        Qualifiers passed in by the caller take precedence over the
        generated ones.
    """
    optional_attrs = ("gene", "transl_table", "locus_tag", "protein_id", "product")

    # mandatory
    mine = OrderedDict([("translation", [self.translation])])  # type: Dict[str, List[str]]
    # optional, only kept when they have a truthy value
    for name in optional_attrs:
        value = getattr(self, name)
        if value:
            mine[name] = [str(value)]
    if self._gene_functions:
        mine["gene_functions"] = [str(function) for function in self._gene_functions]
        mine["gene_kind"] = [str(self.gene_function)]
    if self.sec_met:
        mine["sec_met_domain"] = [str(item) for item in self.sec_met]
    if self.nrps_pks:
        mine["NRPS_PKS"] = [str(item) for item in self.nrps_pks]
    # respect qualifiers given to us
    if qualifiers:
        mine.update(qualifiers)
    return super().to_biopython(mine)
示例12
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
    """ Rebuilds an instance from the candidate clusters and subregions that
        the qualifiers refer to by (1-based) number within the record.

        Raises:
            ValueError: if no record is given, or if a referenced number is
                        greater than the count available in the record
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    candidate_numbers = list(map(int, leftovers.pop("candidate_cluster_numbers", [])))
    subregion_numbers = list(map(int, leftovers.pop("subregion_numbers", [])))

    if not record:
        raise ValueError("record instance required for regenerating Region from biopython")

    all_candidates = record.get_candidate_clusters()
    all_subs = record.get_subregions()

    # every referenced number has to exist in the record
    if candidate_numbers and max(candidate_numbers) > len(all_candidates):
        raise ValueError("record does not contain all expected candidate clusters")
    if subregion_numbers and max(subregion_numbers) > len(all_subs):
        raise ValueError("record does not contain all expected subregions")

    # numbers are 1-based, list positions are 0-based
    chosen_candidates = [all_candidates[number - 1] for number in candidate_numbers]
    chosen_subs = [all_subs[number - 1] for number in subregion_numbers]
    return cls(chosen_candidates, chosen_subs)
示例13
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
    """ Fills in the Domain-level attributes of an already constructed
        feature from the qualifiers of a biopython SeqFeature.

        Raises:
            ValueError: if no feature instance is supplied
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    if not feature:
        raise ValueError("Domain shouldn't be instantiated directly")
    assert isinstance(feature, Domain), type(feature)

    # clean up qualifiers that must have been used already
    for consumed in ("protein_start", "protein_end"):
        leftovers.pop(consumed, None)

    # optional qualifiers, an empty domain name is stored as None
    domain_name = leftovers.pop("aSDomain", [""])[0]
    feature.domain = domain_name or None
    for asf_label in leftovers.pop("ASF", []):
        feature.asf.add(asf_label)

    # let the parent class consume whatever it recognises
    updated = super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
    assert updated is feature
    assert isinstance(updated, Domain)
    return updated
示例14
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Optional[Dict] = None, record: Any = None) -> T:
    """ Does not return a proper CandidateCluster instance as extra information
        is required from the record in order to properly rebuild it

        Raises:
            ValueError: if no record is given, or if a referenced protocluster
                        number is greater than the count in the record
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)
    if not record:
        raise ValueError("record instance required for regenerating CandidateCluster from biopython")

    available = record.get_protoclusters()
    wanted_numbers = list(map(int, leftovers.pop("protoclusters")))
    if max(wanted_numbers) > len(available):
        raise ValueError("record does not contain all expected protoclusters")

    kind = CandidateClusterKind.from_string(leftovers.pop("kind")[0])
    smiles = leftovers.pop("SMILES", [None])[0]
    polymer = leftovers.pop("polymer", [None])[0]
    # protocluster numbers are 1-based, list positions are 0-based
    children = [available[number - 1] for number in wanted_numbers]
    return cls(kind, children, smiles, polymer)
示例15
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Optional[Dict[str, List[str]]] = None, record: Any = None) -> T:
    """ Builds a CDSMotif from a biopython SeqFeature, or fills in a supplied
        `feature`.

        When no feature is supplied and the qualifiers name no "aSTool", the
        conversion is delegated to ExternalCDSMotif instead.
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    if not feature:
        tool_name = leftovers.pop("aSTool", [""])[0]
        if not tool_name:
            # no tool recorded, treat it as an externally created motif
            return cast(T, ExternalCDSMotif.from_biopython(bio_feature, None, leftovers, record))
        protein_span = generate_protein_location_from_qualifiers(leftovers, record)
        locus = leftovers.pop("locus_tag", ["(unknown)"])[0]
        feature = cls(bio_feature.location, locus, protein_span, tool=tool_name)

    updated = super().from_biopython(bio_feature, feature, leftovers, record=record)
    assert updated is feature
    assert isinstance(updated, CDSMotif)
    return updated
示例16
def test_non_antismash_motif_from_raw(self):
    """ A feature without antiSMASH qualifiers converts to an external motif
        and back again without gaining any qualifiers
    """
    raw = SeqFeature(FeatureLocation(7, 10))
    raw.qualifiers["stuff"] = ["thing"]

    converted = CDSMotif.from_biopython(raw)
    assert isinstance(converted, ExternalCDSMotif)
    assert converted.tool == "external"
    assert not converted.created_by_antismash
    assert converted.domain_id is None

    # add a domain_id so a Record can use the motif
    converted.domain_id = "testname"
    regenerated = converted.to_biopython()[0]

    # generated domain_id should not be kept for non-antismash features
    assert "domain_id" not in regenerated.qualifiers
    assert regenerated.qualifiers == raw.qualifiers
示例17
def test_simple_feature(self):
    """ A feature survives a round trip through the JSON serialiser with its
        qualifiers, id, type and location intact
    """
    location = FeatureLocation(1, 6, strand=1)
    f_type = "test type"
    qualifiers = {"a": ["1", "2"], "b": ["3", "4"]}
    f_id = "dummy id"
    # skipping biopython deprecated members: ref, ref_db, strand, location_operator
    feature = SeqFeature(location=location, type=f_type,
                         qualifiers=qualifiers, id=f_id)
    print(str(feature))

    # named so the local doesn't shadow the json module name
    serialised = serialiser.feature_to_json(feature)
    print(serialised)  # for debugging failures
    new_feature = serialiser.feature_from_json(serialised)
    print(str(new_feature))

    assert new_feature.qualifiers == feature.qualifiers
    assert new_feature.id == feature.id
    assert new_feature.type == feature.type
    # compare against the original, comparing the new location with itself
    # could never fail
    assert str(new_feature.location) == str(feature.location)
示例18
def test_write_fasta_file(self, seqprop_with_i, tmpdir, test_files_outputs, seq_record_example):
    """Check SeqProp state before and after its sequence is written out as FASTA"""
    fasta_name = 'test_seqprop_with_i_write_fasta_file.fasta'
    dummy_letters = 'X' * len(seqprop_with_i.seq)

    # Attach dummy annotations, which must still be present once the Seq has been written
    seqprop_with_i.letter_annotations.update({'test_la_key': dummy_letters})
    seqprop_with_i.features.append(SeqFeature(FeatureLocation(1, 3)))

    # Write the Seq to a FASTA file
    outpath = tmpdir.join(fasta_name).strpath
    seqprop_with_i.write_fasta_file(outfile=outpath, force_rerun=True)

    # The file exists and has content
    assert op.exists(outpath)
    assert op.getsize(outpath) > 0

    # The SeqProp points at the file just written
    assert seqprop_with_i.sequence_path == outpath
    assert seqprop_with_i.sequence_file == fasta_name
    assert seqprop_with_i.sequence_dir == tmpdir

    # Once a file is written, the annotations should not be lost, even though the sequence now
    # loads from the written file as a Seq
    assert seqprop_with_i.description == seq_record_example.description
    assert seqprop_with_i.annotations == seq_record_example.annotations
    assert seqprop_with_i.letter_annotations == {'test_la_key': 'X' * len(seq_record_example.seq)}
    assert len(seqprop_with_i.features) == 1

    # The sequence itself is read-only from here on
    with pytest.raises(ValueError):
        seqprop_with_i.seq = 'THISWILLNOTBETHESEQ'
    assert seqprop_with_i.seq == seq_record_example.seq
示例19
def write_genbank(self):
    """
    Write prophages and their potential attachment sites in updated input GenBank file.

    Every prophage in ``self.pp`` is added to its contig as a 'misc_feature';
    prophages with an 'atts' entry also get a two-part 'repeat_region'
    feature built from the four values in their 'att' entry. The output is
    written gzip-compressed when the input file is a gzip file.

    :param self: the data object
    :return: None
    """
    log_and_message("Writing GenBank output file", c="GREEN", stderr=True, quiet=self.quiet)
    prophage_feature_type = 'misc_feature'  # / prophage_region
    outfile = os.path.join(self.output_dir, self.file_prefix + os.path.basename(self.infile))
    for i in self.pp:
        prophage = self.pp[i]
        entry = self.record.get_entry(prophage['contig'])
        entry.append_feature(SeqFeature(
            location=FeatureLocation(prophage['start'], prophage['stop']),
            type=prophage_feature_type,
            strand=1,
            qualifiers=OrderedDict(
                {'note': f'prophage region pp{i} identified with PhiSpy v{version.__version__}'}
            )))
        if 'atts' in prophage:
            att = prophage['att']
            entry.append_feature(SeqFeature(
                location=FeatureLocation(int(att[0]), int(att[1])) +
                         FeatureLocation(int(att[2]), int(att[3])),
                type='repeat_region',
                strand=1,
                qualifiers=OrderedDict({'note': f'prophage region pp{i} potential attachment sites'})))
    # are we writing a gzip file
    if is_gzip_file(self.infile):
        opened = gzip.open(outfile, 'wt')
    else:
        opened = open(outfile, 'w')
    # the with block closes the handle, so buffered (and gzip) output is
    # flushed to disk even if writing fails part way
    with opened as handle:
        SeqIO.write(self.record, handle, 'genbank')
示例20
def processed_record(detector_name='deepbgc', detector_label='deepbgc', score_threshold=0.5):
    """Build a small 15 nt record fixture carrying detector metadata and
    CDS, pfam and cluster features.

    The structured comment stores the detector name, label and score
    threshold. One of the two cluster features is attributed to the given
    detector, the other to 'annotated'.
    """
    comment_key = util.format_detector_meta_key(detector_label)
    record = SeqRecord(Seq('ACTGCTCGACTGATT', alphabet=generic_dna))
    structured_comment = collections.OrderedDict()
    record.annotations['structured_comment'] = structured_comment
    structured_comment[comment_key] = collections.OrderedDict(
        name=detector_name,
        label=detector_label,
        score_threshold=score_threshold
    )

    # Add protein features
    proteins = (((0, 2), 'A'), ((2, 5), 'B'), ((5, 8), 'C'))
    for (start, end), locus_tag in proteins:
        record.features.append(
            SeqFeature(FeatureLocation(start, end), type='CDS', qualifiers={'locus_tag': [locus_tag]}))

    # Add pfam features, one per protein
    score_column = util.format_bgc_score_column(detector_name)
    pfams = (
        ((0, 2), 0.4, 'PF00001', 'A'),
        ((2, 5), 0.7, 'PF00002', 'B'),
        ((5, 8), 0.6, 'PF00003', 'C'),
    )
    for (start, end), score, accession, locus_tag in pfams:
        qualifiers = {score_column: [score], 'db_xref': [accession], 'locus_tag': [locus_tag],
                      'database': [PFAM_DB_VERSION]}
        record.features.append(
            SeqFeature(FeatureLocation(start, end), type=util.PFAM_FEATURE, qualifiers=qualifiers))

    # Add BGC features
    detected = {score_column: ['0.6'], 'detector': [detector_name], 'detector_label': [detector_label]}
    record.features.append(SeqFeature(FeatureLocation(0, 5), type='cluster', qualifiers=detected))
    annotated = {'detector': ['annotated'], 'detector_label': ['annotated']}
    record.features.append(SeqFeature(FeatureLocation(2, 8), type='cluster', qualifiers=annotated))
    return record
示例21
def test_feature_creation(self):
    """ new_feature_from_other keeps the strand of the source CDS and places
        the new feature at the expected position for both strands
    """
    results = tta.tta.TTAResults('dummy', gc_content=1, threshold=0.65)
    # (strand of the CDS, expected start, expected end) for an offset of 12
    expectations = (
        (1, 222, 225),
        (-1, 285, 288),
    )
    for strand, expected_start, expected_end in expectations:
        cds = SeqFeature(FeatureLocation(210, 300, strand=strand), type='CDS')
        ret = results.new_feature_from_other(cds, 12)
        self.assertEqual(ret.strand, strand)
        self.assertEqual(ret.location.start, expected_start)
        self.assertEqual(ret.location.end, expected_end)
def to_biopython(self, qualifiers: Dict[str, List[str]] = None) -> List[SeqFeature]:
    """ Adds the domain subtype and specificity, when set, to the qualifiers
        handed to the parent conversion.

        Qualifiers passed in by the caller take precedence.
    """
    own = OrderedDict()  # type: Dict[str, List[str]]
    subtype = self.domain_subtype
    if subtype:
        own["domain_subtype"] = [subtype]
    specificity = self.specificity
    if specificity:
        own["specificity"] = specificity
    if qualifiers:
        own.update(qualifiers)
    return super().to_biopython(own)
示例23
def to_biopython(self, qualifiers: Dict[str, Any] = None) -> List[SeqFeature]:
    """ Converts this feature into one or more SeqFeature instances.

        Subclasses must manage their own attributes and potential extra
        features.

        Neither the qualifiers stored on this feature nor the qualifiers
        passed in are modified by the conversion.

        Arguments:
            qualifiers: extra qualifiers to include, replacing stored
                        qualifiers of the same name; any "note" values are
                        merged with the feature's own notes

        Returns:
            a list containing the single SeqFeature that was built
    """
    feature = SeqFeature(self.location, type=self.type)
    quals = self._qualifiers.copy()
    stored_notes = self._qualifiers.get("note", [])
    assert stored_notes is not None
    # copy before extending: the shallow dict copy above shares this list
    # with self._qualifiers, so extending it in place would make the stored
    # notes grow on every call
    notes = list(stored_notes)
    notes.extend(self.notes)
    if qualifiers:
        # work on a copy so the caller's dict keeps its "note" entry
        extras = dict(qualifiers)
        notes += extras.pop("note", [])
        quals.update(extras)
    if notes:
        # sorting helps with consistency and comparison testing
        quals["note"] = sorted(notes)
    if self.created_by_antismash:
        quals["tool"] = ["antismash"]
    if self._original_codon_start is not None:
        start = int(self._original_codon_start)
        quals["codon_start"] = [str(start + 1)]
        # adjust location back if necessary
        if self.location.strand == -1:
            start *= -1
        if self._original_codon_start != 0:
            feature.location = _adjust_location_by_offset(feature.location, -start)
    # sorted here to match the behaviour of biopython
    for key, val in sorted(quals.items()):
        feature.qualifiers[key] = val
    assert isinstance(feature.qualifiers, dict)
    return [feature]
示例24
def to_biopython(self, qualifiers: Optional[Dict[str, List[str]]] = None) -> List[SeqFeature]:
    """ Adds the region's own qualifiers to those given and hands them to
        the parent conversion.

        The region number is only included when a parent record is set. A
        non-empty `qualifiers` dict is updated in place.
    """
    qualifiers = qualifiers or {}
    if self._parent_record:
        qualifiers["region_number"] = [str(self.get_region_number())]
    qualifiers.update({
        "product": self.products,
        "rules": self.detection_rules,
        "probabilities": ["%.4f" % prob for prob in self.probabilities],
        "subregion_numbers": [str(sub.get_subregion_number()) for sub in self._subregions],
        "candidate_cluster_numbers": [str(cand.get_candidate_cluster_number())
                                      for cand in self._candidate_clusters],
    })
    return super().to_biopython(qualifiers)
示例25
def to_biopython(self, qualifiers: Dict[str, Any] = None) -> SeqFeature:
    """ Construct a matching SeqFeature for this Gene

        The locus tag and gene name, when set, are added as the "locus_tag"
        and "gene" qualifiers. A non-empty `qualifiers` dict is updated in
        place.
    """
    qualifiers = qualifiers or {}
    for key, value in (("locus_tag", self.locus_tag), ("gene", self.gene_name)):
        if value:
            qualifiers[key] = [value]
    return super().to_biopython(qualifiers)
示例26
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
    """ Builds an instance from a biopython SeqFeature using its "locus_tag"
        and "gene" qualifiers.

        If neither is present, a name is generated from the start and end of
        the location. The provided `feature` argument is not consulted, a new
        instance is always constructed.
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    # both identifiers are optional, empty values count as missing
    locus_tag = leftovers.pop("locus_tag", [""])[0] or None
    gene_name = leftovers.pop("gene", [""])[0] or None
    if locus_tag is None and gene_name is None:
        span = bio_feature.location
        gene_name = "gene%s_%s" % (span.start, span.end)

    feature = cls(bio_feature.location, locus_tag=locus_tag, gene_name=gene_name)
    super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
    return feature
示例27
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, Any] = None, record: Any = None) -> T:
    """ Rebuilds a Prepeptide from the "core" biopython feature.

        The overall location joins the optional leader location, the
        feature's own location and the optional tail location, in that order.

        Raises:
            SecmetInvalidInputError: if the feature has no "prepeptide"
                                     qualifier or it is not "core"
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    section = leftovers.pop("prepeptide", [""])[0]
    if not section:
        raise SecmetInvalidInputError("cannot reconstruct Prepeptide from biopython feature %s" % bio_feature)
    if section != "core":
        raise SecmetInvalidInputError("Prepeptide can only be reconstructed from core feature")

    alt_weights = list(map(float, leftovers.pop("alternative_weights", [])))

    # leader goes before the core location, tail after it
    parts = [bio_feature.location]
    leader = leftovers.pop("leader_sequence", [""])[0]
    if leader:
        parts.insert(0, location_from_string(leftovers.pop("leader_location")[0]))
    tail = leftovers.pop("tail_sequence", [""])[0]
    if tail:
        parts.append(location_from_string(leftovers.pop("tail_location")[0]))
    location = build_location_from_others(parts)

    # mandatory qualifiers, consumed in constructor argument order
    peptide_class = leftovers.pop("peptide")[0]
    core = leftovers.pop("core_sequence")[0]
    locus_tag = leftovers.pop("locus_tag")[0]
    tool = leftovers.pop("aSTool")[0]
    predicted_class = leftovers.pop("predicted_class")[0]
    score = float(leftovers.pop("score")[0])
    monoisotopic_mass = float(leftovers.pop("monoisotopic_mass")[0])
    molecular_weight = float(leftovers.pop("molecular_weight")[0])
    return cls(location, peptide_class, core, locus_tag, tool, predicted_class,
               score, monoisotopic_mass, molecular_weight,
               alt_weights, leader, tail)
示例28
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                   leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
    """ Fills in the AntismashFeature-level attributes of an already
        constructed feature from the qualifiers of a biopython SeqFeature.

        Raises:
            SecmetInvalidInputError: if no feature instance is supplied, or if
                                     the feature is marked as made by
                                     antiSMASH but has no tool set
    """
    if leftovers is None:
        leftovers = Feature.make_qualifiers_copy(bio_feature)

    def take(key):
        """ Removes a qualifier, giving its first value or None if empty/absent """
        return leftovers.pop(key, [""])[0] or None

    if not feature:
        raise SecmetInvalidInputError("AntismashFeature shouldn't be instantiated directly")
    assert isinstance(feature, AntismashFeature)

    # semi-optional qualifiers
    made_by_antismash = leftovers.get("tool") == ["antismash"]
    if made_by_antismash and not feature.tool:
        raise SecmetInvalidInputError("an AntismashFeature created by antiSMASH must have a tool supplied")

    # optional qualifiers
    feature.domain_id = take("domain_id")
    if feature.domain_id:
        # long ids causing linebreaks in genbanks can have spaces inserted
        # strip them out so id-based lookups can function again
        feature.domain_id = feature.domain_id.replace(" ", "")
    feature.database = take("database")
    feature.detection = take("detection")
    feature.label = take("label")
    if feature.label:
        # again, long ids causing linebreaks in genbanks can have spaces inserted
        feature.label = feature.label.replace(" ", "")
    if not feature.locus_tag:  # may already be populated
        feature.locus_tag = take("locus_tag")
    translation = take("translation")
    if translation is not None:
        feature.translation = translation
    if "evalue" in leftovers:
        feature.evalue = float(leftovers.pop("evalue")[0])
    if "score" in leftovers:
        feature.score = float(leftovers.pop("score")[0])

    # let the parent class consume whatever it recognises
    updated = super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
    assert isinstance(updated, AntismashFeature)
    return updated
示例29
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
# Builds (or fills in) a CDSCollection-derived feature from a biopython
# SeqFeature, restoring the contig edge flag from its qualifiers.
# Returns the feature that was supplied or constructed.
assert issubclass(cls, CDSCollection)
# without explicit leftovers, start from a copy of the feature's qualifiers
if leftovers is None:
leftovers = Feature.make_qualifiers_copy(bio_feature)
# the flag is only True when the first "contig_edge" value is the string "True",
# a missing qualifier reads as False
contig_edge = leftovers.pop("contig_edge", [""])[0] == "True"
# only construct a new instance when none was passed in
if not feature:
feature = cls(bio_feature.location, bio_feature.type)
# NOTE(review): indentation was lost in this listing, so it is unclear whether
# the assignment below belongs inside the `if not feature` branch - confirm
# against the upstream source before re-indenting
feature._contig_edge = contig_edge # pylint: disable=protected-access
# grab parent optional qualifiers
super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
return feature
示例30
def to_biopython(self, qualifiers: Optional[Dict[str, List[str]]] = None) -> List[SeqFeature]:
    """ Adds the "contig_edge" qualifier, when a parent record is set, and
        hands the qualifiers to the parent conversion.

        A non-empty `qualifiers` dict is updated in place.
    """
    qualifiers = qualifiers or {}
    if self.parent_record:
        on_edge = str(self.contig_edge)
        qualifiers["contig_edge"] = [on_edge]
    return super().to_biopython(qualifiers)