Python源码示例:Bio.SeqFeature.SeqFeature()

示例1
def add_point_feature(self, resnum, feat_type=None, feat_id=None, qualifiers=None):
        """Append a feature covering a single residue to ``self.features``.

        Args:
            resnum (int): Protein sequence residue number; stored as the span ``[resnum - 1, resnum)``
            feat_type (str, optional): Description of the feature type (ie. 'catalytic residue').
                A generic description is used when this is omitted or empty.
            feat_id (str, optional): ID of the feature (ie. 'TM1')
            qualifiers (optional): Passed through unchanged as the ``qualifiers`` of the new SeqFeature

        Raises:
            ValueError: If ``self.feature_file`` is set

        """
        if self.feature_file:
            raise ValueError('Feature file associated with sequence, please remove file association to append '
                             'additional features.')

        description = feat_type or 'Manually added protein sequence single residue feature'
        residue_span = FeatureLocation(ExactPosition(resnum - 1), ExactPosition(resnum))
        self.features.append(
            SeqFeature(location=residue_span, type=description, id=feat_id, qualifiers=qualifiers)
        )
示例2
def add_region_feature(self, start_resnum, end_resnum, feat_type=None, feat_id=None, qualifiers=None):
        """Append a feature covering a stretch of residues to ``self.features``.

        Args:
            start_resnum (int): Start residue number of the protein sequence feature
            end_resnum (int): End residue number of the protein sequence feature
            feat_type (str, optional): Description of the feature type (ie. 'binding domain').
                A generic description is used when this is omitted or empty.
            feat_id (str, optional): ID of the feature (ie. 'TM1')
            qualifiers (optional): Passed through unchanged as the ``qualifiers`` of the new SeqFeature

        Raises:
            ValueError: If ``self.feature_file`` is set

        """
        if self.feature_file:
            raise ValueError('Feature file associated with sequence, please remove file association to append '
                             'additional features.')

        description = feat_type or 'Manually added protein sequence region feature'
        # the location is built as the span [start_resnum - 1, end_resnum)
        region_span = FeatureLocation(start_resnum - 1, end_resnum)
        self.features.append(
            SeqFeature(location=region_span, type=description, id=feat_id, qualifiers=qualifiers)
        )
示例3
def get_residue_annotations(self, start_resnum, end_resnum=None):
        """Return the letter annotations of one residue or of a residue range.

        Args:
            start_resnum (int): Residue number
            end_resnum (int): Optional residue number, specify if a range is desired;
                when omitted (or falsy) only ``start_resnum`` is covered

        Returns:
            dict: Letter annotations for this residue or residues

        """
        last_resnum = end_resnum if end_resnum else start_resnum

        # A throwaway feature spanning [start_resnum - 1, last_resnum) does the slicing
        window = SeqFeature(FeatureLocation(start_resnum - 1, last_resnum))
        extracted = window.extract(self)
        return extracted.letter_annotations
示例4
def test_translate_feature(self):
        '''
        Check that translate_feature turns a dictionary of nucleotide sequences
        into a dictionary of the matching amino acid strings.
        '''
        # Codon -> amino acid table: https://en.wikipedia.org/wiki/DNA_codon_table
        nucleotides = {'seq1': "TTTCTTATGGTCGTA",
                       'seq2': "TCTTCAACTGCTACA",
                       'seq3': "CATAATGAATATAAT"}
        aln = {name: Seq(bases) for name, bases in nucleotides.items()}
        # the feature spans all 15 bases of every sequence
        feature = SeqFeature(FeatureLocation(0, 15), type="domain")

        expected_translations = {'seq1': 'FLMVV',
                                 'seq2': 'SSTAT',
                                 'seq3': 'HNEYN'}

        translated = translate.translate_feature(aln, feature)
        assert translated == expected_translations

    # TODO: test_vcf_feature, assign_aa_vcf, assign_aa_fasta
    # Unclear how to emulate inputs (TreeTime dict, tree) 
示例5
def create_faux_record_from_proteins(proteins, id):
    """Build a SeqRecord with an empty sequence and one CDS feature per protein.

    The CDS features are laid out back to back on the forward strand, each one
    three positions long per residue of its protein.

    Args:
        proteins: iterable of objects exposing ``seq`` and ``id``
        id: identifier given to the returned record

    Returns:
        SeqRecord: the record carrying the generated CDS features
    """
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    max_protein_id_len = 45
    record = SeqRecord(seq=Seq(''), id=id)
    offset = 0  # start of the next CDS, equal to the end of the previous one
    for protein in proteins:
        stop = offset + len(protein.seq) * 3
        cds = SeqFeature(
            location=FeatureLocation(offset, stop, strand=1),
            type="CDS",
            qualifiers={
                # protein ids are truncated to max_protein_id_len characters
                'protein_id': [protein.id[:max_protein_id_len]],
                'translation': [str(protein.seq)]
            }
        )
        record.features.append(cds)
        offset = stop
    return record
示例6
def fetch_source_feature(self, gb_record):
        """Return the first "source" feature of ``gb_record``, creating one if none exists.

        When no feature of type "source" is present, a new one spanning
        ``0..len(gb_record.seq)`` is inserted at the front of
        ``gb_record.features`` and returned.
        """
        existing = next((feat for feat in gb_record.features if feat.type == "source"), None)
        if existing is not None:
            return existing

        # add a source feature
        whole_record = FeatureLocation(SeqFeature.ExactPosition(0),
                                       SeqFeature.ExactPosition(len(gb_record.seq)))
        created = SeqFeature.SeqFeature(whole_record, type="source")
        gb_record.features.insert(0, created)
        return created
示例7
def drawFig(self):
        """Draw the entries of ``self.list_name_start_stop`` as a linear diagram and save it as PDF.

        Every ``(name, start, stop)`` entry becomes a labelled BIGARROW feature
        on a single track. Label and colour settings, the figure size and the
        export directory are read from ``self.dict_args``. The result is
        written to ``<exportPath>/linear.pdf``.

        NOTE(review): a per-gene-family colour (COX/NAD/ATP/CYTB/COB/RRN) used to
        be computed here but was never passed to ``add_feature``; that dead
        code has been dropped. Every feature is coloured with
        ``dict_args["Label_color"]``, exactly as before.

        NOTE(review): the diagram end is taken from the last entry's ``stop``, so
        an empty ``list_name_start_stop`` fails with an unbound-variable error.
        """
        settings = self.dict_args
        gdd = GenomeDiagram.Diagram('linear figure')
        gdt_features = gdd.new_track(1, greytrack=False, scale=0, height=0.4)
        gds_features = gdt_features.new_set()
        for name, start, stop in self.list_name_start_stop:
            # all features are drawn on the forward strand
            feature = SeqFeature(FeatureLocation(int(start), int(stop)), strand=1)
            gds_features.add_feature(feature, name=name, label=True,
                                     label_size=settings["label_size"], label_angle=settings["Label_angle"],
                                     color=settings["Label_color"], label_position=settings["Label_position"],
                                     sigil="BIGARROW", arrowshaft_height=0.5,
                                     arrowhead_length=0.5)
        # `stop` is the end coordinate of the last entry iterated above
        gdd.draw(format='linear', pagesize=(settings["fig_width"] * cm, settings["fig_height"] * cm), fragments=1,
                 start=0, end=int(stop))
        gdd.write(settings["exportPath"] + os.sep + "linear.pdf", "pdf")
示例8
def store_promoters(promoters: Iterable[Promoter], record: Record) -> None:
    """Add one "promoter" feature per promoter to ``record``.

    Each promoter is first expressed as a biopython SeqFeature, converted with
    ``Feature.from_biopython``, flagged as created by antiSMASH and then added
    to the record.
    """
    for promoter in promoters:
        # shift the start down by one for the 0-indexed location, never below zero
        span = FeatureLocation(max(0, promoter.start - 1), promoter.end)
        bio_feature = SeqFeature(span, type="promoter")

        qualifiers = {
            "locus_tag": promoter.get_gene_names(),  # already a list with one or two elements
            "seq": [str(promoter.seq)],
        }
        if isinstance(promoter, CombinedPromoter):
            qualifiers["note"] = ["bidirectional promoter"]
        bio_feature.qualifiers = qualifiers

        converted = Feature.from_biopython(bio_feature)
        converted.created_by_antismash = True
        record.add_feature(converted)
示例9
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
        """Build an instance of ``cls`` from the qualifiers of a biopython feature.

        Any ``feature`` passed in is ignored: a new instance is always built.

        Args:
            bio_feature: the SeqFeature providing the location
            feature: unused, replaced by the newly built instance
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: forwarded to the protein location helper and to the parent class

        Returns:
            the newly built instance

        Raises:
            KeyError: if the "aSTool" qualifier is absent
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        # mandatory qualifiers, consumed in this order before constructing the instance
        tool = leftovers.pop("aSTool")[0]
        protein_location = generate_protein_location_from_qualifiers(leftovers, record)
        # the locus tag was not required by antismash versions <= 5.0, hence the placeholder
        locus_tag = leftovers.pop("locus_tag", ["(unknown)"])[0]
        built = cls(bio_feature.location, tool, protein_location, locus_tag)

        # optional qualifiers
        subtype = leftovers.pop("domain_subtype", [""])[0]
        built.domain_subtype = subtype if subtype else None
        built.specificity = leftovers.pop("specificity", [])

        # the parent class handles the remaining qualifiers
        super().from_biopython(bio_feature, feature=built, leftovers=leftovers, record=record)

        return built
示例10
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Optional[Dict] = None, record: Any = None) -> T:
        """Build (or complete) an instance of ``cls`` from a biopython feature.

        Args:
            bio_feature: the SeqFeature providing the location
            feature: an existing instance to reuse; a new one is built when falsy
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: forwarded to the parent class

        Returns:
            the instance that was passed in or newly built

        Raises:
            KeyError: if the "aStool" qualifier is absent
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        # NOTE(review): this key is spelled "aStool" (lower-case t), unlike the
        # "aSTool" used by sibling parsers - confirm it matches what the writer emits
        tool = leftovers.pop("aStool")[0]
        probability = float(leftovers.pop("probability")[0]) if "probability" in leftovers else None
        # "anchor" is only consulted when no label is present (backwards compatibility)
        label = leftovers.pop("label", [""])[0] or leftovers.pop("anchor", [""])[0]
        if not feature:
            feature = cls(bio_feature.location, tool, probability, label)

        # the subregion number is not relevant here, so it is simply discarded
        leftovers.pop("subregion_number", "")

        # the parent class handles the remaining qualifiers
        super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
        return feature
示例11
def to_biopython(self, qualifiers: Dict[str, List[str]] = None) -> SeqFeature:
        """Collect this feature's qualifiers and delegate conversion to the parent class.

        Qualifiers supplied by the caller override the ones generated here.
        """
        generated = OrderedDict()  # type: Dict[str, List[str]]
        # the translation is always written
        generated["translation"] = [self.translation]
        # these are only written when they hold a truthy value
        for name in ("gene", "transl_table", "locus_tag", "protein_id", "product"):
            value = getattr(self, name)
            if value:
                generated[name] = [str(value)]
        if self._gene_functions:
            generated["gene_functions"] = [str(function) for function in self._gene_functions]
            generated["gene_kind"] = [str(self.gene_function)]
        if self.sec_met:
            generated["sec_met_domain"] = [str(item) for item in self.sec_met]
        if self.nrps_pks:
            generated["NRPS_PKS"] = [str(item) for item in self.nrps_pks]
        # qualifiers handed in by the caller take precedence
        if qualifiers:
            generated.update(qualifiers)
        return super().to_biopython(generated)
示例12
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
        """Rebuild an instance of ``cls`` from the numbered children listed in the qualifiers.

        The "candidate_cluster_numbers" and "subregion_numbers" qualifiers hold
        1-based indices into the record's candidate clusters and subregions.

        Args:
            bio_feature: only used to obtain the qualifiers when ``leftovers`` is None
            feature: unused
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: the record providing ``get_candidate_clusters()`` and ``get_subregions()``

        Returns:
            ``cls(candidates, subregions)`` built from the referenced children

        Raises:
            ValueError: if no record is given, or if any number is below 1 or
                beyond the children available in the record
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        candidate_numbers = [int(num) for num in leftovers.pop("candidate_cluster_numbers", [])]
        subregion_numbers = [int(num) for num in leftovers.pop("subregion_numbers", [])]

        if not record:
            raise ValueError("record instance required for regenerating Region from biopython")

        all_candidates = record.get_candidate_clusters()
        all_subs = record.get_subregions()

        # numbers below 1 would otherwise turn into negative indices and
        # silently pick children from the end of the list
        if any(num < 1 for num in candidate_numbers):
            raise ValueError("candidate cluster numbers must be 1 or greater")
        if any(num < 1 for num in subregion_numbers):
            raise ValueError("subregion numbers must be 1 or greater")

        if candidate_numbers and max(candidate_numbers) > len(all_candidates):
            raise ValueError("record does not contain all expected candidate clusters")
        if subregion_numbers and max(subregion_numbers) > len(all_subs):
            raise ValueError("record does not contain all expected subregions")

        candidates = [all_candidates[num - 1] for num in candidate_numbers]
        subs = [all_subs[num - 1] for num in subregion_numbers]

        return cls(candidates, subs)
示例13
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
        """Fill in the Domain-level attributes of an already constructed ``feature``.

        Args:
            bio_feature: the SeqFeature being converted
            feature: the instance to update; it must be supplied by the caller
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: forwarded to the parent class

        Returns:
            the same ``feature`` instance, updated

        Raises:
            ValueError: if no ``feature`` is supplied
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)
        if not feature:
            raise ValueError("Domain shouldn't be instantiated directly")
        assert isinstance(feature, Domain), type(feature)

        # these qualifiers must already have been used by the caller, so drop them
        for used_key in ("protein_start", "protein_end"):
            leftovers.pop(used_key, None)

        # optional qualifiers
        domain_name = leftovers.pop("aSDomain", [""])[0]
        feature.domain = domain_name if domain_name else None
        for asf_label in leftovers.pop("ASF", []):
            feature.asf.add(asf_label)

        # the parent class handles the remaining qualifiers and must hand back the same object
        updated = super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
        assert updated is feature
        assert isinstance(updated, Domain)
        return updated
示例14
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Optional[Dict] = None, record: Any = None) -> T:
        """ Does not return a proper CandidateCluster instance as extra information
            is required from the record in order to properly rebuild it

            The "protoclusters" qualifier holds 1-based indices into the
            protoclusters of ``record``.

            Raises:
                ValueError: if no record is given, if no protocluster numbers
                    are listed, or if any number is below 1 or beyond the
                    protoclusters available in the record
                KeyError: if the "protoclusters" or "kind" qualifier is absent
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        if not record:
            raise ValueError("record instance required for regenerating CandidateCluster from biopython")

        all_protoclusters = record.get_protoclusters()
        protocluster_numbers = [int(num) for num in leftovers.pop("protoclusters")]

        # an empty list used to fail only by accident, inside max()
        if not protocluster_numbers:
            raise ValueError("no protocluster numbers supplied for CandidateCluster")
        # numbers below 1 would otherwise turn into negative indices and
        # silently pick protoclusters from the end of the list
        if min(protocluster_numbers) < 1:
            raise ValueError("protocluster numbers must be 1 or greater")
        if max(protocluster_numbers) > len(all_protoclusters):
            raise ValueError("record does not contain all expected protoclusters")

        kind = CandidateClusterKind.from_string(leftovers.pop("kind")[0])
        smiles = leftovers.pop("SMILES", [None])[0]
        polymer = leftovers.pop("polymer", [None])[0]
        children = [all_protoclusters[num - 1] for num in protocluster_numbers]
        return cls(kind, children, smiles, polymer)
示例15
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Optional[Dict[str, List[str]]] = None, record: Any = None) -> T:
        """Build (or complete) a motif from a biopython feature.

        When no ``feature`` is supplied and the qualifiers carry no "aSTool"
        value, conversion is handed over to ``ExternalCDSMotif.from_biopython``.

        Args:
            bio_feature: the SeqFeature providing the location
            feature: an existing instance to update; a new one is built when falsy
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: forwarded to the helpers and to the parent class

        Returns:
            the updated instance, or the result of the ExternalCDSMotif conversion
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        if not feature:
            tool = leftovers.pop("aSTool", [""])[0]
            if not tool:
                # without a tool qualifier the feature is handled as an external motif
                external = ExternalCDSMotif.from_biopython(bio_feature, None, leftovers, record)
                return cast(T, external)
            protein_location = generate_protein_location_from_qualifiers(leftovers, record)
            locus_tag = leftovers.pop("locus_tag", ["(unknown)"])[0]
            feature = cls(bio_feature.location, locus_tag, protein_location, tool=tool)

        # the parent class must hand back the very same object
        updated = super().from_biopython(bio_feature, feature, leftovers, record=record)
        assert updated is feature
        assert isinstance(updated, CDSMotif)
        return updated
示例16
def test_non_antismash_motif_from_raw(self):
        """A feature without antiSMASH qualifiers converts to an ExternalCDSMotif
        and converts back with its qualifiers unchanged."""
        source = SeqFeature(FeatureLocation(7, 10))
        source.qualifiers["stuff"] = ["thing"]

        motif = CDSMotif.from_biopython(source)
        assert isinstance(motif, ExternalCDSMotif)
        assert motif.tool == "external"
        assert not motif.created_by_antismash
        assert motif.domain_id is None

        # give the motif a domain_id so that a Record could use it
        motif.domain_id = "testname"

        regenerated = motif.to_biopython()
        rebuilt = regenerated[0]
        # a generated domain_id must not be written out for non-antismash features
        assert "domain_id" not in rebuilt.qualifiers
        assert rebuilt.qualifiers == source.qualifiers
示例17
def test_simple_feature(self):
        """Round-trip a SeqFeature through the JSON serialiser and compare it with the original."""
        location = FeatureLocation(1, 6, strand=1)
        f_type = "test type"
        qualifiers = {"a": ["1", "2"], "b": ["3", "4"]}
        f_id = "dummy id"
        # skipping biopython deprecated members: ref, ref_db, strand, location_operator

        feature = SeqFeature(location=location, type=f_type,
                             qualifiers=qualifiers, id=f_id)
        print(str(feature))

        # named so that it does not shadow the standard `json` module
        serialised = serialiser.feature_to_json(feature)
        print(serialised)  # for debugging failures
        new_feature = serialiser.feature_from_json(serialised)
        print(str(new_feature))
        assert new_feature.qualifiers == feature.qualifiers
        assert new_feature.id == feature.id
        assert new_feature.type == feature.type
        # compare with the original feature; comparing new_feature with itself can never fail
        assert str(new_feature.location) == str(feature.location)
示例18
def test_write_fasta_file(self, seqprop_with_i, tmpdir, test_files_outputs, seq_record_example):
        """Check the state of the SeqProp after its sequence has been written to a FASTA file"""
        fasta_name = 'test_seqprop_with_i_write_fasta_file.fasta'

        # Attach dummy annotations first - they have to survive the write
        dummy_letters = {'test_la_key': 'X' * len(seqprop_with_i.seq)}
        seqprop_with_i.letter_annotations.update(dummy_letters)
        seqprop_with_i.features.append(SeqFeature(FeatureLocation(1, 3)))

        # Write the sequence out
        outpath = tmpdir.join(fasta_name).strpath
        seqprop_with_i.write_fasta_file(outfile=outpath, force_rerun=True)

        # The file has to exist and hold some content
        assert op.exists(outpath)
        assert op.getsize(outpath) > 0

        # The path attributes have to point at the new file
        assert seqprop_with_i.sequence_path == outpath
        assert seqprop_with_i.sequence_file == fasta_name
        assert seqprop_with_i.sequence_dir == tmpdir

        # Annotations must not be lost after writing, even though the sequence
        # now loads from the written file as a Seq
        assert seqprop_with_i.description == seq_record_example.description
        assert seqprop_with_i.annotations == seq_record_example.annotations
        assert seqprop_with_i.letter_annotations == {'test_la_key': 'X' * len(seq_record_example.seq)}
        assert len(seqprop_with_i.features) == 1

        # Assigning a new sequence has to be rejected and leave the old one in place
        with pytest.raises(ValueError):
            seqprop_with_i.seq = 'THISWILLNOTBETHESEQ'
        assert seqprop_with_i.seq == seq_record_example.seq
示例19
def write_genbank(self):
    """
    Write prophages and their potential attachment sites in updated input GenBank file.

    Each prophage in ``self.pp`` is added to its contig as a ``misc_feature``;
    attachment sites, when present, are added as a two-part ``repeat_region``.
    The output is written into ``self.output_dir`` and is opened through gzip
    when the input file is gzip. The output handle is closed once writing
    has finished.

    :param self: the data object
    :return: None
    """

    log_and_message("Writing GenBank output file", c="GREEN", stderr=True, quiet=self.quiet)
    prophage_feature_type = 'misc_feature'  # / prophage_region
    outfile = os.path.join(self.output_dir, self.file_prefix + os.path.basename(self.infile))
    for i in self.pp:
        prophage = self.pp[i]
        self.record.get_entry(prophage['contig']).append_feature(SeqFeature(
                    location=FeatureLocation(prophage['start'], prophage['stop']),
                    type=prophage_feature_type,
                    strand=1,
                    qualifiers=OrderedDict(
                        {'note': f'prophage region pp{i} identified with PhiSpy v{version.__version__}'}
                    )))
        # NOTE(review): the presence check uses the key 'atts' while the
        # coordinates are read from 'att' - confirm both keys are populated together
        if 'atts' in prophage:
            self.record.get_entry(prophage['contig']).append_feature(SeqFeature(
                        location=FeatureLocation(int(prophage['att'][0]), int(prophage['att'][1])) +
                                 FeatureLocation(int(prophage['att'][2]), int(prophage['att'][3])),
                        type='repeat_region',
                        strand=1,
                        qualifiers=OrderedDict({'note': f'prophage region pp{i} potential attachment sites'})))

    # are we writing a gzip file
    if is_gzip_file(self.infile):
        handle = gzip.open(outfile, 'wt')
    else:
        handle = open(outfile, 'w')

    # the context manager closes the handle, so buffered (and compressed)
    # output is flushed to disk even if writing fails part-way
    with handle:
        SeqIO.write(self.record, handle, 'genbank')
示例20
def processed_record(detector_name='deepbgc', detector_label='deepbgc', score_threshold=0.5):
    """Build a small SeqRecord fixture carrying detector metadata, three CDS
    features, three pfam features and two cluster features.

    Args:
        detector_name: detector name stored in the metadata and used to derive the score column
        detector_label: detector label stored in the metadata and used to derive the comment key
        score_threshold: threshold stored in the metadata

    Returns:
        the populated SeqRecord
    """
    comment_key = util.format_detector_meta_key(detector_label)
    record = SeqRecord(Seq('ACTGCTCGACTGATT', alphabet=generic_dna))
    detector_meta = collections.OrderedDict(
        name=detector_name,
        label=detector_label,
        score_threshold=score_threshold
    )
    record.annotations['structured_comment'] = collections.OrderedDict()
    record.annotations['structured_comment'][comment_key] = detector_meta

    # (start, end, locus tag, pfam score, pfam id) - one row per protein
    proteins = [
        (0, 2, 'A', 0.4, 'PF00001'),
        (2, 5, 'B', 0.7, 'PF00002'),
        (5, 8, 'C', 0.6, 'PF00003'),
    ]

    # Add protein features
    for start, end, locus_tag, _, _ in proteins:
        record.features.append(
            SeqFeature(FeatureLocation(start, end), type='CDS', qualifiers={'locus_tag': [locus_tag]})
        )

    # Add pfam features, each sharing the span of its protein
    score_column = util.format_bgc_score_column(detector_name)
    for start, end, locus_tag, score, pfam_id in proteins:
        qualifiers = {score_column: [score], 'db_xref': [pfam_id], 'locus_tag': [locus_tag],
                      'database': [PFAM_DB_VERSION]}
        record.features.append(SeqFeature(FeatureLocation(start, end), type=util.PFAM_FEATURE, qualifiers=qualifiers))

    # Add BGC features: one from the detector (score stored as a string) and one annotated
    detected = {score_column: ['0.6'], 'detector': [detector_name], 'detector_label': [detector_label]}
    annotated = {'detector': ['annotated'], 'detector_label': ['annotated']}
    for (start, end), qualifiers in (((0, 5), detected), ((2, 8), annotated)):
        record.features.append(SeqFeature(FeatureLocation(start, end), type='cluster', qualifiers=qualifiers))
    return record
示例21
def test_feature_creation(self):
        """new_feature_from_other keeps the strand and yields the expected
        location on both the forward and the reverse strand."""
        results = tta.tta.TTAResults('dummy', gc_content=1, threshold=0.65)
        # (strand of the CDS, expected start, expected end) for an offset of 12
        cases = ((1, 222, 225), (-1, 285, 288))
        for strand, expected_start, expected_end in cases:
            cds = SeqFeature(FeatureLocation(210, 300, strand=strand), type='CDS')
            ret = results.new_feature_from_other(cds, 12)
            self.assertEqual(ret.strand, strand)
            self.assertEqual(ret.location.start, expected_start)
            self.assertEqual(ret.location.end, expected_end)
示例22
def to_biopython(self, qualifiers: Dict[str, List[str]] = None) -> List[SeqFeature]:
        """Add the domain subtype and specificity qualifiers (when set) and
        delegate conversion to the parent class.

        Qualifiers supplied by the caller override the ones generated here.
        """
        generated = OrderedDict()  # type: Dict[str, List[str]]
        subtype = self.domain_subtype
        if subtype:
            generated["domain_subtype"] = [subtype]
        specificity = self.specificity
        if specificity:
            generated["specificity"] = specificity
        if qualifiers:
            generated.update(qualifiers)
        return super().to_biopython(generated)
示例23
def to_biopython(self, qualifiers: Dict[str, Any] = None) -> List[SeqFeature]:
        """ Converts this feature into one or more SeqFeature instances.

            Subclasses must manage their own attributes and potential extra
            features.

            Arguments:
                qualifiers: extra qualifiers to write; they override stored
                    qualifiers of the same name, except for "note" entries,
                    which are merged with the existing notes. The mapping
                    itself is left unmodified.

            Returns:
                a list holding the single generated SeqFeature
        """
        feature = SeqFeature(self.location, type=self.type)
        quals = self._qualifiers.copy()
        stored_notes = self._qualifiers.get("note", [])
        assert stored_notes is not None
        # work on a fresh list: extending the stored list in place would make
        # the notes pile up in self._qualifiers on every conversion
        notes = list(stored_notes)
        notes.extend(self.notes)
        if qualifiers:
            # copy first so that the caller's mapping does not lose its "note" entry
            extras = dict(qualifiers)
            notes.extend(extras.pop("note", []))
            quals.update(extras)
        if notes:
            # sorting helps with consistency and comparison testing
            quals["note"] = sorted(notes)
        if self.created_by_antismash:
            quals["tool"] = ["antismash"]
        if self._original_codon_start is not None:
            start = int(self._original_codon_start)
            # the qualifier is written one higher than the stored value
            quals["codon_start"] = [str(start + 1)]
            # adjust location back if necessary
            if self.location.strand == -1:
                start *= -1
            if self._original_codon_start != 0:
                feature.location = _adjust_location_by_offset(feature.location, -start)
        # sorted here to match the behaviour of biopython
        for key, val in sorted(quals.items()):
            feature.qualifiers[key] = val
        assert isinstance(feature.qualifiers, dict)
        return [feature]
示例24
def to_biopython(self, qualifiers: Optional[Dict[str, List[str]]] = None) -> List[SeqFeature]:
        """Write this region's numbering, products, rules, probabilities and
        child numbers into the qualifiers and delegate to the parent class.

        A non-empty ``qualifiers`` mapping passed by the caller is extended in place.
        """
        quals = qualifiers if qualifiers else {}
        # the region number is only written when a parent record is set
        if self._parent_record:
            quals["region_number"] = [str(self.get_region_number())]
        quals["product"] = self.products
        quals["rules"] = self.detection_rules
        quals["probabilities"] = ["%.4f" % prob for prob in self.probabilities]
        quals["subregion_numbers"] = [str(sub.get_subregion_number()) for sub in self._subregions]
        quals["candidate_cluster_numbers"] = [
            str(cand.get_candidate_cluster_number()) for cand in self._candidate_clusters
        ]

        return super().to_biopython(quals)
示例25
def to_biopython(self, qualifiers: Dict[str, Any] = None) -> SeqFeature:
        """ Construct a matching SeqFeature for this Gene

            The locus tag and gene name are written only when set. A non-empty
            ``qualifiers`` mapping passed by the caller is extended in place.
        """
        quals = qualifiers if qualifiers else {}
        for key, value in (("locus_tag", self.locus_tag), ("gene", self.gene_name)):
            if value:
                quals[key] = [value]
        return super().to_biopython(quals)
示例26
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
        """Build an instance of ``cls`` from a biopython feature.

        Any ``feature`` passed in is ignored: a new instance is always built.
        When the qualifiers supply neither a locus tag nor a gene name, a name
        is generated from the start and end of the location.

        Args:
            bio_feature: the SeqFeature providing the location
            feature: unused, replaced by the newly built instance
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: forwarded to the parent class

        Returns:
            the newly built instance
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        # empty values are treated the same as missing ones
        locus = leftovers.pop("locus_tag", [""])[0] or None
        name = leftovers.pop("gene", [""])[0] or None
        if not locus and not name:
            span = bio_feature.location
            name = "gene%s_%s" % (span.start, span.end)

        built = cls(bio_feature.location, locus_tag=locus, gene_name=name)
        super().from_biopython(bio_feature, feature=built, leftovers=leftovers, record=record)
        return built
示例27
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, Any] = None, record: Any = None) -> T:
        """Rebuild an instance of ``cls`` from the "core" biopython feature.

        The overall location is assembled from the optional leader location,
        the location of ``bio_feature`` and the optional tail location, in
        that order.

        Args:
            bio_feature: the feature whose "prepeptide" qualifier must be "core"
            feature: unused
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: unused

        Returns:
            the newly built instance

        Raises:
            SecmetInvalidInputError: if the "prepeptide" qualifier is missing,
                empty, or anything other than "core"
            KeyError: if a mandatory qualifier is absent
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        section = leftovers.pop("prepeptide", [""])[0]
        if not section:
            raise SecmetInvalidInputError("cannot reconstruct Prepeptide from biopython feature %s" % bio_feature)
        if section != "core":
            raise SecmetInvalidInputError("Prepeptide can only be reconstructed from core feature")

        alt_weights = [float(weight) for weight in leftovers.pop("alternative_weights", [])]

        # leader and tail locations are only read when their sequences are present
        leader = leftovers.pop("leader_sequence", [""])[0]
        locations = [bio_feature.location]
        if leader:
            locations.insert(0, location_from_string(leftovers.pop("leader_location")[0]))
        tail = leftovers.pop("tail_sequence", [""])[0]
        if tail:
            locations.append(location_from_string(leftovers.pop("tail_location")[0]))

        location = build_location_from_others(locations)

        # mandatory qualifiers, consumed in the order of the constructor arguments
        peptide = leftovers.pop("peptide")[0]
        core = leftovers.pop("core_sequence")[0]
        locus_tag = leftovers.pop("locus_tag")[0]
        tool = leftovers.pop("aSTool")[0]
        predicted_class = leftovers.pop("predicted_class")[0]
        score = float(leftovers.pop("score")[0])
        monoisotopic_mass = float(leftovers.pop("monoisotopic_mass")[0])
        molecular_weight = float(leftovers.pop("molecular_weight")[0])

        return cls(
            location, peptide, core, locus_tag, tool, predicted_class,
            score, monoisotopic_mass, molecular_weight,
            alt_weights, leader, tail
        )
示例28
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
        """Fill in the AntismashFeature-level attributes of an already constructed ``feature``.

        Args:
            bio_feature: the SeqFeature being converted
            feature: the instance to update; it must be supplied by the caller
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: forwarded to the parent class

        Returns:
            the instance returned by the parent class

        Raises:
            SecmetInvalidInputError: if no ``feature`` is supplied, or if the
                qualifiers mark the feature as created by antiSMASH while the
                feature has no tool
        """
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)
        if not feature:
            raise SecmetInvalidInputError("AntismashFeature shouldn't be instantiated directly")
        assert isinstance(feature, AntismashFeature)

        def pop_first(key):
            # first value of the qualifier, with missing and empty values both giving None
            return leftovers.pop(key, [""])[0] or None

        # semi-optional qualifiers
        if leftovers.get("tool") == ["antismash"] and not feature.tool:
            raise SecmetInvalidInputError("an AntismashFeature created by antiSMASH must have a tool supplied")

        # optional qualifiers
        feature.domain_id = pop_first("domain_id")
        if feature.domain_id:
            # long ids causing linebreaks in genbanks can have spaces inserted,
            # strip them out so id-based lookups can function again
            feature.domain_id = feature.domain_id.replace(" ", "")
        feature.database = pop_first("database")
        feature.detection = pop_first("detection")
        feature.label = pop_first("label")
        if feature.label:
            # long labels can pick up inserted spaces in the same way
            feature.label = feature.label.replace(" ", "")
        if not feature.locus_tag:  # may already be populated
            feature.locus_tag = pop_first("locus_tag")
        translation = pop_first("translation")
        if translation is not None:
            feature.translation = translation
        # numeric qualifiers are only set when present
        for numeric_key in ("evalue", "score"):
            if numeric_key in leftovers:
                setattr(feature, numeric_key, float(leftovers.pop(numeric_key)[0]))

        # the parent class handles the remaining qualifiers
        updated = super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
        assert isinstance(updated, AntismashFeature)
        return updated
示例29
def from_biopython(cls: Type[T], bio_feature: SeqFeature, feature: T = None,
                       leftovers: Dict[str, List[str]] = None, record: Any = None) -> T:
        """Build (or complete) a CDSCollection from a biopython feature.

        The "contig_edge" qualifier is always consumed, but it is only applied
        to an instance that is newly built here.

        Args:
            bio_feature: the SeqFeature providing the location and type
            feature: an existing instance to reuse; a new one is built when falsy
            leftovers: qualifiers still to be consumed; obtained through
                ``Feature.make_qualifiers_copy(bio_feature)`` when None
            record: forwarded to the parent class

        Returns:
            the instance that was passed in or newly built
        """
        assert issubclass(cls, CDSCollection)
        if leftovers is None:
            leftovers = Feature.make_qualifiers_copy(bio_feature)

        # only the exact string "True" counts as being on a contig edge
        edge_value = leftovers.pop("contig_edge", [""])[0]
        on_contig_edge = edge_value == "True"
        if not feature:
            feature = cls(bio_feature.location, bio_feature.type)
            feature._contig_edge = on_contig_edge  # pylint: disable=protected-access

        # the parent class handles the remaining qualifiers
        super().from_biopython(bio_feature, feature=feature, leftovers=leftovers, record=record)
        return feature
示例30
def to_biopython(self, qualifiers: Optional[Dict[str, List[str]]] = None) -> List[SeqFeature]:
        """Add the "contig_edge" qualifier when a parent record is set and
        delegate conversion to the parent class.

        A non-empty ``qualifiers`` mapping passed by the caller is extended in place.
        """
        quals = qualifiers if qualifiers else {}
        if self.parent_record:
            quals["contig_edge"] = [str(self.contig_edge)]
        return super().to_biopython(quals)