Python源码示例:Bio.SeqFeature.FeatureLocation()

示例1
def add_point_feature(self, resnum, feat_type=None, feat_id=None, qualifiers=None):
        """Append a feature covering a single residue to ``self.features``.

        The feature location spans the interval from ``resnum - 1`` to ``resnum``.

        Args:
            resnum (int): Protein sequence residue number
            feat_type (str, optional): Description of the feature type (e.g. 'catalytic residue').
                A generic description is substituted when this is empty or None.
            feat_id (str, optional): ID of the feature (e.g. 'TM1')
            qualifiers (optional): Passed unchanged to ``SeqFeature`` as its qualifiers

        Raises:
            ValueError: If ``self.feature_file`` is set.

        """
        if self.feature_file:
            raise ValueError('Feature file associated with sequence, please remove file association to append '
                             'additional features.')

        residue_span = FeatureLocation(ExactPosition(resnum - 1), ExactPosition(resnum))
        description = feat_type or 'Manually added protein sequence single residue feature'
        self.features.append(SeqFeature(location=residue_span,
                                        type=description,
                                        id=feat_id,
                                        qualifiers=qualifiers))
示例2
def add_region_feature(self, start_resnum, end_resnum, feat_type=None, feat_id=None, qualifiers=None):
        """Append a feature covering a stretch of residues to ``self.features``.

        The feature location spans the interval from ``start_resnum - 1`` to ``end_resnum``.

        Args:
            start_resnum (int): Start residue number of the protein sequence feature
            end_resnum (int): End residue number of the protein sequence feature
            feat_type (str, optional): Description of the feature type (e.g. 'binding domain').
                A generic description is substituted when this is empty or None.
            feat_id (str, optional): ID of the feature (e.g. 'TM1')
            qualifiers (optional): Passed unchanged to ``SeqFeature`` as its qualifiers

        Raises:
            ValueError: If ``self.feature_file`` is set.

        """
        if self.feature_file:
            raise ValueError('Feature file associated with sequence, please remove file association to append '
                             'additional features.')

        region_span = FeatureLocation(start_resnum - 1, end_resnum)
        description = feat_type or 'Manually added protein sequence region feature'
        self.features.append(SeqFeature(location=region_span,
                                        type=description,
                                        id=feat_id,
                                        qualifiers=qualifiers))
示例3
def get_residue_annotations(self, start_resnum, end_resnum=None):
        """Return the letter annotations of one residue or of a residue range.

        Args:
            start_resnum (int): Residue number
            end_resnum (int): Optional last residue number; when omitted (or otherwise falsy)
                only ``start_resnum`` is looked up

        Returns:
            dict: Letter annotations for this residue or residues

        """
        last_resnum = end_resnum if end_resnum else start_resnum

        # A throwaway feature is used only to slice this object via extract()
        span = SeqFeature(FeatureLocation(start_resnum - 1, last_resnum))
        return span.extract(self).letter_annotations
示例4
def test_translate_feature(self):
        """
        translate_feature should turn a dict of nucleotide sequences into a dict of
        amino-acid strings for the given feature.
        """
        # name -> (nucleotides, expected amino acids)
        # Seq -> Amino https://en.wikipedia.org/wiki/DNA_codon_table
        cases = {
            'seq1': ("TTTCTTATGGTCGTA", 'FLMVV'),
            'seq2': ("TCTTCAACTGCTACA", 'SSTAT'),
            'seq3': ("CATAATGAATATAAT", 'HNEYN'),
        }
        aln = {name: Seq(nucleotides) for name, (nucleotides, _) in cases.items()}
        expected_translations = {name: protein for name, (_, protein) in cases.items()}

        # The feature covers all 15 nucleotides of every sequence
        feature = SeqFeature(FeatureLocation(0, 15), type="domain")

        assert translate.translate_feature(aln, feature) == expected_translations

    # TODO: test_vcf_feature, assign_aa_vcf, assign_aa_fasta
    # Unclear how to emulate inputs (TreeTime dict, tree) 
示例5
def create_faux_record_from_proteins(proteins, id):
    """Build a SeqRecord with an empty sequence and one CDS feature per protein.

    The CDS features are laid out back to back on the forward strand; each one
    is three positions long per residue of its protein. Every feature carries
    the protein id (cut to 45 characters) and the protein sequence as its
    ``protein_id`` and ``translation`` qualifiers.

    Args:
        proteins: Iterable of objects exposing ``id`` and ``seq``
        id: Identifier given to the created record

    Returns:
        SeqRecord: The record holding the generated CDS features
    """
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq
    from Bio.SeqFeature import SeqFeature, FeatureLocation
    max_protein_id_len = 45
    record = SeqRecord(seq=Seq(''), id=id)
    offset = 0
    for protein in proteins:
        next_offset = offset + len(protein.seq) * 3
        cds = SeqFeature(
            location=FeatureLocation(offset, next_offset, strand=1),
            type="CDS",
            qualifiers={
                'protein_id': [protein.id[:max_protein_id_len]],
                'translation': [str(protein.seq)]
            }
        )
        record.features.append(cds)
        # The next CDS starts where this one ended
        offset = next_offset
    return record
示例6
def fetch_source_feature(self, gb_record):
        """Return the first "source" feature of a record, creating it if missing.

        When no feature of type "source" exists, a new one spanning position 0
        to ``len(gb_record.seq)`` is inserted at the front of
        ``gb_record.features`` and returned.

        Args:
            gb_record: Record exposing ``features`` and ``seq``

        Returns:
            The existing or newly created source feature
        """
        for candidate in gb_record.features:
            if candidate.type == "source":
                return candidate

        # No source feature yet: add one covering the whole sequence
        whole_record = FeatureLocation(SeqFeature.ExactPosition(0),
                                       SeqFeature.ExactPosition(len(gb_record.seq)))
        source_feature = SeqFeature.SeqFeature(whole_record, type="source")
        gb_record.features.insert(0, source_feature)
        return source_feature
示例7
def drawFig(self):
        """Draw the named regions as a one-track linear GenomeDiagram and save it as PDF.

        Every ``(name, start, stop)`` entry of ``self.list_name_start_stop`` becomes a
        labelled BIGARROW feature. Label and size settings are read from
        ``self.dict_args``; the result is written to ``linear.pdf`` under
        ``self.dict_args["exportPath"]``.
        """
        # Keyword looked up in the upper-cased name -> colour, tested in this order
        family_colors = (
            ("COX", "#81CEEA"),
            ("NAD", "#F9C997"),
            ("ATP", "#E97E8D"),
            ("CYTB", "#E2E796"),
            ("COB", "#E2E796"),
            ("RRN", "#94F2DB"),
        )
        settings = self.dict_args
        diagram = GenomeDiagram.Diagram('linear figure')
        track = diagram.new_track(1, greytrack=False, scale=0, height=0.4)
        feature_set = track.new_set()
        for name, start, stop in self.list_name_start_stop:
            upper_name = name.upper()
            for keyword, family_color in family_colors:
                if keyword in upper_name:
                    # NOTE(review): `color` is selected here but never passed to
                    # add_feature below, which always uses dict_args["Label_color"].
                    color = family_color
                    break
            # strand = -1 if name in ["nad1", "cytb", "nad4", "nad4L", "rrnL"] else 1
            feature = SeqFeature(FeatureLocation(int(start), int(stop)), strand=1)
            feature_set.add_feature(feature, name=name, label=True,
                                    label_size=settings["label_size"],
                                    label_angle=settings["Label_angle"],
                                    color=settings["Label_color"],
                                    label_position=settings["Label_position"],
                                    sigil="BIGARROW", arrowshaft_height=0.5,
                                    arrowhead_length=0.5)
        page = (settings["fig_width"] * cm, settings["fig_height"] * cm)
        # NOTE(review): `stop` is the value left over from the last loop iteration,
        # so an empty list_name_start_stop raises NameError here.
        diagram.draw(format='linear', pagesize=page, fragments=1,
                     start=0, end=int(stop))
        diagram.write(settings["exportPath"] + os.sep + "linear.pdf", "pdf")
示例8
def test_add_results_to_record(self):
        """Gene ontologies added to a record must match the results, including for
        Pfam domains that share an identifier but differ in version."""
        pfams = {
            'PF00015.2': FeatureLocation(0, 3),
            'PF00351.1': FeatureLocation(0, 3),
            'PF00015.27': FeatureLocation(3, 6),
        }
        record = set_dummy_with_pfams(pfams)
        duplicate = DummyPFAMDomain(identifier="PF00015.2")
        record.add_pfam_domain(duplicate)
        assert duplicate in record.get_pfam_domains()

        gos = pfam2go.get_gos_for_pfams(record)
        results = pfam2go.Pfam2GoResults(record.id, gos)
        results.add_to_record(record)
        assert duplicate.full_identifier == 'PF00015.2'

        for pfam in record.get_pfam_domains():
            assert sorted(pfam.gene_ontologies.ids) == sorted(results.get_all_gos(pfam))
            if pfam.identifier != "PF00015":
                continue
            # make sure identical pfams (with different version numbers) all have the same gene ontologies
            assert pfam.version in [2, 27]
            assert sorted(pfam.gene_ontologies.ids) == sorted(results.get_all_gos(duplicate))
示例9
def test_to_json(self):
        """to_json must report the record id, schema version 1 and the expected
        GO terms for each Pfam."""
        location = FeatureLocation(0, 12)
        record = set_dummy_with_pfams({'PF00015': location, 'PF00351': location})
        gos = pfam2go.get_gos_for_pfams(record)
        results = pfam2go.Pfam2GoResults(record.id, gos)
        result_json = results.to_json()

        expected_pfams = {
            "PF00015": {"GO:0007165": "signal transduction",
                        "GO:0016020": "membrane"},
            "PF00351": {"GO:0016714": ("oxidoreductase activity, acting on paired donors, "
                                       "with incorporation or reduction of molecular oxygen, "
                                       "reduced pteridine as one donor, and incorporation of "
                                       "one atom of oxygen"),
                        "GO:0055114": "oxidation-reduction process"},
        }

        assert result_json["record_id"] == record.id
        assert result_json["schema_version"] == 1
        for pfam_id, expected_gos in expected_pfams.items():
            assert expected_gos == result_json["pfams"][pfam_id]
示例10
def test_from_wrong_schema(self):
        """from_json must log a mismatch and return a falsy result when the JSON
        carries a schema version other than the current one."""
        location = FeatureLocation(0, 12)
        record = set_dummy_with_pfams({
            'PF00015': location,
            'PF00351': location,
            'PF05147': location,
        })
        pfam_entries = {
            "PF00015": {"GO:0004871": "signal transducer activity",
                        "GO:0007165": "signal transduction",
                        "GO:0016020": "membrane"},
            "PF00351": {"GO:0016714": ("oxidoreductase activity, acting on paired donors, "
                                       "with incorporation or reduction of molecular oxygen, "
                                       "reduced pteridine as one donor, and incorporation of "
                                       "one atom of oxygen"),
                        "GO:0055114": "oxidation-reduction process"},
        }
        # schema_version 2 is the deliberately wrong part of this payload
        broken_json = {"pfams": pfam_entries,
                       "record_id": record.id,
                       "schema_version": 2}
        with self.assertLogs() as log_cm:
            from_broken_json = pfam2go.Pfam2GoResults.from_json(broken_json, record)
            assert "Schema version mismatch, discarding Pfam2GO results" in str(log_cm.output)
            assert not from_broken_json
示例11
def test_first_gene_forward(self, patched_enumerate):
        """Promoter bounds for forward gene "A" (the first gene) must not depend on
        the strand of the following gene "B"."""
        # ensure coverage only considers this gene of interest
        gene_of_interest = self.add_gene("A", 10, 20, 1)
        patched_enumerate.return_value = [(0, gene_of_interest)]
        other_gene = self.add_gene("B", 30, 40, 1)

        # (get_promoters arguments, expected promoter start, expected promoter end)
        cases = [
            ((5, 75), 5, 20),
            ((25, 75), 0, 20),
            ((5, 5), 5, 15),
            ((25, 5), 0, 15),
        ]

        for strand in [1, -1]:
            other_gene.location = FeatureLocation(30, 40, strand)
            print(other_gene.location)

            for arguments, expected_start, expected_end in cases:
                promoters = self.get_promoters(*arguments)
                assert len(promoters) == 1
                self.check_single_promoter(promoters[0], "A", expected_start, expected_end)
示例12
def test_last_gene_forward(self, patched_enumerate):
        """Promoter bounds for forward gene "B" (the last gene) must not depend on
        the strand of the preceding gene "A"."""
        other_gene = self.add_gene("A", 10, 20, 1)
        # ensure coverage only considers this gene of interest
        gene_of_interest = self.add_gene("B", 30, 40, 1)
        patched_enumerate.return_value = [(1, gene_of_interest)]

        # (get_promoters arguments, expected promoter start, expected promoter end)
        cases = [
            ((5, 75), 25, 40),
            ((25, 75), 21, 40),
            ((5, 5), 25, 35),
            ((25, 5), 21, 35),
        ]

        for strand in [1, -1]:
            other_gene.location = FeatureLocation(10, 20, strand)
            print(other_gene.location)

            for arguments, expected_start, expected_end in cases:
                promoters = self.get_promoters(*arguments)
                assert len(promoters) == 1
                self.check_single_promoter(promoters[0], "B", expected_start, expected_end)
示例13
def test_normal_case_forward(self, patched_enumerate):
        """Promoter bounds for forward gene "B", flanked by genes "A" and "C", must
        not depend on the strand of gene "A"."""
        other = self.add_gene("A", 10, 20, 1)
        gene_of_interest = self.add_gene("B", 40, 60, 1)
        self.add_gene("C", 70, 80, 1)
        patched_enumerate.return_value = [(1, gene_of_interest)]

        # (get_promoters arguments, expected promoter start, expected promoter end)
        cases = [
            ((5, 5), 35, 45),
            ((5, 25), 35, 60),
            ((25, 5), 21, 45),
            ((25, 25), 21, 60),
        ]

        for strand in [-1, 1]:
            other.location = FeatureLocation(other.location.start, other.location.end, strand)

            for arguments, expected_start, expected_end in cases:
                promoters = self.get_promoters(*arguments)
                assert len(promoters) == 1
                self.check_single_promoter(promoters[0], "B", expected_start, expected_end)
示例14
def test_normal_case_reverse(self, patched_enumerate):
        """Check promoter bounds for reverse-strand gene "B", flanked by genes "A"
        and "C", with gene "C" on the reverse strand."""
        self.add_gene("A", 10, 20, 1)
        gene_of_interest = self.add_gene("B", 40, 60, -1)
        other = self.add_gene("C", 70, 80, -1)
        patched_enumerate.return_value = [(1, gene_of_interest)]

        # (get_promoters arguments, expected promoter start, expected promoter end)
        cases = [
            ((5, 5), 55, 65),
            ((5, 25), 40, 65),
            ((25, 5), 55, 69),
            ((25, 25), 40, 69),
        ]

        # Only the reverse strand is exercised for the neighbouring gene here
        for strand in [-1]:
            other.location = FeatureLocation(other.location.start, other.location.end, strand)

            for arguments, expected_start, expected_end in cases:
                promoters = self.get_promoters(*arguments)
                assert len(promoters) == 1
                self.check_single_promoter(promoters[0], "B", expected_start, expected_end)
示例15
def get_aa_translation_from_location(self, location: FeatureLocation,
                                         transl_table: Union[str, int] = None) -> Seq:
        """ Translate the part of ``self.seq`` covered by a location.

            Gaps ('-') are removed from the extracted nucleotides and trailing
            bases that do not fill a codon are dropped before translating. If
            translating up to the first stop codon yields nothing, the whole
            stretch is translated instead. In the result the characters
            ``*BJOUZ`` are replaced by ``X`` and any '-' is removed.

            Arguments:
                location: the location to extract and translate
                transl_table: translation table to use, defaults to
                              ``self._transl_table`` when None

            Returns:
                the protein sequence as a Seq

            Raises:
                ValueError: if the location ends beyond the available sequence
        """
        if location.end > len(self.seq):
            raise ValueError("location outside available sequence")
        table = self._transl_table if transl_table is None else transl_table

        nucleotides = location.extract(self.seq).ungap('-')
        leftover = len(nucleotides) % 3
        if leftover:
            nucleotides = nucleotides[:-leftover]

        protein = nucleotides.translate(to_stop=True, table=table)
        if not protein:
            # go past stop codons and hope for something to work with
            protein = nucleotides.translate(table=table)

        # replace ambiguous proteins with an explicit unknown
        cleaned = str(protein).translate(str.maketrans("*BJOUZ", "XXXXXX"))
        if "-" in cleaned:
            cleaned = cleaned.replace("-", "")
        return Seq(cleaned, Alphabet.generic_protein)
示例16
def create_feature_from_location(record: Record, location: FeatureLocation,
                                 label: Optional[str] = None) -> CDSFeature:
    """ Builds a CDSFeature for the given location, translated from the record.

        Arguments:
            record: the Record used to generate the feature translation
            location: the FeatureLocation the new CDSFeature will cover
            label: used as locus tag, protein id and gene name of the new
                   CDSFeature; when None, a label of the form
                   'allorf_<start>_<end>' is generated, with both numbers
                   zero-padded to the number of digits in len(record)

        Returns:
            the new CDSFeature, flagged as created by antiSMASH
    """
    if label is None:
        width = len(str(len(record)))
        label = 'allorf_{start:0{digits}}_{end:0{digits}}'.format(
            digits=width, start=(location.start + 1), end=location.end
        )
    translation = str(record.get_aa_translation_from_location(location))
    feature = CDSFeature(location, translation,
                         locus_tag=label, protein_id=label, gene=label)
    feature.created_by_antismash = True
    return feature
示例17
def _set_before(self, location):
        """
        Mark a location as open at the end where the start codon would be,
        to indicate that the mRNA does not include the start codon.

        For a strand >= 0 the start of the first part becomes a
        "BeforePosition"; otherwise the end of the last part becomes an
        "AfterPosition". A location with several parts has the affected part
        replaced in place and is returned itself; a single-part location is
        returned as a new FeatureLocation.

        NOTE(review): a strand of None would make the ``>=`` comparison raise
        TypeError on Python 3 - confirm callers always pass a stranded location.
        """
        on_forward = location.strand >= 0  # forward strand
        multi_part = len(location.parts) > 1

        if on_forward:
            index = 0
            piece = location.parts[index] if multi_part else location
            marked = FeatureLocation(BeforePosition(piece.start), piece.end, strand=piece.strand)
        else:
            index = -1
            piece = location.parts[index] if multi_part else location
            marked = FeatureLocation(piece.start, AfterPosition(piece.end), strand=piece.strand)

        if not multi_part:
            return marked
        location.parts[index] = marked
        return location
示例18
def _set_after(self, location):
        """
        Mark a location as open at the end where the stop codon would be,
        to indicate that the mRNA does not include the stop codon.

        For a strand >= 0 the end of the last part becomes an "AfterPosition";
        otherwise the start of the first part becomes a "BeforePosition". A
        location with several parts has the affected part replaced in place
        and is returned itself; a single-part location is returned as a new
        FeatureLocation.

        NOTE(review): a strand of None would make the ``>=`` comparison raise
        TypeError on Python 3 - confirm callers always pass a stranded location.
        """
        on_forward = location.strand >= 0  # forward strand
        multi_part = len(location.parts) > 1

        if on_forward:
            index = -1
            piece = location.parts[index] if multi_part else location
            marked = FeatureLocation(piece.start, AfterPosition(piece.end), strand=piece.strand)
        else:
            index = 0
            piece = location.parts[index] if multi_part else location
            marked = FeatureLocation(BeforePosition(piece.start), piece.end, strand=piece.strand)

        if not multi_part:
            return marked
        location.parts[index] = marked
        return location
示例19
def get_subsequence(self, resnums, new_id=None, copy_letter_annotations=True):
        """Build a new SeqProp from the listed residues of ``self.seq_record``.

        Args:
            resnums: Residue numbers to pick; each one selects the interval
                from ``resnum - 1`` to ``resnum``
            new_id (str, optional): ID of the new SeqProp, defaults to
                ``'<self.id>_subseq'``
            copy_letter_annotations (bool): Copy the letter annotations of the
                extracted subsequence onto the new object

        Returns:
            SeqProp: The subsequence, or None when ``resnums`` is empty

        Raises:
            ValueError: If no ``seq_record`` is stored

        """
        if not self.seq_record:
            raise ValueError('No chain sequence stored')

        residue_locations = [FeatureLocation(resnum - 1, resnum) for resnum in resnums]

        if not residue_locations:
            log.info('Zero length subsequences')
            return None

        if len(residue_locations) == 1:
            log.debug('Subsequence only one residue long')
            sub_location = residue_locations[0]
        else:
            sub_location = CompoundLocation(residue_locations)

        sub_feature = sub_location.extract(self.seq_record)

        subseq_id = new_id if new_id else '{}_subseq'.format(self.id)
        new_sp = SeqProp(id=subseq_id, seq=sub_feature)
        if copy_letter_annotations:
            new_sp.letter_annotations = sub_feature.letter_annotations
        return new_sp
示例20
def get_subsequence(self, resnums, new_id=None, copy_letter_annotations=True):
        """Build a new SeqProp from the listed residues of this object.

        Args:
            resnums: Residue numbers to pick; each one selects the interval
                from ``resnum - 1`` to ``resnum``
            new_id (str, optional): ID of the new SeqProp, defaults to
                ``'<self.id>_subseq'``
            copy_letter_annotations (bool): Copy the letter annotations of the
                extracted subsequence onto the new object

        Returns:
            SeqProp: The subsequence, or None when ``resnums`` is empty or the
            extraction raises a TypeError

        """
        # XTODO can be sped up by separating into ranges based on continuous resnums
        residue_locations = [FeatureLocation(resnum - 1, resnum) for resnum in resnums]

        if not residue_locations:
            log.debug('Zero length subsequence')
            return None

        if len(residue_locations) == 1:
            log.debug('Subsequence only one residue long')
            sub_location = residue_locations[0]
        else:
            sub_location = CompoundLocation(residue_locations)

        try:
            sub_feature = sub_location.extract(self)
        except TypeError:
            log.critical('SeqProp {}: unknown error when trying to get subsequence - please investigate! '
                          'Try using a feature to extract a subsequence from the SeqProp'.format(self.id))
            return None

        subseq_id = new_id if new_id else '{}_subseq'.format(self.id)
        new_sp = SeqProp(id=subseq_id, seq=sub_feature.seq)
        if copy_letter_annotations:
            new_sp.letter_annotations = sub_feature.letter_annotations
        return new_sp
示例21
def test_merge_split(self):
        """
        Test the merge and split function

        Paracoccus_yeei_TT13.gb has the following compound locations:
        PYTT13_06780: join{[1366920:1367182](-), [1365992:1366921](-)}
                      [1365992:1367182](-)

        PYTT13_11395: join{[2283890:2284152](+), [2284151:2285080](+)}
                      [2283890:2285080](+)

        PYTT13_11465: join{[2301567:2301817](+), [2301816:2302655](+)}
                      [2301567:2302655](+)

        PYTT13_12460: join{[2495319:2495581](+), [2495580:2496509](+)}
                      [2495319:2496509](+)

        PYTT13_16505: join{[3331106:3331356](+), [3331355:3332194](+)}
                      [3331106:3332194](+)

        After loading through SeqioFilter, each of these CDS features is
        expected to carry a plain FeatureLocation with the merged start and
        end listed on the second line of each entry above.

        :return:
        """
        # locus tag -> (expected start, expected end) of the merged location
        correct_locations = {
            'PYTT13_06780': (1365992, 1367182),
            'PYTT13_11395': (2283890, 2285080),
            'PYTT13_11465': (2301567, 2302655),
            'PYTT13_12460': (2495319, 2496509),
            'PYTT13_16505': (3331106, 3332194)
        }

        testgbk = "test_genbank_files/Paracoccus_yeei_TT13.gb.gz"
        # The context manager closes the handle even if parsing raises. As before,
        # the handle is closed before the records are iterated below, so this relies
        # on SeqioFilter having consumed the parser by the time it returns.
        with gzip.open(testgbk, 'rt') as handle:
            record = SeqioFilter(SeqIO.parse(handle, "genbank"))

        for s in record:
            for f in s.get_features("CDS"):
                if 'locus_tag' not in f.qualifiers:
                    continue
                lt = f.qualifiers['locus_tag'][0]
                if lt not in correct_locations:
                    continue
                expected_start, expected_end = correct_locations[lt]
                self.assertIsInstance(f.location, FeatureLocation)
                self.assertEqual(f.location.start, expected_start)
                self.assertEqual(f.location.end, expected_end)
示例22
def write_genbank(self):
    """
    Write prophages and their potential attachment sites in updated input GenBank file.

    For every prophage in ``self.pp`` a ``misc_feature`` is appended to the entry of
    its contig; when attachment sites are recorded, a two-part ``repeat_region``
    feature is appended as well. The record is then written to
    ``<output_dir>/<file_prefix><basename of infile>``, gzip-compressed when the
    input file is gzip-compressed.

    :param self: the data object
    :return: None
    """

    log_and_message("Writing GenBank output file", c="GREEN", stderr=True, quiet=self.quiet)
    prophage_feature_type = 'misc_feature'  # / prophage_region
    outfile = os.path.join(self.output_dir, self.file_prefix + os.path.basename(self.infile))
    for i in self.pp:
        prophage = self.pp[i]
        self.record.get_entry(prophage['contig']).append_feature(SeqFeature(
                    location=FeatureLocation(prophage['start'], prophage['stop']),
                    type=prophage_feature_type,
                    strand=1,
                    qualifiers=OrderedDict(
                        {'note': f'prophage region pp{i} identified with PhiSpy v{version.__version__}'}
                    )))
        # NOTE(review): presence is tested with the 'atts' key while the coordinates
        # are read from 'att' - confirm both keys are always set together.
        if 'atts' in prophage:
            att = prophage['att']
            self.record.get_entry(prophage['contig']).append_feature(SeqFeature(
                        location=FeatureLocation(int(att[0]), int(att[1])) +
                                 FeatureLocation(int(att[2]), int(att[3])),
                        type='repeat_region',
                        strand=1,
                        qualifiers=OrderedDict({'note': f'prophage region pp{i} potential attachment sites'})))

    # are we writing a gzip file
    if is_gzip_file(self.infile):
        handle = gzip.open(outfile, 'wt')
    else:
        handle = open(outfile, 'w')

    # Close the handle once writing is done (or fails) so the output is flushed
    # to disk and, for gzip, properly finalised.
    with handle:
        SeqIO.write(self.record, handle, 'genbank')
示例23
def _get_pfam_loc(self, query_start, query_end, feature):
        """Convert a query interval into a FeatureLocation relative to a feature.

        Query coordinates are multiplied by 3 and offset from the feature's
        start on strand 1, or counted back from the feature's end on strand -1.

        Args:
            query_start: Start of the hit in query coordinates
            query_end: End of the hit in query coordinates
            feature: Feature exposing ``strand`` and ``location``

        Returns:
            FeatureLocation: The interval on the feature's strand

        Raises:
            ValueError: If the feature strand is neither 1 nor -1
        """
        if feature.strand == 1:
            anchor = feature.location.start
            bounds = (anchor + 3 * query_start, anchor + 3 * query_end)
        elif feature.strand == -1:
            # On the reverse strand the query runs backwards from the feature end
            anchor = feature.location.end
            bounds = (anchor - 3 * query_end, anchor - 3 * query_start)
        else:
            raise ValueError('Invalid strand for feature: {}'.format(feature))
        start, end = bounds
        return FeatureLocation(start, end, strand=feature.strand)
示例24
def processed_record(detector_name='deepbgc', detector_label='deepbgc', score_threshold=0.5):
    """Build a small SeqRecord that looks like the output of a detector run.

    The record has a 15-base sequence, a structured comment describing the
    detector, three CDS features (locus tags A, B, C), one scored Pfam feature
    per CDS, one cluster attributed to the detector and one cluster marked as
    'annotated'.

    Args:
        detector_name: Detector name stored in the comment and cluster qualifiers
        detector_label: Detector label stored in the comment and cluster qualifiers
        score_threshold: Score threshold stored in the structured comment

    Returns:
        SeqRecord: The populated record
    """
    record = SeqRecord(Seq('ACTGCTCGACTGATT', alphabet=generic_dna))
    comment_key = util.format_detector_meta_key(detector_label)
    structured_comment = collections.OrderedDict()
    record.annotations['structured_comment'] = structured_comment
    structured_comment[comment_key] = collections.OrderedDict(
        name=detector_name,
        label=detector_label,
        score_threshold=score_threshold
    )

    # (start, end, locus tag, Pfam accession, Pfam score)
    proteins = [
        (0, 2, 'A', 'PF00001', 0.4),
        (2, 5, 'B', 'PF00002', 0.7),
        (5, 8, 'C', 'PF00003', 0.6),
    ]

    # Add protein features
    for start, end, locus_tag, _, _ in proteins:
        record.features.append(
            SeqFeature(FeatureLocation(start, end), type='CDS', qualifiers={'locus_tag': [locus_tag]}))

    # Add pfam features
    score_column = util.format_bgc_score_column(detector_name)
    for start, end, locus_tag, accession, score in proteins:
        qualifiers = {score_column: [score], 'db_xref': [accession], 'locus_tag': [locus_tag],
                      'database': [PFAM_DB_VERSION]}
        record.features.append(
            SeqFeature(FeatureLocation(start, end), type=util.PFAM_FEATURE, qualifiers=qualifiers))

    # Add BGC features
    clusters = [
        (0, 5, {score_column: ['0.6'], 'detector': [detector_name], 'detector_label': [detector_label]}),
        (2, 8, {'detector': ['annotated'], 'detector_label': ['annotated']}),
    ]
    for start, end, qualifiers in clusters:
        record.features.append(SeqFeature(FeatureLocation(start, end), type='cluster', qualifiers=qualifiers))
    return record
示例25
def test_feature_creation(self):
        """new_feature_from_other must keep the strand of the source CDS and
        return the expected 3-long location for offset 12 on either strand."""
        results = tta.tta.TTAResults('dummy', gc_content=1, threshold=0.65)

        # (strand of the source CDS, expected start, expected end)
        cases = [
            (1, 222, 225),
            (-1, 285, 288),
        ]
        for strand, expected_start, expected_end in cases:
            source = SeqFeature(FeatureLocation(210, 300, strand=strand), type='CDS')
            ret = results.new_feature_from_other(source, 12)
            self.assertEqual(ret.strand, strand)
            self.assertEqual(ret.location.start, expected_start)
            self.assertEqual(ret.location.end, expected_end)
示例26
def set_dummy_with_pfams(pfam_ids: Dict[str, FeatureLocation]) -> DummyRecord:
    """ Builds a DummyRecord holding one DummyPFAMDomain per given Pfam.

        Arguments:
            pfam_ids: maps each Pfam identifier to the location of its domain

        Returns:
            a DummyRecord whose features are the created domains; each domain
            id has the form '<identifier>.<start>.<end>'
    """
    pfam_domains = [
        DummyPFAMDomain(location=location, protein_start=0, protein_end=5,
                        identifier=pfam_id,
                        domain_id='%s.%d.%d' % (pfam_id, location.start, location.end))
        for pfam_id, location in pfam_ids.items()
    ]
    return DummyRecord(features=pfam_domains)
示例27
def test_get_gos(self):
        """Every GO id found for a Pfam must be listed among that Pfam's known
        connections."""
        record = set_dummy_with_pfams({
            'PF00015': FeatureLocation(0, 3),
            'PF00351.42': FeatureLocation(6, 12),
        })
        gos_by_domain = pfam2go.get_gos_for_pfams(record)
        for all_ontologies in gos_by_domain.values():
            for ontologies in all_ontologies:
                for go_entry in ontologies.go_entries:
                    assert str(go_entry) in self.known_connections[ontologies.pfam]
示例28
def test_results(self):
        """Pfam2GoResults must keep the record id and the domain-to-ontology
        mapping it was constructed with."""
        record = set_dummy_with_pfams({'PF00015': FeatureLocation(0, 3)})
        gos_by_domain = pfam2go.get_gos_for_pfams(record)
        results = pfam2go.Pfam2GoResults(record.id, gos_by_domain)

        assert gos_by_domain == results.pfam_domains_with_gos
        assert record.id == results.record_id
        for pfam, all_ontologies in results.pfam_domains_with_gos.items():
            for ontologies in all_ontologies:
                assert ontologies.pfam == pfam.identifier
示例29
def test_from_json(self):
        """Results rebuilt from their own JSON must serialise to the same JSON,
        and PF05147 must not appear in the output."""
        location = FeatureLocation(0, 12)
        record = set_dummy_with_pfams({
            'PF00015': location,
            'PF00351': location,
            'PF05147': location,
        })
        gos_by_domain = pfam2go.get_gos_for_pfams(record)
        results = pfam2go.Pfam2GoResults(record.id, gos_by_domain)
        result_json = results.to_json()

        results_from_json = pfam2go.Pfam2GoResults.from_json(result_json, record)
        assert 'PF05147' not in result_json["pfams"]
        for pfam in results_from_json.pfam_domains_with_gos:
            assert pfam.identifier in result_json["pfams"]

        # Round trip: JSON -> results -> JSON must be lossless
        from_json_to_json = results_from_json.to_json()
        assert result_json == from_json_to_json
        assert from_json_to_json["schema_version"] == 1
示例30
def test_result_vec_to_feature(self):
        "Test thiopeptides.result_vec_to_features()"
        expected_loc = FeatureLocation(0, 66, strand=1)
        cds = DummyCDS(0, 66, locus_tag='FAKE0001')
        core_seq = 'SCTSSCTSS'

        thiopeptide = Thiopeptide(23, 42, 51)
        thiopeptide.thio_type = 'Type III'
        thiopeptide.core = core_seq
        thiopeptide.leader = "HEADHEADHEAD"
        cds.translation = core_seq + thiopeptide.leader

        motif = result_vec_to_feature(cds, thiopeptide)
        leader, core = motif.to_biopython()

        # The leader starts with the CDS and is 12 residues (36 positions) long
        assert expected_loc.start == leader.location.start
        assert expected_loc.start + (12 * 3) == leader.location.end
        assert expected_loc.strand == leader.location.strand

        assert motif.type == 'CDS_motif'
        assert motif.peptide_class == "thiopeptide"
        assert motif.peptide_subclass == "Type III"
        assert cds.locus_tag == motif.locus_tag
        assert motif.detailed_information.rodeo_score == 51
        assert motif.score == 42
        self.assertAlmostEqual(motif.molecular_weight, 861.9, places=1)

        # The core follows the leader directly and ends with the CDS
        assert motif.leader == "HEADHEADHEAD"
        assert leader.location.end == core.location.start
        assert expected_loc.end == core.location.end
        assert expected_loc.strand == core.location.strand
        self.assertAlmostEqual(motif.monoisotopic_mass, 861.3, places=1)

        expected_weights = [879.9, 897.9, 916.0,
                            934.0, 952.0, 970.0,
                            988.0]
        assert len(motif.alternative_weights) == 7
        for calc, expect in zip(motif.alternative_weights, expected_weights):
            self.assertAlmostEqual(calc, expect, places=1)

        assert not motif.detailed_information.amidation
        assert not motif.detailed_information.macrocycle
        assert not motif.tail
        assert motif.detailed_information.core_features == "Central ring: pyridine trisubstituted"
        assert motif.core == "SCTSSCTSS"