Java source code examples: htsjdk.variant.variantcontext.Genotype
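
Before the project examples, here is a minimal orientation sketch of the API these snippets revolve around: building a Genotype with GenotypeBuilder, attaching it to a VariantContext, and reading common fields back. This is only an illustrative sketch that assumes htsjdk is on the classpath; the class name GenotypeQuickStart, the sample name, coordinates, and depth values are hypothetical and not taken from any of the quoted projects.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

import java.util.Arrays;
import java.util.List;

public class GenotypeQuickStart {
    public static void main(String[] args) {
        // One reference allele ("A") and one alternate allele ("T"); the bases are illustrative.
        final Allele ref = Allele.create("A", true);
        final Allele alt = Allele.create("T", false);
        final List<Allele> alleles = Arrays.asList(ref, alt);

        // Build a heterozygous genotype for a hypothetical sample, setting DP, AD and GQ.
        final Genotype genotype = new GenotypeBuilder("sample1", alleles)
                .DP(30)
                .AD(new int[] { 14, 16 })
                .GQ(99)
                .make();

        // Attach the genotype to a VariantContext so it can be queried the way the examples below do.
        final VariantContext vc = new VariantContextBuilder()
                .chr("1")
                .start(100)
                .stop(100)
                .alleles(alleles)
                .genotypes(genotype)
                .make();

        final Genotype g = vc.getGenotype("sample1");
        System.out.println(g.getSampleName() + " " + g.getType()
                + " DP=" + g.getDP()
                + " AD=" + Arrays.toString(g.getAD())
                + " isHet=" + g.isHet());
    }
}

Example 1 and Example 6 below use the same builder pattern in real project code.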

Example 1
@Test
public void filterOutHetSinglton() {
 VariantContextBuilder b = new VariantContextBuilder();
 Allele a1 = Allele.create("A", true);
 Allele a2 = Allele.create("T", false);

 final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
 final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
 final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));

 // this has a het singleton.
 Collection<Genotype> genotypes = new ArrayList<>();
 genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
 genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
 genotypes.add(GenotypeBuilder.create("donor3", allelesRef));

 VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
 Assert.assertNotNull(vc);

 Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();

 VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
 boolean t1 = f.filterOut(vc);
 Assert.assertFalse(t1);

}
 
Example 2
@Override
//template method for calculating strand bias annotations using the three different methods
public Map<String, Object> annotate(final ReferenceContext ref,
                                    final VariantContext vc,
                                    final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    if ( !vc.isVariant() ) {
        return Collections.emptyMap();
    }

    // if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
    if ( vc.hasGenotypes() ) {
        for (final Genotype g : vc.getGenotypes()) {
            if (g.hasAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) {
                return calculateAnnotationFromGTfield(vc.getGenotypes());
            }
        }
    }

    if (likelihoods != null) {
        return calculateAnnotationFromLikelihoods(likelihoods, vc);
    }
    return Collections.emptyMap();
}
 
Example 3
@NotNull
public static VariantContext enrich(@NotNull final VariantContext context) {
    if (!context.isIndel() && !context.isSNP() ) {
        return context;
    }

    final VariantContextBuilder contextBuilder = new VariantContextBuilder(context).noGenotypes();

    final List<Allele> alleles = context.getAlleles();
    final Function<Allele, String> alleleKey = alleleKey(context);

    final List<Genotype> updatedGenotypes = Lists.newArrayList();
    for (Genotype genotype : context.getGenotypes()) {
        if (!genotype.hasAD() && hasRequiredAttributes(genotype, alleles, alleleKey)) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).AD(readAD(genotype, alleles, alleleKey));
            updatedGenotypes.add(genotypeBuilder.make());
        } else {
            updatedGenotypes.add(genotype);
        }
    }

    return contextBuilder.genotypes(updatedGenotypes).make();
}
 
Example 4
@NotNull
private static int[] readAD(@NotNull final Genotype tumorGenotype, @NotNull final List<Allele> alleles,
        @NotNull Function<Allele, String> alleleKey) {
    final List<Integer> alleleAds = alleles.stream().map(allele -> {
        final String[] alleleADs = tumorGenotype.getExtendedAttribute(alleleKey.apply(allele), "0").toString().split(SEPARATOR);
        if (alleleADs.length > 0) {
            try {
                return Integer.parseInt(alleleADs[0]);
            } catch (final NumberFormatException e) {
                return 0;
            }
        }
        return 0;
    }).collect(Collectors.toList());
    return alleleAds.stream().mapToInt(Integer::intValue).toArray();
}
 
Example 5
/**
 * Initialize cache of allele anyploid indices
 *
 * Initialize the cache of PL index to a list of alleles for each ploidy.
 *
 * @param vc    Variant Context
 */
private void initalizeAlleleAnyploidIndicesCache(final VariantContext vc) {
    if (vc.getType() != VariantContext.Type.NO_VARIATION) { // Bypass if not a variant
        for (final Genotype g : vc.getGenotypes()) {
            // Make a new entry if we have not yet cached a PL to allele indices map for this ploidy and allele count
            // skip if there are no PLs -- this avoids hanging on high-allelic somatic samples, for example, where
            // there's no need for the PL indices since they don't exist
            if (g.getPloidy() != 0 && (!ploidyToNumberOfAlleles.containsKey(g.getPloidy()) || ploidyToNumberOfAlleles.get(g.getPloidy()) < vc.getNAlleles())) {
                if (vc.getGenotypes().stream().anyMatch(Genotype::hasLikelihoods)) {
                    GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(vc.getNAlleles() - 1, g.getPloidy());
                    ploidyToNumberOfAlleles.put(g.getPloidy(), vc.getNAlleles());
                }
            }
        }
    }

}
 
Example 6
@NotNull
private VariantContext create(@NotNull final BaseDepth snp) {
    final List<Allele> alleles = Lists.newArrayList();
    alleles.add(Allele.create(snp.ref().toString(), true));
    alleles.add(Allele.create(snp.alt().toString(), false));

    final List<Integer> adField = Lists.newArrayList();
    adField.add(snp.refSupport());
    adField.add(snp.altSupport());

    final Genotype normal = new GenotypeBuilder(config.primaryReference()).DP(snp.readDepth())
            .AD(adField.stream().mapToInt(i -> i).toArray())
            .alleles(alleles)
            .make();

    final VariantContextBuilder builder = new VariantContextBuilder().chr(snp.chromosome())
            .start(snp.position())
            .computeEndFromAlleles(alleles, (int) snp.position())
            .genotypes(normal)
            .alleles(alleles);

    return builder.make();
}
 
Example 7
@SuppressWarnings("unchecked")
public static int[] getStrandCounts(final Genotype g) {
    int[] data;
    if ( g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY).getClass().equals(String.class)) {
        final String sbbsString = (String)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY);
        data = encodeSBBS(sbbsString);
    } else if (g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY).getClass().equals(ArrayList.class)) {
       if (((ArrayList<Object>)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)).get(0) instanceof Integer) {
            data = encodeSBBS((ArrayList<Integer>) g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY));
        }
        else if (((ArrayList<Object>)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)).get(0) instanceof String) {
            final List<Integer> sbbsList = new ArrayList<>();
            for (final Object o : (ArrayList<Object>) g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) {
                sbbsList.add(Integer.parseInt(o.toString()));
            }
            data = encodeSBBS(sbbsList);
        } else {
            throw new GATKException("Unexpected " + GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY + " type");
        }

    } else {
        throw new GATKException("Unexpected " + GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY + " type");
    }
    return data;
}
 
Example 8
@NotNull
private static Genotype createGenotype(boolean germline, @NotNull final VariantHotspot variant,
        @NotNull final ReadContextCounter counter) {
    return new GenotypeBuilder(counter.sample()).DP(counter.depth())
            .AD(new int[] { counter.refSupport(), counter.altSupport() })
            .attribute(READ_CONTEXT_QUALITY, counter.quality())
            .attribute(READ_CONTEXT_COUNT, counter.counts())
            .attribute(READ_CONTEXT_IMPROPER_PAIR, counter.improperPair())
            .attribute(READ_CONTEXT_JITTER, counter.jitter())
            .attribute(RAW_ALLELIC_DEPTH, new int[] { counter.rawRefSupport(), counter.rawAltSupport() })
            .attribute(RAW_ALLELIC_BASE_QUALITY, new int[] { counter.rawRefBaseQuality(), counter.rawAltBaseQuality() })
            .attribute(RAW_DEPTH, counter.rawDepth())
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, counter.vaf())
            .alleles(createGenotypeAlleles(germline, variant, counter))
            .make();
}
 
Example 9
/**
 * Calculate the truth call quality when we collapse copy-numbers into deletion, neutral (ref) and duplication.
 *
 * @param truthGenotype the genotype containing the posteriors log10 to use for the calculation.
 * @param truthCopyNumber the truth call copy number.
 * @return never {@code null}, a Phred scaled quality score 0 or greater.
 */
private double calculateTruthQuality(final Genotype truthGenotype, final int truthCopyNumber) {
    final double[] truthPosteriors = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(truthGenotype,
            GS_COPY_NUMBER_POSTERIOR_KEY, () -> new double[truthCopyNumber + 1],
            Double.NEGATIVE_INFINITY);
    final double totalLog10Prob = MathUtils.log10SumLog10(truthPosteriors);
    final double delLog10Prob = MathUtils.log10SumLog10(truthPosteriors, 0, Math.min(truthNeutralCopyNumber, truthPosteriors.length)) - totalLog10Prob;
    final double neutralLog10Prob = truthNeutralCopyNumber >= truthPosteriors.length ? Double.NEGATIVE_INFINITY : truthPosteriors[truthNeutralCopyNumber] - totalLog10Prob;
    final double dupLog10Prob = truthNeutralCopyNumber + 1 >= truthPosteriors.length ? Double.NEGATIVE_INFINITY
            : MathUtils.log10SumLog10(truthPosteriors, truthNeutralCopyNumber + 1, truthPosteriors.length) - totalLog10Prob;
    if (truthCopyNumber < truthNeutralCopyNumber) {
        return -10.0 * MathUtils.approximateLog10SumLog10(neutralLog10Prob, dupLog10Prob);
    } else if (truthCopyNumber > truthNeutralCopyNumber) {
        return -10.0 * MathUtils.approximateLog10SumLog10(neutralLog10Prob, delLog10Prob);
    } else {
        return -10.0 * MathUtils.approximateLog10SumLog10(delLog10Prob, dupLog10Prob);
    }
}
 
Example 10
/**
 * Add information from this Genotype to this band.
 *
 * @param pos Current genomic position. Must be 1 base after the previous position
 * @param genotype A non-null Genotype with TLOD and DP attributes
 */
@Override
public void add(final int pos, final int newEnd, final Genotype genotype) {
    Utils.nonNull(genotype, "genotype cannot be null");
    Utils.validateArg( pos == end + 1,"adding genotype at pos " + pos + " isn't contiguous with previous end " + end);
    // Make sure the LOD is within the bounds of this band
    final double currentLOD = Double.parseDouble(genotype.getExtendedAttribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY).toString());
    if ( !withinBounds(currentLOD)) {
        throw new IllegalArgumentException("cannot add a genotype with LOD=" + currentLOD + " because it's not within bounds ["
                + this.getLODLowerBound() + ',' + this.getLODUpperBound() + ')');
    }

    if( minBlockLOD == Double.POSITIVE_INFINITY || currentLOD < minBlockLOD) {
        minBlockLOD = currentLOD;
    }

    end = newEnd;
    DPs.add(Math.max(genotype.getDP(), 0)); // DP must be >= 0
}
 
Example 11
@Override
public void annotate(final ReferenceContext ref,
                     final VariantContext vc,
                     final Genotype g,
                     final GenotypeBuilder gb,
                     final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    if ( g == null || !g.isCalled() || likelihoods == null) {
        return;
    }
    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a  subset of AlleleLikelihoods alleles " + likelihoods.alleles());

    gb.AD(annotateWithLikelihoods(vc, g, alleles, likelihoods));
}
 
Example 12
private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) {
    Genotype tumorGenotype = vc.getGenotype(tumorSample);
    final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
            tumorGenotype, (StrandArtifact.POSTERIOR_PROBABILITIES_KEY), () -> null, -1);
    final double[] mapAlleleFractionEstimates = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
            tumorGenotype, (StrandArtifact.MAP_ALLELE_FRACTIONS_KEY), () -> null, -1);

    if (posteriorProbabilities == null || mapAlleleFractionEstimates == null){
        return;
    }

    final int maxZIndex = MathUtils.maxElementIndex(posteriorProbabilities);

    if (maxZIndex == StrandArtifact.StrandArtifactZ.NO_ARTIFACT.ordinal()){
        return;
    }

    if (posteriorProbabilities[maxZIndex] > MTFAC.STRAND_ARTIFACT_POSTERIOR_PROB_THRESHOLD &&
            mapAlleleFractionEstimates[maxZIndex] < MTFAC.STRAND_ARTIFACT_ALLELE_FRACTION_THRESHOLD){
        filters.add(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
    }
}
 
Example 13
private void assertVariantsAreCoveredBySegments(final List<VariantContext> variants,
                                                final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
    for (final VariantContext variant : variants) {
        final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> matches =
                variantSegments.stream()
                .filter(s -> new SimpleInterval(variant).equals(s.getSegment().getInterval()))
                .collect(Collectors.toList());
        Assert.assertFalse(matches.isEmpty());
        for (final Genotype genotype : variant.getGenotypes()) {
            final boolean discovery = genotype.getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString().equals(XHMMSegmentGenotyper.DISCOVERY_TRUE);
            if (discovery) {
                Assert.assertTrue(matches.stream().anyMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
            } else {
                Assert.assertTrue(matches.stream().noneMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
            }
        }
    }
}
 
Example 14
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final VCFFileReader reader = new VCFFileReader(vcf, false);
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    reader.iterator().forEachRemaining(vc -> {
        final int targetCount = targets.indexRange(vc).size();
        for (final Genotype genotype : vc.getGenotypes()) {
            final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
            final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(","))
                    .mapToDouble(Double::parseDouble).toArray();

            final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
            final CopyNumberTriState call = expectedCall(cn);
            final double exactLog10Prob = expectedExactLog10(call, cnp);
            final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(
                    new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()),
                    0.000, call, -10.0 * exactLog10Prob, Double.NaN,  Double.NaN, Double.NaN,
                      -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum)
            );
            result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
        }
    });
    return result;
}
 
Example 15
@Test
public void testGenotypeConcordanceDetermineStateFilter() throws Exception {
    final Set<String> filters = new HashSet<String>(Arrays.asList("BAD!"));

    // Filtering on the variant context
    final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make();

    final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T);
    final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T));
    final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make();

    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0);

    // Filtering on the genotype
    final List<String> gtFilters = new ArrayList<String>(Arrays.asList("WICKED"));
    final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make();
    final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make();

    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
}
 
Example 16
boolean genotypeCanBeMergedInCurrentBlock(final Genotype g) {
    final HomRefBlock currentHomRefBlock = (HomRefBlock)currentBlock;
    return currentHomRefBlock != null
            && currentHomRefBlock.withinBounds(Math.min(g.getGQ(), MAX_GENOTYPE_QUAL))
            && currentHomRefBlock.getPloidy() == g.getPloidy()
            && (currentHomRefBlock.getMinPLs() == null || !g.hasPL() || (currentHomRefBlock.getMinPLs().length == g.getPL().length));
}
 
Example 17
private void run() {
    fileWriter.writeHeader(fileReader.getFileHeader());
    for (final VariantContext context : fileReader) {

        final VariantContextBuilder builder = new VariantContextBuilder(context);
        final List<Genotype> genotypes = Lists.newArrayList();

        for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
            Genotype genotype = context.getGenotype(genotypeIndex);

            if (genotype.hasAD() && genotype.hasDP()) {
                int[] ad = genotype.getAD();
                int total = 0;
                boolean updateRecord = false;
                for (int i = 0; i < ad.length; i++) {
                    int adValue = ad[i];
                    if (adValue < 0) {
                        updateRecord = true;
                        ad[i] = Math.abs(adValue);
                    }
                    total += Math.abs(adValue);
                }

                if (updateRecord) {
                    LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
                    Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
                    genotypes.add(updated);
                }
                else {
                    genotypes.add(genotype);
                }
            }
        }

        builder.genotypes(genotypes);
        fileWriter.add(builder.make());
    }
}
 
Example 18
@Test
public void testGVCFModeGenotypePosteriors() throws Exception {
    Utils.resetRandomGenerator();

    final String inputFileName = NA12878_20_21_WGS_bam;
    final String referenceFileName = b37_reference_20_21;

    final File output = createTempFile("testGVCFModeIsConsistentWithPastResults", ".g.vcf");

    final String[] args = {
            "-I", inputFileName,
            "-R", referenceFileName,
            "-L", "20:10000000-10100000",
            "-O", output.getAbsolutePath(),
            "--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
            "--" + GenotypeCalculationArgumentCollection.SUPPORTING_CALLSET_LONG_NAME,
                largeFileTestDir + "1000G.phase3.broad.withGenotypes.chr20.10100000.vcf",
            "--" + GenotypeCalculationArgumentCollection.NUM_REF_SAMPLES_LONG_NAME, "2500",
            "-pairHMM", "AVX_LOGLESS_CACHING",
            "--" + AssemblyBasedCallerArgumentCollection.ALLELE_EXTENSION_LONG_NAME, "2",
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"
    };

    runCommandLine(args);

    Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());

    for (final VariantContext vc : results.getRight()) {
        final Genotype g = vc.getGenotype(0);
        if (g.hasDP() && g.getDP() > 0 && g.hasGQ() && g.getGQ() > 0) {
            Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
        }
        if (isGVCFReferenceBlock(vc) ) {
            Assert.assertTrue(!vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
        }
        else if (!vc.getAlternateAllele(0).equals(Allele.NON_REF_ALLELE)){      //there are some variants that don't have non-symbolic alts
            Assert.assertTrue(vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
        }
    }
}
 
Example 19
@Override
public void submit(VariantContext vc) {
    Utils.nonNull(vc);
    Utils.validateArg(vc.hasGenotypes(), "GVCF assumes that the VariantContext has genotypes");
    Utils.validateArg(vc.getGenotypes().size() == 1, () -> "GVCF assumes that the VariantContext has exactly one genotype but saw " + vc.getGenotypes().size());

    if (sampleName == null) {
        sampleName = vc.getGenotype(0).getSampleName();
    }

    if (currentBlock != null && !currentBlock.isContiguous(vc)) {
        // we've made a non-contiguous step (across interval, onto another chr), so finalize
        emitCurrentBlock();
    }

    final Genotype g = vc.getGenotype(0);
    if (g.isHomRef() && vc.hasAlternateAllele(Allele.NON_REF_ALLELE) && vc.isBiallelic()) {
        // create bands
        final VariantContext maybeCompletedBand = addHomRefSite(vc, g);
        if (maybeCompletedBand != null) {
            toOutput.add(maybeCompletedBand);
        }
    } else {
        // g is variant, so flush the bands and emit vc
        emitCurrentBlock();
        nextAvailableStart = vc.getEnd();
        contigOfNextAvailableStart = vc.getContig();
        toOutput.add(vc);
    }
}
 
Example 20
@Override
Genotype createHomRefGenotype(final String sampleName, final boolean floorBlock) {
    final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, getRef()));  //FIXME: for somatic stuff we output the genotype as diploid because that's familiar for human
    gb.noAD().noPL().noAttributes(); // clear all attributes

    gb.attribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, minBlockLOD);
    gb.DP(getMedianDP());
    gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, getMinDP());

    return gb.make();
}
 
Example 21
@NotNull
public Optional<SomaticVariant> createVariant(@NotNull final String sample, @Nullable final String reference,
        @Nullable final String rna, @NotNull final VariantContext context) {
    final Genotype genotype = context.getGenotype(sample);

    if (filter.test(context) && AllelicDepth.containsAllelicDepth(genotype)) {
        final AllelicDepth tumorDepth = AllelicDepth.fromGenotype(context.getGenotype(sample));

        final Optional<AllelicDepth> referenceDepth = Optional.ofNullable(reference)
                .flatMap(x -> Optional.ofNullable(context.getGenotype(x)))
                .filter(AllelicDepth::containsAllelicDepth)
                .map(AllelicDepth::fromGenotype);

        final Optional<AllelicDepth> rnaDepth = Optional.ofNullable(rna)
                .flatMap(x -> Optional.ofNullable(context.getGenotype(x)))
                .filter(AllelicDepth::containsAllelicDepth)
                .map(AllelicDepth::fromGenotype);

        if (tumorDepth.totalReadCount() > 0) {
            return Optional.of(createVariantBuilder(tumorDepth, context, canonicalAnnotationFactory))
                    .map(x -> x.rnaDepth(rnaDepth.orElse(null)))
                    .map(x -> x.referenceDepth(referenceDepth.orElse(null)))
                    .map(x -> enrichment.enrich(x, context))
                    .map(ImmutableSomaticVariantImpl.Builder::build);
        }
    }
    return Optional.empty();
}
 
Example 22
/**
 * Helper function that combines data from multiple genotypes to a list of data for each allele
 * @param dataByAllele the map to return with the list of values separated by allele
 * @param vc the variant context to get the genotypes from
 * @param preconditions a predicate that must pass for data to be returned
 * @param getData a function that returns the list of data for a genotype. The size must match the number of alleles in the dataByAllele map
 * @param filteringEngine
 * @param <T> the type of the data returned
 * @return the dataByAllele map filled in with the list of values for each allele
 */
private static <T> LinkedHashMap<Allele, List<T>> combineDataByAllele(final LinkedHashMap<Allele, List<T>> dataByAllele, final VariantContext vc, Predicate<Genotype> preconditions, Function<Genotype, List<T>> getData, final Mutect2FilteringEngine filteringEngine) {

    // pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele
    vc.getGenotypes().stream().filter(preconditions)
            .forEach(g -> {
                Iterator<T> alleleDataIterator = getData.apply(g).iterator();
                Iterator<List<T>> dataByAlleleIterator = dataByAllele.values().iterator();
                while(alleleDataIterator.hasNext() && dataByAlleleIterator.hasNext())
                    dataByAlleleIterator.next().add(alleleDataIterator.next());

            });

    return dataByAllele;
}
 
Example 23
public static void assertGenotypesAreEqual(final Genotype actual, final Genotype expected, final List<String> extendedAttributesToIgnore) {
    Assert.assertEquals(actual.getSampleName(), expected.getSampleName(), "Genotype names");
    Assert.assertTrue(CollectionUtils.isEqualCollection(actual.getAlleles(), expected.getAlleles()), "Genotype alleles");
    Assert.assertEquals(actual.getGenotypeString(false), expected.getGenotypeString(false), "Genotype string");
    Assert.assertEquals(actual.getType(), expected.getType(), "Genotype type");

    // filters are the same
    Assert.assertEquals(actual.getFilters(), expected.getFilters(), "Genotype fields");
    Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "Genotype isFiltered");

    // inline attributes
    Assert.assertEquals(actual.hasDP(), expected.hasDP(), "Genotype hasDP");
    Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp");
    Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD");
    Assert.assertEquals(actual.getAD(), expected.getAD(), "Genotype AD");
    Assert.assertEquals(actual.hasGQ(), expected.hasGQ(), "Genotype hasGQ");
    Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq");
    Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL: " + actual.toString());
    Assert.assertEquals(actual.getPL(), expected.getPL(), "Genotype PL");

    Assert.assertEquals(actual.hasLikelihoods(), expected.hasLikelihoods(), "Genotype haslikelihoods");
    Assert.assertEquals(actual.getLikelihoodsString(), expected.getLikelihoodsString(), "Genotype getlikelihoodsString");
    Assert.assertEquals(actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods");

    Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype phredScaledQual");
    assertAttributesEquals(filterIgnoredAttributes(actual.getExtendedAttributes(), extendedAttributesToIgnore), filterIgnoredAttributes(expected.getExtendedAttributes(), extendedAttributesToIgnore));
    Assert.assertEquals(actual.isPhased(), expected.isPhased(), "Genotype isPhased");
    Assert.assertEquals(actual.getPloidy(), expected.getPloidy(), "Genotype getPloidy");
}
 
Example 24
@NotNull
public static VariantContext create(@NotNull final SageVariant entry) {
    final List<Genotype> genotypes = Lists.newArrayList();
    for (int i = 0; i < entry.normalAltContexts().size(); i++) {
        ReadContextCounter normalContext = entry.normalAltContexts().get(i);
        genotypes.add(createGenotype(i == 0, entry.variant(), normalContext));
    }

    entry.tumorAltContexts().stream().map(x -> createGenotype(false, entry.variant(), x)).forEach(genotypes::add);
    return createContext(entry, createAlleles(entry.variant()), genotypes, entry.readContext());
}
 
Example 25
private final boolean hasArtifact(final Genotype g, final double populationAlleleFrequency) {
    final int altCount = altCount(g);
    if (altCount == 0) {
        return false;
    }
    final int totalCount = (int) MathUtils.sum(g.getAD());

    return germlineProbability(populationAlleleFrequency, altCount, totalCount) < maxGermlineProbability;
}
 
Example 26
public void add(@NotNull final VariantContext context) {
    final VariantHotspot hotspot = hotspot(context);
    final Counter counter = map.computeIfAbsent(hotspot, Counter::new);
    final Genotype genotype = context.getGenotype(0);
    if (!hotspot.ref().contains("N") && genotype.hasExtendedAttribute(SageVCF.RAW_ALLELIC_DEPTH)) {
        String rawDepth = (String) genotype.getExtendedAttribute(SageVCF.RAW_ALLELIC_DEPTH);
        int allelicDepth = Integer.parseInt(rawDepth.split(",")[1]);
        if (allelicDepth >= MIN_INPUT_ALLELIC_DEPTH) {
            counter.increment(allelicDepth);
        }
    }
}
 
Example 27
public static boolean invalidAF(final VariantContext variant)
{
    // Filter variants with an allelic fraction of less than 10% in the germline
    final Genotype normalData = variant.getGenotype(0);
    double vf = getDoubleValue(normalData, VF);
    double bvf = getDoubleValue(normalData, BVF);
    double ref = getDoubleValue(normalData, REF);
    double refPair = getDoubleValue(normalData, REFPAIR);
    double af = (vf + bvf) / (vf + bvf + ref + refPair);

    return af < MIN_AF;
}
 
Example 28
/**
 *
 * @return the number of reads at the given site, calculated as InfoField {@link VCFConstants#DEPTH_KEY} minus the
 * format field {@link GATKVCFConstants#MIN_DP_FORMAT_KEY} or DP of each of the HomRef genotypes at that site
 * @throws UserException.BadInput if the {@link VCFConstants#DEPTH_KEY} is missing or if the calculated depth is <= 0
 */
@VisibleForTesting
private static int getNumOfReads(final VariantContext vc) {
    if(vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
        int mqDP = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, 0);
        if (mqDP > 0) {
            return mqDP;
        }
    }

    //don't use the full depth because we don't calculate MQ for reference blocks
    //don't count spanning deletion calls towards number of reads
    int numOfReads = vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
    if(vc.hasGenotypes()) {
        for(final Genotype gt : vc.getGenotypes()) {
           if(hasReferenceDepth(gt)) {
                //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
                if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
                    numOfReads -= Integer.parseInt(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
                } else if (gt.hasDP()) {
                    numOfReads -= gt.getDP();
                }
            }
            else if(hasSpanningDeletionAllele(gt)) {
                //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
                if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
                    numOfReads -= Integer.parseInt(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
                } else if (gt.hasDP()) {
                    numOfReads -= gt.getDP();
                }
            }
        }
    }
    if (numOfReads <= 0){
        numOfReads = -1;  //return -1 to result in a NaN
    }
    return numOfReads;
}
 
Example 29
@Override
public void update1(VariantContext eval, ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext) {
    Iterator<Genotype> it = eval.getGenotypes().iterator();
    while (it.hasNext()){
        Genotype g = it.next();
        if (g.isCalled() && !g.isFiltered()){
            nCalledNotFiltered++;
        }
        else if (g.isNoCall() || g.isFiltered()){
            nNoCallOrFiltered++;
        }
    }
}
 
Example 30
/**
 * Checks if the two variants have the same genotypes for the selected samples
 *
 * @param vc the variant rod VariantContext.
 * @param compVCs the comparison VariantContext
 * @return true if VariantContexts are concordant, false otherwise
 */
private boolean isConcordant (final VariantContext vc, final Collection<VariantContext> compVCs) {
    if (vc == null || compVCs == null || compVCs.isEmpty()) {
        return false;
    }

    // if we're not looking for specific samples then the fact that we have both VCs is enough to call it concordant.
    if (noSamplesSpecified) {
        return true;
    }

    // make a list of all samples contained in this variant VC that are being tracked by the user command line arguments.
    final Set<String> variantSamples = vc.getSampleNames();
    variantSamples.retainAll(samples);

    // check if we can find all samples from the variant rod in the comp rod.
    for (final String sample : variantSamples) {
        boolean foundSample = false;
        for (final VariantContext compVC : compVCs) {
            final Genotype varG = vc.getGenotype(sample);
            final Genotype compG = compVC.getGenotype(sample);
            if (haveSameGenotypes(varG, compG)) {
                foundSample = true;
                break;
            }
        }
        // if at least one sample doesn't have the same genotype, we don't have concordance
        if (!foundSample) {
            return false;
        }
    }
    return true;
}