Java源码示例:htsjdk.variant.variantcontext.Genotype
示例1
@Test
public void filterOutHetSinglton() {
VariantContextBuilder b = new VariantContextBuilder();
Allele a1 = Allele.create("A", true);
Allele a2 = Allele.create("T", false);
final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));
// this has a het singleton.
Collection<Genotype> genotypes = new ArrayList<>();
genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
genotypes.add(GenotypeBuilder.create("donor3", allelesRef));
VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
Assert.assertNotNull(vc);
Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();
VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
boolean t1 = f.filterOut(vc);
Assert.assertFalse(t1);
}
示例2
@Override
//template method for calculating strand bias annotations using the three different methods
public Map<String, Object> annotate(final ReferenceContext ref,
final VariantContext vc,
final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
Utils.nonNull(vc);
if ( !vc.isVariant() ) {
return Collections.emptyMap();
}
// if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
if ( vc.hasGenotypes() ) {
for (final Genotype g : vc.getGenotypes()) {
if (g.hasAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) {
return calculateAnnotationFromGTfield(vc.getGenotypes());
}
}
}
if (likelihoods != null) {
return calculateAnnotationFromLikelihoods(likelihoods, vc);
}
return Collections.emptyMap();
}
示例3
@NotNull
public static VariantContext enrich(@NotNull final VariantContext context) {
if (!context.isIndel() && !context.isSNP() ) {
return context;
}
final VariantContextBuilder contextBuilder = new VariantContextBuilder(context).noGenotypes();
final List<Allele> alleles = context.getAlleles();
final Function<Allele, String> alleleKey = alleleKey(context);
final List<Genotype> updatedGenotypes = Lists.newArrayList();
for (Genotype genotype : context.getGenotypes()) {
if (!genotype.hasAD() && hasRequiredAttributes(genotype, alleles, alleleKey)) {
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).AD(readAD(genotype, alleles, alleleKey));
updatedGenotypes.add(genotypeBuilder.make());
} else {
updatedGenotypes.add(genotype);
}
}
return contextBuilder.genotypes(updatedGenotypes).make();
}
示例4
@NotNull
private static int[] readAD(@NotNull final Genotype tumorGenotype, @NotNull final List<Allele> alleles,
@NotNull Function<Allele, String> alleleKey) {
final List<Integer> alleleAds = alleles.stream().map(allele -> {
final String[] alleleADs = tumorGenotype.getExtendedAttribute(alleleKey.apply(allele), "0").toString().split(SEPARATOR);
if (alleleADs.length > 0) {
try {
return Integer.parseInt(alleleADs[0]);
} catch (final NumberFormatException e) {
return 0;
}
}
return 0;
}).collect(Collectors.toList());
return alleleAds.stream().mapToInt(Integer::intValue).toArray();
}
示例5
/**
* Initialize cache of allele anyploid indices
*
* Initialize the cache of PL index to a list of alleles for each ploidy.
*
* @param vc Variant Context
*/
private void initalizeAlleleAnyploidIndicesCache(final VariantContext vc) {
if (vc.getType() != VariantContext.Type.NO_VARIATION) { // Bypass if not a variant
for (final Genotype g : vc.getGenotypes()) {
// Make a new entry if the we have not yet cached a PL to allele indices map for this ploidy and allele count
// skip if there are no PLs -- this avoids hanging on high-allelic somatic samples, for example, where
// there's no need for the PL indices since they don't exist
if (g.getPloidy() != 0 && (!ploidyToNumberOfAlleles.containsKey(g.getPloidy()) || ploidyToNumberOfAlleles.get(g.getPloidy()) < vc.getNAlleles())) {
if (vc.getGenotypes().stream().anyMatch(Genotype::hasLikelihoods)) {
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(vc.getNAlleles() - 1, g.getPloidy());
ploidyToNumberOfAlleles.put(g.getPloidy(), vc.getNAlleles());
}
}
}
}
}
示例6
@NotNull
private VariantContext create(@NotNull final BaseDepth snp) {
final List<Allele> alleles = Lists.newArrayList();
alleles.add(Allele.create(snp.ref().toString(), true));
alleles.add(Allele.create(snp.alt().toString(), false));
final List<Integer> adField = Lists.newArrayList();
adField.add(snp.refSupport());
adField.add(snp.altSupport());
final Genotype normal = new GenotypeBuilder(config.primaryReference()).DP(snp.readDepth())
.AD(adField.stream().mapToInt(i -> i).toArray())
.alleles(alleles)
.make();
final VariantContextBuilder builder = new VariantContextBuilder().chr(snp.chromosome())
.start(snp.position())
.computeEndFromAlleles(alleles, (int) snp.position())
.genotypes(normal)
.alleles(alleles);
return builder.make();
}
示例7
@SuppressWarnings("unchecked")
public static int[] getStrandCounts(final Genotype g) {
int[] data;
if ( g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY).getClass().equals(String.class)) {
final String sbbsString = (String)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY);
data = encodeSBBS(sbbsString);
} else if (g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY).getClass().equals(ArrayList.class)) {
if (((ArrayList<Object>)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)).get(0) instanceof Integer) {
data = encodeSBBS((ArrayList<Integer>) g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY));
}
else if (((ArrayList<Object>)g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)).get(0) instanceof String) {
final List<Integer> sbbsList = new ArrayList<>();
for (final Object o : (ArrayList<Object>) g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) {
sbbsList.add(Integer.parseInt(o.toString()));
}
data = encodeSBBS(sbbsList);
} else {
throw new GATKException("Unexpected " + GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY + " type");
}
} else {
throw new GATKException("Unexpected " + GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY + " type");
}
return data;
}
示例8
@NotNull
private static Genotype createGenotype(boolean germline, @NotNull final VariantHotspot variant,
@NotNull final ReadContextCounter counter) {
return new GenotypeBuilder(counter.sample()).DP(counter.depth())
.AD(new int[] { counter.refSupport(), counter.altSupport() })
.attribute(READ_CONTEXT_QUALITY, counter.quality())
.attribute(READ_CONTEXT_COUNT, counter.counts())
.attribute(READ_CONTEXT_IMPROPER_PAIR, counter.improperPair())
.attribute(READ_CONTEXT_JITTER, counter.jitter())
.attribute(RAW_ALLELIC_DEPTH, new int[] { counter.rawRefSupport(), counter.rawAltSupport() })
.attribute(RAW_ALLELIC_BASE_QUALITY, new int[] { counter.rawRefBaseQuality(), counter.rawAltBaseQuality() })
.attribute(RAW_DEPTH, counter.rawDepth())
.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, counter.vaf())
.alleles(createGenotypeAlleles(germline, variant, counter))
.make();
}
示例9
/**
* Calculate the truth call quality when we collapse copy-numbers into deletion, neutral (ref) and duplication.
*
* @param truthGenotype the genotype containing the posteriors log10 to use for the calculation.
* @param truthCopyNumber the truth call copy number.
* @return never {@code null}, a Phred scaled quality score 0 or greater.
*/
private double calculateTruthQuality(final Genotype truthGenotype, final int truthCopyNumber) {
final double[] truthPosteriors = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(truthGenotype,
GS_COPY_NUMBER_POSTERIOR_KEY, () -> new double[truthCopyNumber + 1],
Double.NEGATIVE_INFINITY);
final double totalLog10Prob = MathUtils.log10SumLog10(truthPosteriors);
final double delLog10Prob = MathUtils.log10SumLog10(truthPosteriors, 0, Math.min(truthNeutralCopyNumber, truthPosteriors.length)) - totalLog10Prob;
final double neutralLog10Prob = truthNeutralCopyNumber >= truthPosteriors.length ? Double.NEGATIVE_INFINITY : truthPosteriors[truthNeutralCopyNumber] - totalLog10Prob;
final double dupLog10Prob = truthNeutralCopyNumber + 1 >= truthPosteriors.length ? Double.NEGATIVE_INFINITY
: MathUtils.log10SumLog10(truthPosteriors, truthNeutralCopyNumber + 1, truthPosteriors.length) - totalLog10Prob;
if (truthCopyNumber < truthNeutralCopyNumber) {
return -10.0 * MathUtils.approximateLog10SumLog10(neutralLog10Prob, dupLog10Prob);
} else if (truthCopyNumber > truthNeutralCopyNumber) {
return -10.0 * MathUtils.approximateLog10SumLog10(neutralLog10Prob, delLog10Prob);
} else {
return -10.0 * MathUtils.approximateLog10SumLog10(delLog10Prob, dupLog10Prob);
}
}
示例10
/**
* Add information from this Genotype to this band.
*
* @param pos Current genomic position. Must be 1 base after the previous position
* @param genotype A non-null Genotype with TLOD and DP attributes
*/
@Override
public void add(final int pos, final int newEnd, final Genotype genotype) {
Utils.nonNull(genotype, "genotype cannot be null");
Utils.validateArg( pos == end + 1,"adding genotype at pos " + pos + " isn't contiguous with previous end " + end);
// Make sure the LOD is within the bounds of this band
final double currentLOD = Double.parseDouble(genotype.getExtendedAttribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY).toString());
if ( !withinBounds(currentLOD)) {
throw new IllegalArgumentException("cannot add a genotype with LOD=" + currentLOD + " because it's not within bounds ["
+ this.getLODLowerBound() + ',' + this.getLODUpperBound() + ')');
}
if( minBlockLOD == Double.POSITIVE_INFINITY || currentLOD < minBlockLOD) {
minBlockLOD = currentLOD;
}
end = newEnd;
DPs.add(Math.max(genotype.getDP(), 0)); // DP must be >= 0
}
示例11
@Override
public void annotate(final ReferenceContext ref,
final VariantContext vc,
final Genotype g,
final GenotypeBuilder gb,
final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
Utils.nonNull(gb, "gb is null");
Utils.nonNull(vc, "vc is null");
if ( g == null || !g.isCalled() || likelihoods == null) {
return;
}
final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());
// make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a subset of AlleleLikelihoods alleles " + likelihoods.alleles());
gb.AD(annotateWithLikelihoods(vc, g, alleles, likelihoods));
}
示例12
private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) {
Genotype tumorGenotype = vc.getGenotype(tumorSample);
final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
tumorGenotype, (StrandArtifact.POSTERIOR_PROBABILITIES_KEY), () -> null, -1);
final double[] mapAlleleFractionEstimates = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
tumorGenotype, (StrandArtifact.MAP_ALLELE_FRACTIONS_KEY), () -> null, -1);
if (posteriorProbabilities == null || mapAlleleFractionEstimates == null){
return;
}
final int maxZIndex = MathUtils.maxElementIndex(posteriorProbabilities);
if (maxZIndex == StrandArtifact.StrandArtifactZ.NO_ARTIFACT.ordinal()){
return;
}
if (posteriorProbabilities[maxZIndex] > MTFAC.STRAND_ARTIFACT_POSTERIOR_PROB_THRESHOLD &&
mapAlleleFractionEstimates[maxZIndex] < MTFAC.STRAND_ARTIFACT_ALLELE_FRACTION_THRESHOLD){
filters.add(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME);
}
}
示例13
private void assertVariantsAreCoveredBySegments(final List<VariantContext> variants,
final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
for (final VariantContext variant : variants) {
final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> matches =
variantSegments.stream()
.filter(s -> new SimpleInterval(variant).equals(s.getSegment().getInterval()))
.collect(Collectors.toList());
Assert.assertFalse(matches.isEmpty());
for (final Genotype genotype : variant.getGenotypes()) {
final boolean discovery = genotype.getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString().equals(XHMMSegmentGenotyper.DISCOVERY_TRUE);
if (discovery) {
Assert.assertTrue(matches.stream().anyMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
} else {
Assert.assertTrue(matches.stream().noneMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
}
}
}
}
示例14
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
final VCFFileReader reader = new VCFFileReader(vcf, false);
final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
reader.iterator().forEachRemaining(vc -> {
final int targetCount = targets.indexRange(vc).size();
for (final Genotype genotype : vc.getGenotypes()) {
final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(","))
.mapToDouble(Double::parseDouble).toArray();
final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
final CopyNumberTriState call = expectedCall(cn);
final double exactLog10Prob = expectedExactLog10(call, cnp);
final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(
new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()),
0.000, call, -10.0 * exactLog10Prob, Double.NaN, Double.NaN, Double.NaN,
-10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum)
);
result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
}
});
return result;
}
示例15
@Test
public void testGenotypeConcordanceDetermineStateFilter() throws Exception {
final Set<String> filters = new HashSet<String>(Arrays.asList("BAD!"));
// Filtering on the variant context
final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C);
final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make();
final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T);
final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T));
final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make();
testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0);
testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0);
// Filtering on the genotype
final List<String> gtFilters = new ArrayList<String>(Arrays.asList("WICKED"));
final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C);
final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make();
final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make();
testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
}
示例16
boolean genotypeCanBeMergedInCurrentBlock(final Genotype g) {
final HomRefBlock currentHomRefBlock = (HomRefBlock)currentBlock;
return currentHomRefBlock != null
&& currentHomRefBlock.withinBounds(Math.min(g.getGQ(), MAX_GENOTYPE_QUAL))
&& currentHomRefBlock.getPloidy() == g.getPloidy()
&& (currentHomRefBlock.getMinPLs() == null || !g.hasPL() || (currentHomRefBlock.getMinPLs().length == g.getPL().length));
}
示例17
private void run() {
fileWriter.writeHeader(fileReader.getFileHeader());
for (final VariantContext context : fileReader) {
final VariantContextBuilder builder = new VariantContextBuilder(context);
final List<Genotype> genotypes = Lists.newArrayList();
for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
Genotype genotype = context.getGenotype(genotypeIndex);
if (genotype.hasAD() && genotype.hasDP()) {
int[] ad = genotype.getAD();
int total = 0;
boolean updateRecord = false;
for (int i = 0; i < ad.length; i++) {
int adValue = ad[i];
if (adValue < 0) {
updateRecord = true;
ad[i] = Math.abs(adValue);
}
total += Math.abs(adValue);
}
if (updateRecord) {
LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
genotypes.add(updated);
}
else {
genotypes.add(genotype);
}
}
}
builder.genotypes(genotypes);
fileWriter.add(builder.make());
}
}
示例18
@Test
public void testGVCFModeGenotypePosteriors() throws Exception {
Utils.resetRandomGenerator();
final String inputFileName = NA12878_20_21_WGS_bam;
final String referenceFileName =b37_reference_20_21;
final File output = createTempFile("testGVCFModeIsConsistentWithPastResults", ".g.vcf");
final String[] args = {
"-I", inputFileName,
"-R", referenceFileName,
"-L", "20:10000000-10100000",
"-O", output.getAbsolutePath(),
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"--" + GenotypeCalculationArgumentCollection.SUPPORTING_CALLSET_LONG_NAME,
largeFileTestDir + "1000G.phase3.broad.withGenotypes.chr20.10100000.vcf",
"--" + GenotypeCalculationArgumentCollection.NUM_REF_SAMPLES_LONG_NAME, "2500",
"-pairHMM", "AVX_LOGLESS_CACHING",
"--" + AssemblyBasedCallerArgumentCollection.ALLELE_EXTENSION_LONG_NAME, "2",
"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"
};
runCommandLine(args);
Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
for (final VariantContext vc : results.getRight()) {
final Genotype g = vc.getGenotype(0);
if (g.hasDP() && g.getDP() > 0 && g.hasGQ() && g.getGQ() > 0) {
Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
}
if (isGVCFReferenceBlock(vc) ) {
Assert.assertTrue(!vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
}
else if (!vc.getAlternateAllele(0).equals(Allele.NON_REF_ALLELE)){ //there are some variants that don't have non-symbolic alts
Assert.assertTrue(vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
}
}
}
示例19
@Override
public void submit(VariantContext vc) {
Utils.nonNull(vc);
Utils.validateArg(vc.hasGenotypes(), "GVCF assumes that the VariantContext has genotypes");
Utils.validateArg(vc.getGenotypes().size() == 1, () -> "GVCF assumes that the VariantContext has exactly one genotype but saw " + vc.getGenotypes().size());
if (sampleName == null) {
sampleName = vc.getGenotype(0).getSampleName();
}
if (currentBlock != null && !currentBlock.isContiguous(vc)) {
// we've made a non-contiguous step (across interval, onto another chr), so finalize
emitCurrentBlock();
}
final Genotype g = vc.getGenotype(0);
if (g.isHomRef() && vc.hasAlternateAllele(Allele.NON_REF_ALLELE) && vc.isBiallelic()) {
// create bands
final VariantContext maybeCompletedBand = addHomRefSite(vc, g);
if (maybeCompletedBand != null) {
toOutput.add(maybeCompletedBand);
}
} else {
// g is variant, so flush the bands and emit vc
emitCurrentBlock();
nextAvailableStart = vc.getEnd();
contigOfNextAvailableStart = vc.getContig();
toOutput.add(vc);
}
}
示例20
@Override
Genotype createHomRefGenotype(final String sampleName, final boolean floorBlock) {
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, getRef())); //FIXME: for somatic stuff we output the genotype as diploid because that's familiar for human
gb.noAD().noPL().noAttributes(); // clear all attributes
gb.attribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, minBlockLOD);
gb.DP(getMedianDP());
gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, getMinDP());
return gb.make();
}
示例21
@NotNull
public Optional<SomaticVariant> createVariant(@NotNull final String sample, @Nullable final String reference,
@Nullable final String rna, @NotNull final VariantContext context) {
final Genotype genotype = context.getGenotype(sample);
if (filter.test(context) && AllelicDepth.containsAllelicDepth(genotype)) {
final AllelicDepth tumorDepth = AllelicDepth.fromGenotype(context.getGenotype(sample));
final Optional<AllelicDepth> referenceDepth = Optional.ofNullable(reference)
.flatMap(x -> Optional.ofNullable(context.getGenotype(x)))
.filter(AllelicDepth::containsAllelicDepth)
.map(AllelicDepth::fromGenotype);
final Optional<AllelicDepth> rnaDepth = Optional.ofNullable(rna)
.flatMap(x -> Optional.ofNullable(context.getGenotype(x)))
.filter(AllelicDepth::containsAllelicDepth)
.map(AllelicDepth::fromGenotype);
if (tumorDepth.totalReadCount() > 0) {
return Optional.of(createVariantBuilder(tumorDepth, context, canonicalAnnotationFactory))
.map(x -> x.rnaDepth(rnaDepth.orElse(null)))
.map(x -> x.referenceDepth(referenceDepth.orElse(null)))
.map(x -> enrichment.enrich(x, context))
.map(ImmutableSomaticVariantImpl.Builder::build);
}
}
return Optional.empty();
}
示例22
/**
* Helper function that combines data from multiple genotypes to a list of data for each allele
* @param dataByAllele the map to return with the list of values separated by allele
* @param vc the variant context to get the genotypes from
* @param preconditions a predicate that must pass for data to be returned
* @param getData a function that returns the list of data for a genotype. the size must match the number of allele in the dataByAllele map
* @param filteringEngine
* @param <T> the type of the data returned
* @return the dataByAllele map filled in with the list of values for each allele
*/
private static <T> LinkedHashMap<Allele, List<T>> combineDataByAllele(final LinkedHashMap<Allele, List<T>> dataByAllele, final VariantContext vc, Predicate<Genotype> preconditions, Function<Genotype, List<T>> getData, final Mutect2FilteringEngine filteringEngine) {
// pull all the allele specific data out of each genotype (that passes preconditions) and add it to the list for the allele
vc.getGenotypes().stream().filter(preconditions)
.forEach(g -> {
Iterator<T> alleleDataIterator = getData.apply(g).iterator();
Iterator<List<T>> dataByAlleleIterator = dataByAllele.values().iterator();
while(alleleDataIterator.hasNext() && dataByAlleleIterator.hasNext())
dataByAlleleIterator.next().add(alleleDataIterator.next());
});
return dataByAllele;
}
示例23
public static void assertGenotypesAreEqual(final Genotype actual, final Genotype expected, final List<String> extendedAttributesToIgnore) {
Assert.assertEquals(actual.getSampleName(), expected.getSampleName(), "Genotype names");
Assert.assertTrue(CollectionUtils.isEqualCollection(actual.getAlleles(), expected.getAlleles()), "Genotype alleles");
Assert.assertEquals(actual.getGenotypeString(false), expected.getGenotypeString(false), "Genotype string");
Assert.assertEquals(actual.getType(), expected.getType(), "Genotype type");
// filters are the same
Assert.assertEquals(actual.getFilters(), expected.getFilters(), "Genotype fields");
Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "Genotype isFiltered");
// inline attributes
Assert.assertEquals(actual.hasDP(), expected.hasDP(), "Genotype hasDP");
Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp");
Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD");
Assert.assertEquals(actual.getAD(), expected.getAD(), "Genotype AD");
Assert.assertEquals(actual.hasGQ(), expected.hasGQ(), "Genotype hasGQ");
Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq");
Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL: " + actual.toString());
Assert.assertEquals(actual.getPL(), expected.getPL(), "Genotype PL");
Assert.assertEquals(actual.hasLikelihoods(), expected.hasLikelihoods(), "Genotype haslikelihoods");
Assert.assertEquals(actual.getLikelihoodsString(), expected.getLikelihoodsString(), "Genotype getlikelihoodsString");
Assert.assertEquals(actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods");
Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype phredScaledQual");
assertAttributesEquals(filterIgnoredAttributes(actual.getExtendedAttributes(), extendedAttributesToIgnore), filterIgnoredAttributes(expected.getExtendedAttributes(), extendedAttributesToIgnore));
Assert.assertEquals(actual.isPhased(), expected.isPhased(), "Genotype isPhased");
Assert.assertEquals(actual.getPloidy(), expected.getPloidy(), "Genotype getPloidy");
}
示例24
@NotNull
public static VariantContext create(@NotNull final SageVariant entry) {
final List<Genotype> genotypes = Lists.newArrayList();
for (int i = 0; i < entry.normalAltContexts().size(); i++) {
ReadContextCounter normalContext = entry.normalAltContexts().get(i);
genotypes.add(createGenotype(i == 0, entry.variant(), normalContext));
}
entry.tumorAltContexts().stream().map(x -> createGenotype(false, entry.variant(), x)).forEach(genotypes::add);
return createContext(entry, createAlleles(entry.variant()), genotypes, entry.readContext());
}
示例25
private final boolean hasArtifact(final Genotype g, final double populationAlleleFrequency) {
final int altCount = altCount(g);
if (altCount == 0) {
return false;
}
final int totalCount = (int) MathUtils.sum(g.getAD());
return germlineProbability(populationAlleleFrequency, altCount, totalCount) < maxGermlineProbability;
}
示例26
public void add(@NotNull final VariantContext context) {
final VariantHotspot hotspot = hotspot(context);
final Counter counter = map.computeIfAbsent(hotspot, Counter::new);
final Genotype genotype = context.getGenotype(0);
if (!hotspot.ref().contains("N") && genotype.hasExtendedAttribute(SageVCF.RAW_ALLELIC_DEPTH)) {
String rawDepth = (String) genotype.getExtendedAttribute(SageVCF.RAW_ALLELIC_DEPTH);
int allelicDepth = Integer.parseInt(rawDepth.split(",")[1]);
if (allelicDepth >= MIN_INPUT_ALLELIC_DEPTH) {
counter.increment(allelicDepth);
}
}
}
示例27
public static boolean invalidAF(final VariantContext variant)
{
// Filter variants with an allelic fraction of less than 10% in the germline
final Genotype normalData = variant.getGenotype(0);
double vf = getDoubleValue(normalData, VF);
double bvf = getDoubleValue(normalData, BVF);
double ref = getDoubleValue(normalData, REF);
double refPair = getDoubleValue(normalData, REFPAIR);
double af = (vf + bvf) / (vf + bvf + ref + refPair);
return af < MIN_AF;
}
示例28
/**
*
* @return the number of reads at the given site, calculated as InfoField {@link VCFConstants#DEPTH_KEY} minus the
* format field {@link GATKVCFConstants#MIN_DP_FORMAT_KEY} or DP of each of the HomRef genotypes at that site
* @throws UserException.BadInput if the {@link VCFConstants#DEPTH_KEY} is missing or if the calculated depth is <= 0
*/
@VisibleForTesting
private static int getNumOfReads(final VariantContext vc) {
if(vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
int mqDP = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, 0);
if (mqDP > 0) {
return mqDP;
}
}
//don't use the full depth because we don't calculate MQ for reference blocks
//don't count spanning deletion calls towards number of reads
int numOfReads = vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
if(vc.hasGenotypes()) {
for(final Genotype gt : vc.getGenotypes()) {
if(hasReferenceDepth(gt)) {
//site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
numOfReads -= Integer.parseInt(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
} else if (gt.hasDP()) {
numOfReads -= gt.getDP();
}
}
else if(hasSpanningDeletionAllele(gt)) {
//site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
numOfReads -= Integer.parseInt(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
} else if (gt.hasDP()) {
numOfReads -= gt.getDP();
}
}
}
}
if (numOfReads <= 0){
numOfReads = -1; //return -1 to result in a NaN
}
return numOfReads;
}
示例29
@Override
public void update1(VariantContext eval, ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext) {
Iterator<Genotype> it = eval.getGenotypes().iterator();
while (it.hasNext()){
Genotype g = it.next();
if (g.isCalled() && !g.isFiltered()){
nCalledNotFiltered++;
}
else if (g.isNoCall() || g.isFiltered()){
nNoCallOrFiltered++;
}
}
}
示例30
/**
* Checks if the two variants have the same genotypes for the selected samples
*
* @param vc the variant rod VariantContext.
* @param compVCs the comparison VariantContext
* @return true if VariantContexts are concordant, false otherwise
*/
private boolean isConcordant (final VariantContext vc, final Collection<VariantContext> compVCs) {
if (vc == null || compVCs == null || compVCs.isEmpty()) {
return false;
}
// if we're not looking for specific samples then the fact that we have both VCs is enough to call it concordant.
if (noSamplesSpecified) {
return true;
}
// make a list of all samples contained in this variant VC that are being tracked by the user command line arguments.
final Set<String> variantSamples = vc.getSampleNames();
variantSamples.retainAll(samples);
// check if we can find all samples from the variant rod in the comp rod.
for (final String sample : variantSamples) {
boolean foundSample = false;
for (final VariantContext compVC : compVCs) {
final Genotype varG = vc.getGenotype(sample);
final Genotype compG = compVC.getGenotype(sample);
if (haveSameGenotypes(varG, compG)) {
foundSample = true;
break;
}
}
// if at least one sample doesn't have the same genotype, we don't have concordance
if (!foundSample) {
return false;
}
}
return true;
}