Java源码示例:htsjdk.samtools.ValidationStringency

示例1
/**
 * Reads the next row from the underlying iterator and converts it into a {@link GTFRecord}.
 * <p>
 * A row whose field count differs from {@code GTFColumnLabels.length} is always rejected. Unless the
 * validation stringency is SILENT the parsed record is also validated: under STRICT any reported
 * problem raises an {@link AnnotationException}; otherwise the problems are logged as a warning.
 *
 * @return the record parsed from the next row
 * @throws AnnotationException if the row has the wrong number of fields, or if validation reports
 *                             problems while the stringency is STRICT
 */
@Override
public GTFRecord next() {
    final TabbedTextFileWithHeaderParser.Row currentRow = it.next();
    final int fieldCount = currentRow.getFields().length;
    if (fieldCount != GTFColumnLabels.length) {
        throw new AnnotationException("Wrong number of fields in GTF file " + gtfFile + " at line " +
                currentRow.getCurrentLine());
    }

    final GTFRecord record = parseLine(currentRow);

    // Validation is skipped entirely when the stringency is SILENT.
    final List<String> problems =
            validationStringency == ValidationStringency.SILENT ? null : record.validate();
    if (problems != null && !problems.isEmpty()) {
        final String message = String.format(
                "Invalid GTF line: \n%s\nProblems:\n%s",
                currentRow.getCurrentLine(),
                CollectionUtil.join(problems, "\n"));
        if (validationStringency == ValidationStringency.STRICT) {
            throw new AnnotationException(message);
        }
        LOG.warn(message);
    }

    progressLogger.record(record.getChromosome(), record.getStart());
    return record;
}
 
示例2
/**
 * Builds a histogram of the quality-score byte values found in a CRAM file and prints it to
 * {@code System.out}.
 * <p>
 * Containers are read one at a time until the stream ends or an EOF container is seen. Quality scores
 * of every record are restored with {@code defaultQualityScore} before being counted.
 *
 * @param file                the CRAM file to scan
 * @param defaultQualityScore the score passed to {@code CramNormalizer.restoreQualityScores} and to
 *                            {@code print}
 * @throws IOException if the file cannot be opened or read
 */
private static void dist(File file, byte defaultQualityScore) throws IllegalArgumentException, IOException,
		IllegalAccessException {
	// One counter per possible unsigned byte value (0..255); a 255-slot array would overflow on 0xFF.
	long[] freq = new long[256];

	// try-with-resources guarantees the file handle is released even if parsing fails part-way.
	try (InputStream is = new FileInputStream(file)) {
		CramHeader header = CramIO.readCramHeader(is);
		Container c = null;
		ContainerParser parser = new ContainerParser(header.getSamFileHeader());
		ArrayList<CramCompressionRecord> records = new ArrayList<CramCompressionRecord>(10000);

		while ((c = ContainerIO.readContainer(header.getVersion(), is)) != null && !c.isEOF()) {
			parser.getRecords(c, records, ValidationStringency.SILENT);

			CramNormalizer.restoreQualityScores(defaultQualityScore, records);
			for (CramCompressionRecord record : records) {
				for (byte b : record.qualityScores) {
					// Mask to treat the signed byte as an unsigned index.
					freq[b & 0xFF]++;
				}
			}
			// The list is reused for the next container.
			records.clear();
		}
	}
	print(freq, defaultQualityScore, System.out);
}
 
示例3
/**
 * Runs the tool with {@code --VALIDATION_STRINGENCY STRICT}, using {@code NON_STRICT_BAM_FILE} as the
 * normal BAM, and expects the run to fail with a {@link UserException}.
 */
@Test(expectedExceptions = UserException.class)
public void testNonStrictBAM() {
    final File normalCountsOutput = createTempFile("normal-test",".txt");
    final File tumorCountsOutput = createTempFile("tumor-test",".txt");

    final String normalBamFlag = "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME;
    final String tumorBamFlag = "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME;
    final String snpFlag = "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME;
    final String referenceFlag = "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME;
    final String normalCountsFlag = "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME;
    final String tumorCountsFlag = "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME;

    final String[] commandLine = {
            normalBamFlag, NON_STRICT_BAM_FILE.getAbsolutePath(),
            tumorBamFlag, TUMOR_BAM_FILE.getAbsolutePath(),
            snpFlag, SNP_FILE.getAbsolutePath(),
            referenceFlag, REF_FILE.getAbsolutePath(),
            normalCountsFlag, normalCountsOutput.getAbsolutePath(),
            tumorCountsFlag, tumorCountsOutput.getAbsolutePath(),
            "--VALIDATION_STRINGENCY", ValidationStringency.STRICT.toString()
    };

    // Expected outcome per the original author: the tool catches the SAMFormatException and
    // rethrows it as a UserException when the stringency is STRICT.
    runCommandLine(commandLine);
}
 
示例4
/**
 * Copies every record of INPUT to OUTPUT, attaching the replacement header to each record.
 * <p>
 * The reader is opened with SILENT validation. The copy is refused when the sort order declared by
 * the replacement header differs from the one in INPUT's header. The reader and the writer are
 * released on every exit path, including the sort-order failure and errors raised while copying.
 *
 * @param replacementHeader the header written to OUTPUT and set on every copied record
 * @throws PicardException if the sort orders of INPUT and the replacement header disagree
 */
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    try {
        if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
            throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                    ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
        }

        // The writer is only created once the sort orders are known to agree.
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);
        try {
            final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
            for (final SAMRecord rec : recordReader) {
                rec.setHeader(replacementHeader);
                writer.addAlignment(rec);
                progress.record(rec);
            }
        } finally {
            // Close the writer before the reader, matching the original shutdown order.
            writer.close();
        }
    } finally {
        CloserUtil.close(recordReader);
    }
}
 
示例5
/**
 * Feeds the records of every input reader into one SortingCollection, which merges and sorts them.
 * <p/>
 * NB: A merging iterator (as used by MergeSamFiles) would perform better for pre-sorted inputs. This
 * method assumes the inputs are unsorted, so adding VariantContexts one by one is acceptable for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 * <p/>
 * Each reader is closed once its records have been added.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 * @return the collection holding every record read from {@code readers}
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // The codec's boolean flag is switched on for every stringency other than STRICT.
    final boolean notStrict = VALIDATION_STRINGENCY != ValidationStringency.STRICT;

    // NB: The default MAX_RECORDS_IN_RAM may not suit this use; VariantContexts are smaller than SamRecords.
    // Finding a better value would need empirical tuning, which is not attempted at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(
            VariantContext.class,
            new VCFRecordCodec(outputHeader, notStrict),
            outputHeader.getVCFRecordComparator(),
            MAX_RECORDS_IN_RAM,
            TMP_DIR);

    int inputNumber = 0;
    for (final VCFFileReader reader : readers) {
        inputNumber++;
        log.info("Reading entries from input file " + inputNumber);
        for (final VariantContext vc : reader) {
            sorter.add(vc);
            readProgress.record(vc.getContig(), vc.getStart());
        }
        reader.close();
    }
    return sorter;
}
 
示例6
/**
 * Handles a read that carries no read group.
 * <p>
 * Under STRICT stringency the problem is logged as an error and thrown. Otherwise a placeholder read
 * group is built for the file and returned; unless the stringency is SILENT, a warning is logged the
 * first time a given file is added to {@code missingRGFiles}.
 *
 * @param samFile the file the read came from
 * @param rec     the read without a read group
 * @return details built from a placeholder read group for {@code samFile}
 * @throws PicardException if the validation stringency is STRICT
 */
private FingerprintIdDetails createUnknownFP(final Path samFile, final SAMRecord rec) {
    final PicardException missingReadGroup = new PicardException("Found read with no readgroup: " + rec.getReadName() + " in file: " + samFile);

    if (validationStringency == ValidationStringency.STRICT) {
        log.error(missingReadGroup.getMessage());
        throw missingReadGroup;
    }

    final String fileUri = samFile.toUri().toString();
    final SAMReadGroupRecord placeholder = new SAMReadGroupRecord("<UNKNOWN>:::" + fileUri);
    placeholder.setLibrary("<UNKNOWN>");
    placeholder.setSample(defaultSampleID);
    placeholder.setPlatformUnit("<UNKNOWN>.0.ZZZ");

    // The set insertion only happens when warnings are enabled (short-circuit), as in the original.
    final boolean warnNow = validationStringency != ValidationStringency.SILENT && missingRGFiles.add(samFile);
    if (warnNow) {
        log.warn(missingReadGroup.getMessage());
        log.warn("further messages from this file will be suppressed");
    }

    return new FingerprintIdDetails(placeholder, fileUri);
}
 
示例7
/**
 * Ensures that an exception is thrown when we encounter a tile without phasing/pre-phasing metrics
 * while running with STRICT validation stringency.
 * <p>
 * The temporary output directory is removed in a {@code finally} block: because the test expects
 * {@code doWork()} to throw, cleanup placed after the assertions would never run.
 */
@Test(expectedExceptions = PicardException.class)
public void testMissingPhasingValuesStrict() {
    final ReadStructure readStructure = new ReadStructure("151T8B8B151T");
    for (final boolean useReadStructure : Arrays.asList(true, false)) {
        final File runDirectory = TEST_MISSING_PHASING_DIRECTORY;
        final CollectIlluminaLaneMetrics clp = new CollectIlluminaLaneMetrics();
        clp.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        try {
            clp.RUN_DIRECTORY = runDirectory;
            clp.OUTPUT_PREFIX = "test";
            clp.VALIDATION_STRINGENCY = ValidationStringency.STRICT;
            if (useReadStructure) {
                clp.READ_STRUCTURE = readStructure;
            }
            clp.doWork();

            final File phasingMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension());
            final File canonicalPhasingFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalPhasingFile, phasingMetricsFile);

            final File laneMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension());
            final File canonicalLaneFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalLaneFile, laneMetricsFile);
        } finally {
            // Runs on the expected-exception path too, so the temp directory is not left behind.
            IOUtil.deleteDirectoryTree(clp.OUTPUT_DIRECTORY);
        }
    }
}
 
示例8
/**
 * Checks that, with SILENT validation stringency, a tile without phasing/pre-phasing metrics does not
 * stop the run, and that the phasing and lane metrics files match the canonical files stored in the
 * run directory. The scenario is run once with and once without an explicit read structure.
 */
@Test
public void testMissingPhasingValuesSilent() throws IOException {
    final ReadStructure structure = new ReadStructure("151T8B8B151T");
    for (final boolean withReadStructure : new boolean[]{true, false}) {
        final File runDir = TEST_MISSING_PHASING_DIRECTORY;

        final CollectIlluminaLaneMetrics collector = new CollectIlluminaLaneMetrics();
        collector.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        collector.RUN_DIRECTORY = runDir;
        collector.OUTPUT_PREFIX = "test";
        collector.VALIDATION_STRINGENCY = ValidationStringency.SILENT;
        if (withReadStructure) {
            collector.READ_STRUCTURE = structure;
        }
        collector.doWork();

        // Phasing metrics: produced file versus the canonical one next to the run data.
        final File actualPhasing = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension());
        final File expectedPhasing = buildOutputFile(runDir, runDir.getName(), IlluminaPhasingMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedPhasing, actualPhasing);

        // Lane metrics: same comparison.
        final File actualLane = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension());
        final File expectedLane = buildOutputFile(runDir, runDir.getName(), IlluminaLaneMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedLane, actualLane);

        IOUtil.deleteDirectoryTree(collector.OUTPUT_DIRECTORY);
    }
}
 
示例9
/**
 * Runs the command line on a test SAM file, then checks that the first record of the output has the
 * expected CIGAR string and that the output passes {@code SamFileValidator} (warnings and
 * MISSING_READ_GROUP errors are ignored).
 *
 * @param samFile       name of the input file inside {@code TEST_DATA_DIR}
 * @param expectedCigar CIGAR string expected on the first output record
 */
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();

    final String inputArg = "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath();
    final String outputArg = "OUTPUT=" + cleanedFile.getAbsolutePath();
    final int exitCode = runPicardCommandLine(new String[]{inputArg, outputArg});
    Assert.assertEquals(exitCode, 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));

    // First pass: inspect the CIGAR of the first record only.
    final SamReader cigarReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final SAMRecord firstRecord = cigarReader.iterator().next();
    cigarReader.close();
    Assert.assertEquals(firstRecord.getCigarString(), expectedCigar);

    // Second pass: a fresh reader for whole-file validation.
    final SamReader validationReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final boolean fileIsValid = validator.validateSamFileVerbose(validationReader, null);
    validationReader.close();
    Assert.assertTrue(fileIsValid, "ValidateSamFile failed");
}
 
示例10
/**
 * Opens a {@link SamReader} over a seekable stream.
 * <p>
 * The factory caches file-based indexes, does not decode eagerly and does not use async I/O.
 *
 * @param in               stream holding the alignment data
 * @param inIndex          stream holding the index, or {@code null} when no index is supplied
 * @param stringency       validation stringency to apply, or {@code null} to keep the factory's setting
 * @param useIntelInflater whether to install the inflater factory from {@code IntelGKLAccessor}
 * @return a reader opened on the resource
 */
private SamReader createSamReader(SeekableStream in, SeekableStream inIndex,
		ValidationStringency stringency, boolean useIntelInflater) {
	SamReaderFactory factory = SamReaderFactory.makeDefault();
	factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true);
	factory.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false);
	factory.setUseAsyncIo(false);
	if (stringency != null) {
		factory.validationStringency(stringency);
	}

	SamInputResource input = SamInputResource.of(in);
	if (inIndex != null) {
		input.index(inIndex);
	}

	if (useIntelInflater) {
		factory.inflaterFactory(IntelGKLAccessor.newInflatorFactor());
	}
	return factory.open(input);
}
 
示例11
/**
 * Reads the SAM file header from a stream. Does not close the stream.
 * <p>
 * The validation stringency and the reference source are taken from the configuration; each is
 * applied to the reader factory only when it is non-null.
 *
 * @param in   stream positioned at the start of the SAM/BAM/CRAM data
 * @param conf configuration consulted for the stringency and the reference source
 * @return the header read from {@code in}
 */
public static SAMFileHeader readSAMHeaderFrom(
	final InputStream in, final Configuration conf)
{
	SamReaderFactory factory = SamReaderFactory.makeDefault();
	factory.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false);
	factory.setUseAsyncIo(false);

	final ValidationStringency configuredStringency = getValidationStringency(conf);
	if (configuredStringency != null) {
		factory.validationStringency(configuredStringency);
	}

	final ReferenceSource configuredReference = getReferenceSource(conf);
	if (configuredReference != null) {
		factory.referenceSource(configuredReference);
	}

	final SamInputResource input = SamInputResource.of(in);
	return factory.open(input).getFileHeader();
}
 
示例12
/**
 * Command-line entry point: reads the header of the input BAM, marks it as coordinate-sorted and
 * writes it to the output file via {@code SAMOutputPreparer}.
 * <p>
 * Prints a usage message and exits with status 1 when fewer than two arguments are given.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws IOException if the input cannot be read or closed, or the output cannot be written
 */
public static void main(String[] args) throws IOException {
	if (args.length < 2) {
		System.err.println(
			"Usage: GetSortedBAMHeader input output\n\n"+

			"Reads the BAM header from input (a standard BGZF-compressed BAM "+
			"file), and\nwrites it (BGZF-compressed, no terminator block) to "+
			"output. Sets the sort order\nindicated in the SAM header to "+
			"'coordinate'.");
		System.exit(1);
	}

	final SAMFileHeader h;
	// Close the reader once the header has been extracted, so the input file handle is not leaked.
	// The type is fully qualified so that no additional import is required.
	try (htsjdk.samtools.SamReader reader =
			SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
					.setUseAsyncIo(false)
					.open(new File(args[0]))) {
		h = reader.getFileHeader();
	}
	h.setSortOrder(SAMFileHeader.SortOrder.coordinate);

	try (FileOutputStream stream = new FileOutputStream(args[1])) {
		new SAMOutputPreparer().prepareForRecords(stream, SAMFormat.BAM, h);
	}
}
 
示例13
/**
 * Renders at most {@code maxRecords} records of a SAM/BAM stream, preceded by its header, as SAM text.
 * <p>
 * When the input holds more than {@code maxRecords} records, a truncation notice is appended after
 * the records. The notice is added only when at least one record was actually left out.
 * <p>
 * Note: this sets the process-wide default validation stringency of {@code SAMFileReader} to SILENT.
 * NOTE(review): the reader created here is never closed; presumably the caller owns
 * {@code samBamStream} and closes it - confirm.
 *
 * @param samBamStream stream holding SAM or BAM data
 * @param maxRecords   maximum number of records to include in the output
 * @return the SAM text, possibly followed by the truncation notice
 * @throws IOException if writing the truncation notice to the buffer fails
 */
public String printSamBam(InputStream samBamStream, int maxRecords) throws IOException {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader in = new SAMFileReader(samBamStream);
	SAMFileHeader header = in.getFileHeader();
	ByteArrayOutputStream buffer = new ByteArrayOutputStream();
	SAMFileWriter out = new SAMFileWriterFactory().makeSAMWriter(header, true, buffer);
	int written = 0;
	boolean truncated = false;
	try {
		for (final SAMRecord rec : in) {
			// Stop before writing record number maxRecords + 1; reaching this point with the
			// limit already met proves that at least one record is being dropped.
			if (written >= maxRecords) {
				truncated = true;
				break;
			}
			out.addAlignment(rec);
			written++;
		}
	} finally {
		// Closing the writer makes sure everything written so far is present in the buffer.
		closeIfPossible(out);
	}

	if (truncated) {
		buffer.write("SAM/BAM too long for viewing, truncated here!\n".getBytes());
	}

	return buffer.toString();
}
 
示例14
/**
 * Opens a reader on a BAM resource, optionally starting at a given offset.
 * <p>
 * With an offset of zero the resource is opened through the reader factory. Otherwise a
 * {@code SeekingBAMFileReader} is created at the offset, wrapped in a {@code SeekingReaderAdapter},
 * and the factory's options are re-applied to the wrapper.
 *
 * @param resource          the input to read
 * @param stringency        validation stringency for the reader
 * @param includeFileSource whether to enable {@code INCLUDE_SOURCE_IN_RECORDS}
 * @param offset            start offset; zero selects the plain factory-opened reader
 * @return the opened reader
 */
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
  SamReaderFactory factory = SamReaderFactory.makeDefault();
  factory.validationStringency(stringency);
  factory.enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
  if (includeFileSource) {
    factory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
  }

  if (offset != 0) {
    LOG.info("Initializing seeking reader with the offset of " + offset);
    SeekingBAMFileReader seekingReader = new SeekingBAMFileReader(
        resource, false, stringency, DefaultSAMRecordFactory.getInstance(), offset);
    final SeekingReaderAdapter adapter = new SeekingReaderAdapter(seekingReader, resource);
    factory.reapplyOptions(adapter);
    return adapter;
  }

  return factory.open(resource);
}
 
示例15
/**
 * Copies all records of a SAM/BAM file into a new BAM file whose header declares coordinate order.
 * <p>
 * The writer is created with its pre-sorted flag set to {@code false}. Note that this sets the
 * process-wide default validation stringency of {@code SAMFileReader} to SILENT. Reader and writer
 * are both released in the {@code finally} block.
 *
 * @param samBamFile    input SAM or BAM file
 * @param sortedBamFile BAM file to create
 */
public static void sortSamBam(File samBamFile, File sortedBamFile) {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader source = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
	SAMFileWriter sink = null;
	try {
		SAMFileHeader header = source.getFileHeader();
		header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
		sink = new SAMFileWriterFactory().makeBAMWriter(header, false, sortedBamFile);
		for (SAMRecord record : source) {
			sink.addAlignment(record);
		}
	} finally {
		closeIfPossible(source);
		closeIfPossible(sink);
	}
}
 
示例16
/**
 * Parses {@code GTF_FILE1} with STRICT validation, enhances its records and checks that a non-null
 * list comes back. The parser is closed whether or not enhancement succeeds.
 */
@Test(enabled=true, groups={"dropseq", "transcriptome"})
public void test1Enhanced() {
	final EnhanceGTFRecords enhancer = new EnhanceGTFRecords();
	final GTFParser gtfParser = new GTFParser(GTF_FILE1, ValidationStringency.STRICT);
	final List<GTFRecord> enhanced;
	try {
		enhanced = enhancer.enhanceGTFRecords(gtfParser);
	} finally {
		CloserUtil.close(gtfParser);
	}
	Assert.assertNotNull(enhanced);
}
 
示例17
/**
 * Enhances the records of {@code GTF_FILE3} (parsed with STRICT validation) and expects an
 * {@link IllegalStateException}. The parser is closed whether or not enhancement succeeds.
 */
@Test(enabled=true, expectedExceptions=java.lang.IllegalStateException.class)
public void testGeneNoExon () {
	final EnhanceGTFRecords enhancer = new EnhanceGTFRecords();
	final GTFParser gtfParser = new GTFParser(GTF_FILE3, ValidationStringency.STRICT);
	final List<GTFRecord> enhanced;
	try {
		enhanced = enhancer.enhanceGTFRecords(gtfParser);
	} finally {
		CloserUtil.close(gtfParser);
	}
	Assert.assertNotNull(enhanced);
}
 
示例18
/**
 * Counts the records in a SAM/BAM file, reading it with SILENT validation stringency.
 *
 * @param result path of the file to count; it is first asserted to be readable
 * @return the number of records iterated
 * @throws IOException if closing the reader fails
 */
private int getReadCounts(final Path result) throws IOException {
    IOUtil.assertFileIsReadable(result);

    final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    int total = 0;
    try (final SamReader reader = factory.open(result)) {
        // Only the number of iterations matters; the record itself is not inspected.
        for (@SuppressWarnings("unused") final SAMRecord ignored : reader) {
            total += 1;
        }
    }
    return total;
}
 
示例19
/**
 * Builds a {@link CobaltConfig} from parsed command-line options.
 * <p>
 * Thread count, minimum mapping quality, reference genome path and validation stringency fall back to
 * defaults. The remaining parameters are read through {@code parameter(...)}, which is handed a
 * joiner; if that joiner is non-empty after all of them are read, parsing fails with a message
 * listing its contents.
 *
 * @param cmd the parsed command line
 * @return the immutable configuration
 * @throws ParseException if the GC profile path ends with "gz", or if the joiner of missing
 *                        parameters is not empty
 */
@NotNull
static CobaltConfig createConfig(@NotNull final CommandLine cmd) throws ParseException {
    final int threads = defaultIntValue(cmd, THREADS, DEFAULT_THREADS);
    final int mappingQualityFloor = defaultIntValue(cmd, MIN_MAPPING_QUALITY, DEFAULT_MIN_MAPPING_QUALITY);
    final String refGenome = cmd.getOptionValue(REF_GENOME, "");

    final StringJoiner missingParameters = new StringJoiner(", ");

    final String gcProfile = parameter(cmd, GC_PROFILE, missingParameters);
    if (gcProfile.endsWith("gz")) {
        throw new ParseException("Please supply un-compressed " + GC_PROFILE + " file");
    }

    final String tumorBam = parameter(cmd, TUMOR_BAM, missingParameters);
    final String referenceBam = parameter(cmd, REFERENCE_BAM, missingParameters);
    final String outputDir = parameter(cmd, OUTPUT_DIR, missingParameters);
    final String referenceSample = parameter(cmd, REFERENCE, missingParameters);
    final String tumorSample = parameter(cmd, TUMOR, missingParameters);

    // The stringency is resolved before the missing-parameter check, as in the original ordering.
    final ValidationStringency stringency = defaultEnumValue(cmd, VALIDATION_STRINGENCY, ValidationStringency.DEFAULT_STRINGENCY);

    if (missingParameters.length() > 0) {
        throw new ParseException("Missing the following parameters: " + missingParameters.toString());
    }

    return ImmutableCobaltConfig.builder()
            .threadCount(threads)
            .minMappingQuality(mappingQualityFloor)
            .gcProfilePath(gcProfile)
            .tumorBamPath(tumorBam)
            .referenceBamPath(referenceBam)
            .refGenomePath(refGenome)
            .outputDirectory(outputDir)
            .reference(referenceSample)
            .tumor(tumorSample)
            .validationStringency(stringency)
            .build();
}
 
示例20
/**
 * Runs the tool against {@code ENDPOINT} with the supplied extra parameters and compares the BAM it
 * writes with the expected file, using LENIENT validation.
 *
 * @param params           extra argument name/value pairs added to the command line
 * @param expectedFileName name of the expected BAM inside the tool's test data directory
 */
@Test(dataProvider = "successfulParameters")
public void testSuccessfulParameters(final Map<String, String> params, final String expectedFileName) throws IOException {
    final File expectedBam = new File(getToolTestDataDir(), expectedFileName);
    final File actualBam = createTempFile("output", ".bam");

    final ArgumentsBuilder commandLine = new ArgumentsBuilder();
    commandLine.add(HtsgetReader.URL_LONG_NAME, ENDPOINT);
    commandLine.addOutput(actualBam);
    for (final Map.Entry<String, String> param : params.entrySet()) {
        commandLine.add(param.getKey(), param.getValue());
    }

    runCommandLine(commandLine);
    SamAssertionUtils.assertEqualBamFiles(actualBam, expectedBam, false, ValidationStringency.LENIENT);
}
 
示例21
/**
 * Runs the tool on {@code NON_STRICT_BAM_FILE} with the read validation stringency set to SILENT.
 * The test passes when the run finishes without throwing.
 */
@Test
public void testNonStrictBAMWithSilentValidationStringency() {
    final File countsOutput = createTempFile("collect-allelic-counts-test-output", ".tsv");

    final String inputFlag = "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME;
    final String sitesFlag = "-" + ExomeStandardArgumentDefinitions.SITES_FILE_SHORT_NAME;
    final String referenceFlag = "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME;
    final String outputFlag = "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME;
    final String stringencyFlag = "-" + StandardArgumentDefinitions.READ_VALIDATION_STRINGENCY_SHORT_NAME;

    final String[] commandLine = {
            inputFlag, NON_STRICT_BAM_FILE.getAbsolutePath(),
            sitesFlag, SITES_FILE.getAbsolutePath(),
            referenceFlag, REF_FILE.getAbsolutePath(),
            outputFlag, countsOutput.getAbsolutePath(),
            stringencyFlag, ValidationStringency.SILENT.toString()
    };

    // Expected to complete successfully when the read validation stringency is SILENT.
    runCommandLine(commandLine);
}
 
示例22
/**
 * Runs the tool twice over the same intervals, once with the first reads/index pair and once with the
 * second pair, then checks that the first output holds the expected number of reads and that both
 * outputs are equal BAM files.
 *
 * @param reads                 first reads input (the HTTP one, going by the parameter names)
 * @param index                 index for {@code reads}
 * @param nonHttpReads          second reads input
 * @param nonHttpIndex          index for {@code nonHttpReads}
 * @param intervals             intervals passed to both runs
 * @param expectedNumberOfReads number of reads expected in the first output
 */
@Test(groups = {"cloud", "bucket"}, dataProvider = "getHttpPaths")
public void testHttpPaths(String reads, String index, String nonHttpReads, String nonHttpIndex, List<SimpleInterval> intervals, long expectedNumberOfReads) throws IOException {
    // This test reads tiny amounts of data from several places. Unless the prefetcher is set to a
    // low number it loads large amounts of data, slowing the test down for no good reason.
    final File httpOutput = createTempFile("out", ".bam");
    final ArgumentsBuilder httpArgs = new ArgumentsBuilder();
    httpArgs.addInput(reads);
    httpArgs.add(StandardArgumentDefinitions.CLOUD_PREFETCH_BUFFER_LONG_NAME, 1);
    httpArgs.add(StandardArgumentDefinitions.CLOUD_INDEX_PREFETCH_BUFFER_LONG_NAME, 1);
    httpArgs.add("read-index", index);
    httpArgs.addReference(GATKBaseTest.b37Reference);
    httpArgs.addOutput(httpOutput);
    for (final SimpleInterval interval : intervals) {
        httpArgs.addInterval(interval);
    }
    runCommandLine(httpArgs);

    final File nonHttpOutput = createTempFile("out", ".bam");
    final ArgumentsBuilder nonHttpArgs = new ArgumentsBuilder();
    nonHttpArgs.addInput(nonHttpReads);
    nonHttpArgs.add("read-index", nonHttpIndex);
    nonHttpArgs.add(StandardArgumentDefinitions.CLOUD_PREFETCH_BUFFER_LONG_NAME, 1);
    nonHttpArgs.add(StandardArgumentDefinitions.CLOUD_INDEX_PREFETCH_BUFFER_LONG_NAME, 1);
    nonHttpArgs.addReference(GATKBaseTest.b37Reference);
    nonHttpArgs.addOutput(nonHttpOutput);
    for (final SimpleInterval interval : intervals) {
        nonHttpArgs.addInterval(interval);
    }
    runCommandLine(nonHttpArgs);

    try (final ReadsDataSource source = new ReadsPathDataSource(httpOutput.toPath())) {
        final long readCount = Utils.stream(source).count();
        Assert.assertEquals( readCount, expectedNumberOfReads);
    }

    SamAssertionUtils.assertEqualBamFiles(httpOutput, nonHttpOutput, false, ValidationStringency.DEFAULT_STRINGENCY);
}
 
示例23
/**
 * Reads tile data from a version-three TileMetricsOut file and combines it with the phasing values
 * taken from the supplied phasing metrics files.
 * <p>
 * The {@code validationStringency} argument is accepted but not used by this method.
 *
 * @param tileMetricsOutFile   the TileMetricsOut file to read
 * @param phasingMetricsFiles  phasing metrics files, keyed by an integer
 * @param readStructure        read structure used when collecting phasing values
 * @param validationStringency unused
 * @return the tiles built from the cluster records
 * @throws FileNotFoundException declared for failures to open the metrics file
 * @deprecated use {@link #parseClusterRecordsFromTileMetricsV3(Collection, Map, ReadStructure)} instead
 */
@Deprecated
public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile,
                                                final Map<Integer, File> phasingMetricsFiles,
                                                final ReadStructure readStructure,
                                                final ValidationStringency validationStringency)
        throws FileNotFoundException {
    final Map<Integer, Map<Integer, Collection<TilePhasingValue>>> phasingByKey =
            getTilePhasingValues(phasingMetricsFiles, readStructure);

    final TileMetricsOutReader metricsReader =
            new TileMetricsOutReader(tileMetricsOutFile, TileMetricsOutReader.TileMetricsVersion.THREE);

    // Only the last value per lane/tile/code is kept before partitioning by location.
    final Map<String, ? extends Collection<IlluminaTileMetrics>> metricsByLocation =
            partitionTileMetricsByLocation(determineLastValueForLaneTileMetricsCode(metricsReader));

    return getTileClusterRecordsV3(metricsByLocation, phasingByKey, metricsReader.getDensity());
}
 
示例24
/**
 * Reads a version-two TileMetricsOut file and returns an unmodifiable collection with one
 * {@code Tile} per tile location. Each tile carries:
 * - lane number
 * - tile number
 * - density
 * - cluster ID
 * - Phasing & Prephasing for first template read (if available)
 * - Phasing & Prephasing for second template read (if available)
 *
 * @param tileMetricsOutFile   the TileMetricsOut file to read
 * @param readStructure        read structure used when collecting phasing values
 * @param validationStringency stringency forwarded to the phasing-value collection
 * @return an unmodifiable collection of tiles
 * @throws PicardException       if a tile location lacks a density or a cluster record
 * @throws FileNotFoundException declared for failures to open the metrics file
 */
public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile, final ReadStructure readStructure,
                                                final ValidationStringency validationStringency) throws FileNotFoundException {
    // Read the metrics lines, keeping only the last value for any Lane/Tile/Code combination.
    final TileMetricsOutReader metricsReader =
            new TileMetricsOutReader(tileMetricsOutFile, TileMetricsOutReader.TileMetricsVersion.TWO);
    final Collection<IlluminaTileMetrics> lastValues = determineLastValueForLaneTileMetricsCode(metricsReader);

    final int densityCode = IlluminaMetricsCode.DENSITY_ID.getMetricsCode();
    final int clusterCode = IlluminaMetricsCode.CLUSTER_ID.getMetricsCode();

    // Group the metrics by tile location (lane:tile).
    final Map<String, ? extends Collection<IlluminaTileMetrics>> metricsByLocation = partitionTileMetricsByLocation(lastValues);
    final Collection<Tile> result = new LinkedList<>();
    for (final Map.Entry<String, ? extends Collection<IlluminaTileMetrics>> location : metricsByLocation.entrySet()) {
        // Map each metric code to the records carrying it for this location.
        final Map<Integer, ? extends Collection<IlluminaTileMetrics>> recordsByCode = partitionTileMetricsByCode(location.getValue());

        final Set<Integer> codesSeen = recordsByCode.keySet();
        if (!codesSeen.contains(densityCode) || !codesSeen.contains(clusterCode)) {
            throw new PicardException(String.format("Expected to find cluster and density record codes (%s and %s) in records read for tile location %s (lane:tile), but found only %s.",
                    clusterCode, densityCode, location.getKey(), codesSeen));
        }

        final IlluminaTileMetrics density = CollectionUtil.getSoleElement(recordsByCode.get(densityCode));
        final IlluminaTileMetrics cluster = CollectionUtil.getSoleElement(recordsByCode.get(clusterCode));

        // Phasing data for each read in the read structure. The original author notes that each
        // value is the median of the individual values seen.
        final Collection<TilePhasingValue> phasing = getTilePhasingValues(recordsByCode, readStructure, validationStringency);
        final TilePhasingValue[] phasingArray = phasing.toArray(new TilePhasingValue[phasing.size()]);

        result.add(new Tile(density.getLaneNumber(), density.getTileNumber(), density.getMetricValue(),
                cluster.getMetricValue(), phasingArray));
    }

    return Collections.unmodifiableCollection(result);
}
 
示例25
/**
 * Reads the tiles of an Illumina run directory and groups them by lane number.
 * <p>
 * NovaSeq runs are parsed from all located tile metrics files together with the phasing metrics files
 * of the run directory; other runs are parsed from the first located tile metrics file only. Tiles
 * whose lane number is not positive are left out of the result.
 *
 * @param illuminaRunDirectory the run directory to search
 * @param readStructure        read structure of the run
 * @param validationStringency stringency forwarded to the non-NovaSeq parser
 * @param isNovaSeq            selects the NovaSeq parsing path
 * @return tiles grouped by lane number
 * @throws PicardException if a {@link FileNotFoundException} is raised while locating or parsing
 */
public static Map<Integer, ? extends Collection<Tile>> readLaneTiles(final File illuminaRunDirectory,
                                                                     final ReadStructure readStructure,
                                                                     final ValidationStringency validationStringency,
                                                                     final boolean isNovaSeq) {
    final Collection<Tile> parsedTiles;
    try {
        final List<File> metricsFiles = TileMetricsUtil.findTileMetricsFiles(illuminaRunDirectory, readStructure.totalCycles, isNovaSeq);
        parsedTiles = isNovaSeq
                ? TileMetricsUtil.parseClusterRecordsFromTileMetricsV3(
                        metricsFiles,
                        TileMetricsUtil.renderPhasingMetricsFilesFromBasecallingDirectory(illuminaRunDirectory),
                        readStructure)
                : TileMetricsUtil.parseTileMetrics(metricsFiles.get(0), readStructure, validationStringency);
    } catch (final FileNotFoundException notFound) {
        throw new PicardException("Unable to open laneMetrics file.", notFound);
    }

    return parsedTiles.stream()
            .filter(candidate -> candidate.getLaneNumber() > 0)
            .collect(Collectors.groupingBy(Tile::getLaneNumber));
}
 
示例26
/**
 * Reads the tile data of a run directory once and writes both the lane metrics and the phasing
 * metrics from it.
 *
 * @param runDirectory         run directory the tiles are read from
 * @param outputDirectory      directory passed to both writers
 * @param outputPrefix         file name prefix passed to both writers
 * @param laneMetricsFile      metrics file handed to the lane metrics writer
 * @param phasingMetricsFile   metrics file handed to the phasing metrics writer
 * @param readStructure        read structure of the run
 * @param fileExtension        extension passed to both writers
 * @param validationStringency stringency used while reading the tiles
 * @param isNovaSeq            forwarded to tile reading and to the phasing metrics writer
 */
public static void collectLaneMetrics(final File runDirectory, final File outputDirectory, final String outputPrefix,
                                      final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile,
                                      final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile,
                                      final ReadStructure readStructure, final String fileExtension,
                                      final ValidationStringency validationStringency,
                                      final boolean isNovaSeq) {
    final Map<Integer, ? extends Collection<Tile>> tilesByLane =
            readLaneTiles(runDirectory, readStructure, validationStringency, isNovaSeq);

    writeLaneMetrics(tilesByLane, outputDirectory, outputPrefix, laneMetricsFile, fileExtension);
    writePhasingMetrics(tilesByLane, outputDirectory, outputPrefix, phasingMetricsFile, fileExtension, isNovaSeq);
}
 
示例27
/**
 * This test just checks that PrintReads runs with a config arg without blowing up.
 * It does not test whether the config settings were actually loaded correctly
 * (tested by {@link ConfigIntegrationTest}).
 */
@Test
public void testPrintReadsWithConfigFile() throws Exception {

    final String inputBamPath = publicTestDir + "NA12878.chr17_69k_70k.dictFix.bam";
    final File outputBam = createTempFile("TEST_OUT_NA12878.chr17_69k_70k.dictFix", ".bam");

    final String configFlag = "--" + StandardArgumentDefinitions.GATK_CONFIG_FILE_OPTION;
    final String inputFlag = "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME;
    final String outputFlag = "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME;

    // Assemble the command line: config file, input, output.
    final ArgumentsBuilder commandLine = new ArgumentsBuilder();
    commandLine.addRaw(configFlag);
    commandLine.addRaw(configFilePath);
    commandLine.addRaw(inputFlag);
    commandLine.addRaw(inputBamPath);
    commandLine.addRaw(outputFlag);
    commandLine.addRaw(outputBam);

    runCommandLine(commandLine.getArgsArray());

    // The output is compared against the input file itself.
    IntegrationTestSpec.assertMatchingFiles(
            Collections.singletonList(new File(inputBamPath)),
            Collections.singletonList(outputBam.getAbsolutePath()),
            true,
            ValidationStringency.LENIENT
    );
}
 
示例28
/**
 * Verifies the output file in two passes: every record (and its mate CIGAR, when present) must carry
 * the expected CIGAR string, and the file must pass {@code SamFileValidator} with warnings and
 * MISSING_READ_GROUP errors ignored. The output directory is deleted afterwards in all cases.
 */
protected void test() {
    try {
        final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
        validator.setIgnoreWarnings(true);
        validator.setVerbose(true, 1000);
        validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));

        final SamReaderFactory lenientFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);

        // Pass 1: check the CIGAR of every record.
        final SamReader cigarReader = lenientFactory.open(getOutput());
        for (final SAMRecord record : cigarReader) {
            Assert.assertEquals(record.getCigarString(), expectedCigar);
            if (SAMUtils.hasMateCigar(record)) {
                Assert.assertEquals(SAMUtils.getMateCigarString(record), expectedCigar);
            }
        }
        CloserUtil.close(cigarReader);

        // Pass 2: run the validator over a freshly opened reader.
        final SamReader validationReader = lenientFactory.open(getOutput());
        final boolean fileIsValid = validator.validateSamFileVerbose(validationReader, null);
        CloserUtil.close(validationReader);

        Assert.assertTrue(fileIsValid, "ValidateSamFile failed");
    } finally {
        IOUtil.recursiveDelete(getOutputDir().toPath());
    }
}
 
示例29
/**
 * Disabled workflow test: recalibrates a BAM with BaseRecalibratorSpark, applies the table with
 * ApplyBQSRSpark, recalibrates the result again, then compares the recalibrated BAM and the second
 * table against expected files.
 * <p>
 * The two BaseRecalibrator argument strings now put a space between the reference path and
 * {@code -indels}; without it the flag was concatenated onto the reference path.
 */
@Test(description = "This is to test https://github.com/broadinstitute/hellbender/issues/322", groups = {"cloud", "spark"}, enabled = false)
public void testPlottingWorkflow() throws IOException {
    final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
    final String chr2021Reference2bit = GCS_b37_CHR20_21_REFERENCE_2BIT;
    final String dbSNPb37_chr2021 = resourceDir + DBSNP_138_B37_CH20_1M_1M1K_VCF;
    final String HiSeqBam_chr20 = getResourceDir() + WGS_B37_CH20_1M_1M1K_BAM;

    final File actualHiSeqBam_recalibrated = createTempFile("actual.recalibrated", ".bam");

    // Step 1: build the pre-recalibration table.
    final String tablePre = createTempFile("gatk4.pre.cols", ".table").getAbsolutePath();
    final String argPre = " -R " + chr2021Reference2bit + " -indels --enable-baq " +" --known-sites " + dbSNPb37_chr2021 + " -I " + HiSeqBam_chr20
            + " -O " + tablePre;
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(argPre));

    // Step 2: apply the table to produce the recalibrated BAM.
    final String argApply = "-I " + HiSeqBam_chr20 + " --bqsr-recal-file " + tablePre + " -O " + actualHiSeqBam_recalibrated.getAbsolutePath();
    new ApplyBQSRSpark().instanceMain(Utils.escapeExpressions(argApply));

    // Step 3: build the post-recalibration table from the recalibrated BAM.
    final File actualTablePost = createTempFile("gatk4.post.cols", ".table");
    final String argsPost = " -R " + chr2021Reference2bit + " -indels --enable-baq " +" --known-sites " + dbSNPb37_chr2021 + " -I " + actualHiSeqBam_recalibrated.getAbsolutePath()
            + " -O " + actualTablePost.getAbsolutePath();
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(argsPost));

    final File expectedHiSeqBam_recalibrated = new File(resourceDir + "expected.NA12878.chr17_69k_70k.dictFix.recalibrated.DIQ.bam");

    SamAssertionUtils.assertSamsEqual(actualHiSeqBam_recalibrated, expectedHiSeqBam_recalibrated, ValidationStringency.LENIENT);

    final File expectedTablePost = new File(getResourceDir() + "expected.NA12878.chr17_69k_70k.postRecalibrated.txt");
    IntegrationTestSpec.assertEqualTextFiles(actualTablePost, expectedTablePost);
}
 
示例30
/**
 * Converts one serialized CRAM container into block-compressed, encoded SAM records.
 * <p>
 * The container is read from the input bytes, its records are parsed with SILENT validation and
 * normalized, and each record is encoded through {@code codec} into an in-memory
 * {@code BlockCompressedOutputStream}. The output stream is flushed but not closed.
 *
 * @param object the container bytes together with their order number; must not be {@code null}
 * @return the converted bytes carrying the same order number, or {@code null} for an EOF container
 * @throws NullPointerException if {@code object} is {@code null}
 * @throws RuntimeException     wrapping any IOException, IllegalArgumentException or
 *                              IllegalAccessException raised during conversion
 */
@Override
public OrderedByteArray apply(OrderedByteArray object) {
	if (object == null) {
		throw new NullPointerException();
	}

	log.debug("processing container " + object.order);
	try {
		final Container container =
				ContainerIO.readContainer(header.getVersion(), new ByteArrayInputStream(object.bytes));
		if (container.isEOF()) {
			return null;
		}

		ArrayList<CramCompressionRecord> cramRecords = new ArrayList<CramCompressionRecord>(container.nofRecords);
		parser.getRecords(container, cramRecords, ValidationStringency.SILENT);
		n.normalize(cramRecords, null, 0, container.header.substitutionMatrix);

		ByteArrayOutputStream encodedBytes = new ByteArrayOutputStream();
		BlockCompressedOutputStream compressed = new BlockCompressedOutputStream(encodedBytes, null);
		codec.setOutputStream(compressed);
		for (CramCompressionRecord cramRecord : cramRecords) {
			codec.encode(f.create(cramRecord));
		}
		// Flushed only, never closed, exactly as in the original.
		compressed.flush();

		OrderedByteArray converted = new OrderedByteArray();
		converted.bytes = encodedBytes.toByteArray();
		converted.order = object.order;
		log.debug(String.format("Converted OBA %d, records %d", object.order, cramRecords.size()));
		return converted;
	} catch (IOException | IllegalArgumentException | IllegalAccessException e) {
		throw new RuntimeException(e);
	}
}