Java source code examples: org.apache.spark.SparkException
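All of the examples on this page revolve around the same constraint: SparkException is a checked exception raised from Scala code, and because Scala does not declare checked exceptions in its bytecode, Java callers often cannot catch it directly. Instead they catch a broad Exception, test it with instanceof, and unwrap getCause() to reach the real failure. A minimal sketch of that pattern, assuming hypothetical placeholder methods runSparkAction() and handleCause() that are not taken from any example below:

import org.apache.spark.SparkException;

// Sketch only: runSparkAction() stands in for any Spark action (collect, save, ...)
// and handleCause() for application-specific handling of the unwrapped failure.
static void runAndUnwrap() {
    try {
        runSparkAction();
    } catch (Exception e) {
        if (e instanceof SparkException && e.getCause() != null) {
            handleCause(e.getCause()); // the original user-code or executor failure
        } else {
            throw new RuntimeException(e);
        }
    }
}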
Example 1
public QueryResult getSparkQueryResult(Entry<List<Attribute>, List<GenericRowWithSchema>> sparkData)
        throws Exception {
    if (sparkData == null) {
        return new QueryResult(new ArrayList<>(), new ArrayList<>());
    }
    if (CollectionUtils.isEmpty(sparkData.getKey())) {
        throw new SparkException("collect data error");
    }
    List<Attribute> attributes = sparkData.getKey();
    List<GenericRowWithSchema> value = sparkData.getValue();
    List<Object> data = new ArrayList<>();
    List<ColumnMetaData> meta = new ArrayList<>();
    value.stream().forEach(column -> {
        data.add(column.values());
    });
    for (int index = 0; index < sparkData.getKey().size(); index++) {
        Attribute attribute = sparkData.getKey().get(index);
        ScalarType columnType = getColumnType(attribute.dataType());
        meta.add(new ColumnMetaData(index, false, true, false, false,
                attribute.nullable() ? 1 : 0, true, -1, attribute.name(), attribute.name(), null, -1, -1, null, null,
                columnType, true, false, false, columnType.columnClassName()));
    }
    return new QueryResult(meta, data);
}
Example 2
private static RuntimeException beamExceptionFrom(final Throwable e) {
    // Scala doesn't declare checked exceptions in the bytecode, and the Java compiler
    // won't let you catch something that is not declared, so we can't catch
    // SparkException directly, instead we do an instanceof check.
    if (e instanceof SparkException) {
        if (e.getCause() != null && e.getCause() instanceof UserCodeException) {
            UserCodeException userException = (UserCodeException) e.getCause();
            return new Pipeline.PipelineExecutionException(userException.getCause());
        } else if (e.getCause() != null) {
            return new Pipeline.PipelineExecutionException(e.getCause());
        }
    }
    return runtimeExceptionFrom(e);
}
Example 3
public static LocalSqoopSparkClient createSqoopSparkClient(SqoopConf sqoopConf)
        throws IOException, SparkException {
    Map<String, String> sparkConf = prepareSparkConfMapFromSqoopConfig(sqoopConf);
    // Submit spark job through local spark context while spark master is local mode,
    // otherwise submit spark job through remote spark context.
    String master = sparkConf.get("spark.master");
    if (master.equals("local") || master.startsWith("local[")) {
        // With local spark context, all user sessions share the same spark context.
        return LocalSqoopSparkClient.getInstance(generateSparkConf(sparkConf));
    } else {
        LOG.info("Using yarn submitter");
        // TODO: hook up yarn submitter
        return null;
    }
}
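Because createSqoopSparkClient declares SparkException in its throws clause, a Java caller can catch it directly. A brief caller sketch, assuming a SqoopConf instance named sqoopConf and the same LOG field used above (both assumptions, not part of the original example):

// Hypothetical call site for the factory method above; the null return
// corresponds to the yarn path that is still marked TODO.
try {
    LocalSqoopSparkClient client = createSqoopSparkClient(sqoopConf);
    if (client == null) {
        LOG.info("Non-local spark.master is not wired up yet");
    }
} catch (IOException | SparkException e) {
    LOG.info("Could not create Spark client: " + e);
}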
Example 4
@Test
public void testBatchProfilerWithInvalidProfile() {
    profilerProperties.put(TELEMETRY_INPUT_READER.getKey(), JSON.toString());
    profilerProperties.put(TELEMETRY_INPUT_PATH.getKey(), "src/test/resources/telemetry.json");

    // the batch profiler should error out, if there is a bug in *any* of the profiles
    BatchProfiler profiler = new BatchProfiler();
    assertThrows(
            SparkException.class,
            () ->
                    profiler.run(
                            spark,
                            profilerProperties,
                            getGlobals(),
                            readerProperties,
                            fromJSON(invalidProfileJson)));
}
Example 5
@Test
public void testReadsMissingReadGroups() {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname,
            true, SAMRecordSetBuilder.DEFAULT_CHROMOSOME_LENGTH, SAMRecordSetBuilder.DEFAULT_DUPLICATE_SCORING_STRATEGY);
    samRecordSetBuilder.addFrag("READ", 0, 10000, false);
    JavaRDD<GATKRead> reads = ctx.parallelize(Lists.newArrayList(samRecordSetBuilder.getRecords()), 2).map(SAMRecordToGATKReadAdapter::new);
    reads = reads.map(r -> { r.setReadGroup(null); return r; });
    SAMFileHeader header = samRecordSetBuilder.getHeader();

    try {
        MarkDuplicatesSparkUtils.transformToDuplicateNames(header, MarkDuplicatesScoringStrategy.SUM_OF_BASE_QUALITIES, null, reads, 2, false).collect();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e) {
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.ReadMissingReadGroup);
    }
}
Example 6
@Test
public void readExceptionsCauseAbort() throws StandardException, IOException {
    SparkPairDataSet<ExecRow, ExecRow> dataset = new SparkPairDataSet<>(SpliceSpark.getContextUnsafe().parallelizePairs(tenRows).mapToPair(new FailFunction()));
    JavaPairRDD<ExecRow, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

    final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(false));
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
    // workaround for SPARK-21549 on spark-2.2.0
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp");

    File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
    file.delete();
    file.mkdir();
    conf.set("abort.directory", file.getAbsolutePath());

    try {
        rdd.saveAsNewAPIHadoopDataset(conf);
        Assert.fail("Expected exception");
    } catch (Exception se) {
        Assert.assertTrue("Unexpected exception", se instanceof SparkException);
    }

    File[] files = file.listFiles();
    Assert.assertTrue("Abort() not called", files.length > 0);
}
Example 7
@Test
public void writeExceptionsCauseAbort() throws StandardException, IOException {
    SparkPairDataSet<RowLocation, ExecRow> dataset = new SparkPairDataSet<>(SpliceSpark.getContextUnsafe().parallelizePairs(tenRows).mapToPair(new ToRowLocationFunction()));
    JavaPairRDD<RowLocation, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

    final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(true));
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
    // workaround for SPARK-21549 on spark-2.2.0
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp");

    File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
    file.delete();
    file.mkdir();
    conf.set("abort.directory", file.getAbsolutePath());

    try {
        rdd.saveAsNewAPIHadoopDataset(conf);
        Assert.fail("Expected exception");
    } catch (Exception se) {
        Assert.assertTrue("Unexpected exception", se instanceof SparkException);
    }

    File[] files = file.listFiles();
    Assert.assertTrue("Abort() not called", files.length > 0);
}
Example 8
public QueryResult getFlinkQueryResult(Entry<TableSchema, List<Row>> sparkData) throws Exception {
    if (sparkData == null) {
        return new QueryResult(new ArrayList<>(), new ArrayList<>());
    }
    TableSchema tableSchema = sparkData.getKey();
    if (tableSchema == null || tableSchema.getFieldDataTypes().length != tableSchema.getFieldNames().length) {
        throw new SparkException("collect data error");
    }
    org.apache.flink.table.types.DataType[] fieldDataTypes = tableSchema.getFieldDataTypes();
    String[] fieldNames = tableSchema.getFieldNames();
    List<Row> value = sparkData.getValue();
    List<Object> data = new ArrayList<>();
    List<ColumnMetaData> meta = new ArrayList<>();
    value.stream().forEach(column -> {
        Object[] objects = new Object[column.getArity()];
        for (int i = 0; i < column.getArity(); i++) {
            objects[i] = column.getField(i);
        }
        data.add(Arrays.asList(objects));
    });
    for (int index = 0; index < fieldNames.length; index++) {
        ScalarType columnType = getColumnType2(fieldDataTypes[index]);
        meta.add(new ColumnMetaData(index, false, true, false, false,
                1, true, -1, fieldNames[index], fieldNames[index], null, -1, -1, null, null,
                columnType, true, false, false, columnType.columnClassName()));
    }
    return new QueryResult(meta, data);
}
Example 9
@Test(expected = SparkException.class)
public void testStringDataFrameToVectorDataFrameNonNumbers() {
    List<String> list = new ArrayList<>();
    list.add("[cheeseburger,fries]");
    JavaRDD<String> javaRddString = sc.parallelize(list);
    JavaRDD<Row> javaRddRow = javaRddString.map(new StringToRow());
    SparkSession sparkSession = SparkSession.builder().sparkContext(sc.sc()).getOrCreate();
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> inDF = sparkSession.createDataFrame(javaRddRow, schema);
    Dataset<Row> outDF = RDDConverterUtilsExt.stringDataFrameToVectorDataFrame(sparkSession, inDF);
    // trigger evaluation to throw exception
    outDF.collectAsList();
}
Example 10
@Test(expected = SparkException.class)
public void readInputFormatMismatchTranslator() throws Exception {
    Map<String, Object> paramMap = new HashMap<>();
    paramMap.put(FileSystemInput.FORMAT_CONFIG, "input-format");
    paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(CSV_DATA).getPath());
    paramMap.put(FileSystemInput.INPUT_FORMAT_TYPE_CONFIG, KeyValueTextInputFormat.class.getCanonicalName());
    paramMap.put("translator.type", DummyInputFormatTranslator.class.getCanonicalName());
    config = ConfigFactory.parseMap(paramMap);

    FileSystemInput formatInput = new FileSystemInput();
    assertNoValidationFailures(formatInput, config);
    formatInput.configure(config);

    formatInput.read().show();
}
Example 11
/**
 * Test that in VCF mode we're >= 99% concordant with GATK3.5 results
 * THIS TEST explodes with an exception because Allele-Specific annotations are not supported in vcf mode yet.
 * It's included to parallel the matching (also exploding) test for the non-spark HaplotypeCaller
 * {@link org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerIntegrationTest#testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations()}
 */
@Test(expectedExceptions = SparkException.class) // this should be a UserException, but spark exceptions are not unwrapped yet
public void testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations() throws Exception {
    Utils.resetRandomGenerator();
    final File output = createTempFile("testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations", ".vcf");

    // Created by running:
    // java -jar ~/bin/GenomeAnalysisTK-3.5.0/GenomeAnalysisTK.jar -T HaplotypeCaller \
    //     -I ./src/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam \
    //     -R src/test/resources/large/human_g1k_v37.20.21.fasta -L 20:10000000-10100000 \
    //     --out as.gatk3.5.noDownsample.vcf -G StandardHC -G Standard -G AS_Standard \
    //     --disableDithering --no_cmdline_in_header -dt NONE --maxReadsInRegionPerSample 100000000 --minReadsPerAlignmentStart 100000
    final File gatk3Output = new File(TEST_FILES_DIR + "expected.testVCFMode.gatk3.5.alleleSpecific.vcf");

    final String[] args = {
            "-I", NA12878_20_21_WGS_bam,
            "-R", b37_2bit_reference_20_21,
            "-L", "20:10000000-10100000",
            "-O", output.getAbsolutePath(),
            "-G", "StandardAnnotation",
            "-G", "AS_StandardAnnotation",
            "-pairHMM", "AVX_LOGLESS_CACHING",
            "-stand_call_conf", "30.0"
    };

    runCommandLine(args);

    final double concordance = HaplotypeCallerIntegrationTest.calculateConcordance(output, gatk3Output);
    Assert.assertTrue(concordance >= 0.99, "Concordance with GATK 3.5 in AS VCF mode is < 99% (" + concordance + ")");
}
Example 12
@Test
public void testNonExistantReadGroupInRead() {
    final MarkDuplicatesSparkTester tester = new MarkDuplicatesSparkTester(true);
    tester.addMatePair("RUNID:7:1203:2886:82292", 19, 19, 485253, 485253, false, false, true, true, "42M59S", "59S42M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "NotADuplicateGroup");
    try {
        tester.runTest();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e) {
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.HeaderMissingReadGroup);
    }
}
Example 13
@Test
public void testNoReadGroupInRead() {
    final MarkDuplicatesSparkTester tester = new MarkDuplicatesSparkTester(true);
    tester.addMatePair("RUNID:7:1203:2886:82292", 19, 19, 485253, 485253, false, false, true, true, "42M59S", "59S42M", true, false, false, false, false, DEFAULT_BASE_QUALITY, null);
    try {
        tester.runTest();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e) {
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.ReadMissingReadGroup);
    }
}
Example 14
private void processStream(AsyncResponse async, List<String> inputStrings) {
    try {
        SparkConf sparkConf = new SparkConf().setMaster("local[*]")
                .setAppName("JAX-RS Spark Connect " + SparkUtils.getRandomId());
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
        SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
        jssc.addStreamingListener(sparkListener);

        JavaDStream<String> receiverStream = null;
        if ("queue".equals(receiverType)) {
            Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
            for (int i = 0; i < 30; i++) {
                rddQueue.add(jssc.sparkContext().parallelize(inputStrings));
            }
            receiverStream = jssc.queueStream(rddQueue);
        } else {
            receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings));
        }

        JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false);
        wordCounts.foreachRDD(new OutputFunction(streamOut));
        jssc.start();

        executor.execute(new SparkJob(async, sparkListener));
    } catch (Exception ex) {
        // the compiler does not allow to catch SparkException directly
        if (ex instanceof SparkException) {
            async.cancel(60);
        } else {
            async.resume(new WebApplicationException(ex));
        }
    }
}
Example 15
private IOException parseSparkException(SparkException e) {
    String errMessage = e.getMessage();
    if (errMessage == null)
        return new IOException("Unknown Spark exception");
    else if (SPARK_CANCELLATION_PATTERN.matcher(errMessage).find())
        return new OperationCancelledException();
    else
        return new IOException(e);
}
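A possible call site for parseSparkException, sketched under the assumption that the surrounding code wraps some Spark action in a helper of its own (collectRows() below is a hypothetical placeholder):

// Hypothetical usage: collectRows() stands in for any Spark action that can
// fail with a SparkException at runtime; other failures are wrapped as IOException.
try {
    return collectRows();
} catch (Exception e) {
    if (e instanceof SparkException) {
        throw parseSparkException((SparkException) e);
    }
    throw new IOException(e);
}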