Java源码示例:org.apache.pig.impl.logicalLayer.schema.Schema
示例1
/**
 * Creates a local-mode PigServer, resets the shared {@code data} store and
 * registers MAX rows of (index, random string) under the name "test".
 */
@Before
public void setUp() throws Exception {
    ArrayList<Tuple> rows = new ArrayList<Tuple>();
    log.info("Setting up");
    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    // One row per index, each paired with a freshly generated random string.
    Random random = new Random();
    int index = 0;
    while (index < MAX) {
        rows.add(tuple(index, GenRandomData.genRandString(random)));
        index++;
    }

    // Schema describing the rows: (index: int, name: chararray).
    Schema rowSchema = new Schema();
    rowSchema.add(new Schema.FieldSchema("index", DataType.INTEGER));
    rowSchema.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
    data.set("test", rowSchema, rows);
}
示例2
/**
 * Declares the two argument signatures accepted by this function:
 * (chararray, chararray) and (chararray, chararray, boolean).
 *
 * @return one FuncSpec per supported signature, both bound to this class
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    final List<FuncSpec> mappings = new ArrayList<FuncSpec>();

    // Signature 1: two chararray arguments.
    List<FieldSchema> twoArgFields = new ArrayList<FieldSchema>();
    twoArgFields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    twoArgFields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    Schema twoArgSchema = new Schema(twoArgFields);
    mappings.add(new FuncSpec(this.getClass().getName(), twoArgSchema));

    // Signature 2: two chararray arguments followed by a boolean.
    List<FieldSchema> threeArgFields = new ArrayList<FieldSchema>();
    threeArgFields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    threeArgFields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    threeArgFields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
    Schema threeArgSchema = new Schema(threeArgFields);
    mappings.add(new FuncSpec(this.getClass().getName(), threeArgSchema));

    return mappings;
}
示例3
/**
 * Writes one named field to the JSON generator, provided
 * {@code shouldWriteField} allows it for this name and depth.
 *
 * Fields with a configured partial map operation are delegated to
 * {@code writePartialUpdateAndRemoveMap}; all others get their field name
 * written, followed by either a partial update or a plain value.
 *
 * @throws IOException if writing to the generator fails
 */
@SuppressWarnings("unchecked")
private static void writeField(String name, Object value, Byte type, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth) throws IOException {
    if (!shouldWriteField(name, properties, depth)) {
        return;
    }

    // Check whether the name has the property update-map-fields/remove-map-fields.
    // If so, special treatment is needed because the tuple must be looped through.
    // Note that this is not a Vespa operation such as "put" or "update"; it is the
    // field-level operation name to use, such as "assign" or "remove".
    String mapOperation = getPartialOperation(mapPartialOperationMap, name, properties);
    if (mapOperation != null) {
        writePartialUpdateAndRemoveMap(name, value, g, properties, schema, op, depth, mapOperation);
        return;
    }

    g.writeFieldName(name);
    if (shouldWritePartialUpdate(op, depth)) {
        writePartialUpdate(value, type, g, name, properties, schema, op, depth);
        return;
    }
    writeValue(value, type, g, name, properties, schema, op, depth);
}
示例4
/**
 * Returns the output schema for this function.
 *
 * When no schema function is configured, the preconfigured {@code schema}
 * is returned. Otherwise the input schema's string form (without its
 * enclosing braces) is passed to the schema function, and the string it
 * returns is parsed into a Schema.
 *
 * @param input the input schema
 * @return the resulting schema
 * @throws RuntimeException wrapping any ParserException or IOException
 */
@Override
public Schema outputSchema(Schema input) {
    if (this.schemaFunction == null) {
        return this.schema;
    }
    try {
        Tuple argument = TupleFactory.getInstance().newTuple(1);
        // Strip enclosing '{}' from schema
        String withoutBraces = input.toString()
                .replaceAll("^\\{", "")
                .replaceAll("\\}$", "");
        argument.set(0, withoutBraces);
        String generated = (String) this.schemaFunction.exec(argument);
        return Utils.getSchemaFromString(generated);
    } catch (ParserException pe) {
        throw new RuntimeException(pe);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
示例5
/**
 * Test one-level Pig Schema: multiple fields for a bag.
 *
 * Builds a bag field "f0" whose inner schema holds two flat fields and
 * expects converting it through ResourceSchema back to a Pig schema to
 * fail with error code 2218.
 */
@Test
public void testResourceSchemaWithInvalidPigSchema()
        throws FrontendException {
    String[] fieldNames = {"f1", "f2"};
    byte[] fieldTypes = {DataType.CHARARRAY, DataType.INTEGER};
    Schema innerSchema = TypeCheckingTestUtil.genFlatSchema(fieldNames, fieldTypes);
    Schema bagSchema = new Schema(new Schema.FieldSchema("f0", innerSchema, DataType.BAG));
    try {
        Schema.getPigSchema(new ResourceSchema(bagSchema));
        Assert.fail();
    } catch (FrontendException expected) {
        assertTrue(expected.getErrorCode()==2218);
    }
}
示例6
/**
 * Checks describing nested aliases inside FOREACH blocks: a nested
 * relational operator (ORDER) must report the schema of its input, and a
 * nested expression alias must report error code 1113 if it throws.
 */
@Test
public void testDescribeNestedAlias() throws Exception{
    String[] rows = { "1\t3", "2\t4", "3\t5" };
    Util.createInputFile(cluster, "table_testDescribeNestedAlias", rows);

    pigServer.registerQuery("A = LOAD 'table_testDescribeNestedAlias' as (a0, a1);");
    pigServer.registerQuery("P = GROUP A by a1;");
    // Test RelationalOperator
    pigServer.registerQuery("B = FOREACH P { D = ORDER A by $0; generate group, D.$0; };");
    // Test ExpressionOperator - negative test case
    pigServer.registerQuery("C = FOREACH A { D = a0/a1; E=a1/a0; generate E as newcol; };");

    Schema nested = pigServer.dumpSchemaNested("B", "D");
    Assert.assertTrue(nested.toString().equalsIgnoreCase("{a0: bytearray,a1: bytearray}"));

    // NOTE(review): this branch also passes when no exception is thrown at all;
    // confirm whether a missing exception should be treated as a failure.
    try {
        nested = pigServer.dumpSchemaNested("C", "E");
    } catch (FrontendException fe) {
        Assert.assertTrue(fe.getErrorCode() == 1113);
    }
}
示例7
/**
 * Feeds ReportBuilder a bag containing a single (msisdn, ts, center_lon,
 * center_lat) tuple and expects the returned bag to equal the input bag.
 */
@Test
public void getBagTest() throws Exception
{
    ReportBuilder builder = new ReportBuilder();
    builder.setUDFContextSignature("test");

    // Describe the tuple layout, then wrap it in a bag field.
    List<Schema.FieldSchema> tupleFields = new ArrayList<Schema.FieldSchema>();
    tupleFields.add(new Schema.FieldSchema("msisdn", DataType.LONG));
    tupleFields.add(new Schema.FieldSchema("ts", DataType.INTEGER));
    tupleFields.add(new Schema.FieldSchema("center_lon", DataType.DOUBLE));
    tupleFields.add(new Schema.FieldSchema("center_lat", DataType.DOUBLE));
    Schema tupleSchema = new Schema(tupleFields);
    Schema.FieldSchema bagField =
            new Schema.FieldSchema(ReportBuilder.ORDERED_ROUTES, tupleSchema, DataType.BAG);
    Schema bagSchema = new Schema(bagField);
    builder.outputSchema(bagSchema);

    // Build the input: a tuple whose only field is a one-row bag.
    Tuple argument = TupleFactory.getInstance().newTuple();
    DataBag routes = BagFactory.getInstance().newDefaultBag();
    routes.add(TupleFactory.getInstance().newTuple(Arrays.asList(71230000000L, 1382351612, 10.697, 20.713)));
    argument.append(routes);

    DataBag produced = builder.exec(argument);
    Assert.assertEquals(routes, produced);
}
示例8
/**
 * Stores a DISTINCT + LIMIT relation with PigStorage's '-schema' option,
 * loads it back with the same option and checks that a schema is reported
 * for the reloaded relation.
 */
@Test //end to end test
public void testLimitStoreSchema1() throws Exception{
    String[] students = {"joe smith:18:3.5","amy brown:25:2.5","jim fox:20:4.0","leo fu:55:3.0"};
    Util.createLocalInputFile("student", students);

    pigServer.registerQuery("a = load 'student' using " + PigStorage.class.getName() + "(':') as (name, age, gpa);");
    pigServer.registerQuery("d = distinct a;");
    pigServer.registerQuery("lim = limit d 1;");

    // Start from a clean output location before storing.
    String outputDir = "limitSchemaOut";
    Util.deleteDirectory(new File(outputDir));
    pigServer.store("lim", outputDir, "PigStorage('\\t', '-schema')");
    pigServer.dumpSchema("lim");

    pigServer.registerQuery("b = LOAD '" + outputDir + "' using PigStorage('\\t', '-schema');");
    Schema reloaded = pigServer.dumpSchema("b");
    System.err.println(reloaded);
    Assert.assertNotNull(reloaded);
}
示例9
/**
 * Builds the ten-column input schema: three chararray identifier columns
 * followed by seven double measurement columns, in that order.
 *
 * @return the assembled schema
 * @throws FrontendException declared by the original signature
 */
private Schema buildElNinoInputSchema() throws FrontendException {
    final String[] textColumns = {"buoy_day_ID", "buoy", "day"};
    final String[] numericColumns = {
        "latitude", "longitude", "zon_winds", "mer_winds", "humidity", "airtemp", "s_s_temp"
    };

    List<FieldSchema> columns = new ArrayList<FieldSchema>();
    for (String column : textColumns) {
        columns.add(new Schema.FieldSchema(column, DataType.CHARARRAY));
    }
    for (String column : numericColumns) {
        columns.add(new Schema.FieldSchema(column, DataType.DOUBLE));
    }
    return new Schema(columns);
}
示例10
/**
 * Round-trips a Thrift object and compares the two Pig conversions.
 * <ul>
 * <li>Writes using the thrift mapping</li>
 * <li>Reads using the pig mapping</li>
 * <li>Uses Elephant Bird to convert from thrift to pig</li>
 * <li>Checks that both transformations give the same result</li>
 * </ul>
 * Finally verifies that filtering the converted schema by the Pig schema
 * leaves it unchanged.
 *
 * @param o the object to convert
 * @throws TException
 */
public static <T extends TBase<?,?>> void validateSameTupleAsEB(T o) throws TException {
    final ThriftSchemaConverter converter = new ThriftSchemaConverter();
    @SuppressWarnings("unchecked")
    final Class<T> thriftClass = (Class<T>) o.getClass();
    final MessageType parquetSchema = converter.convert(thriftClass);
    final StructType thriftStruct = ThriftSchemaConverter.toStructType(thriftClass);
    final ThriftToPig<T> elephantBird = new ThriftToPig<T>(thriftClass);
    final Schema pigSchema = elephantBird.toSchema();

    // Wire the write protocol so that records written by `o` are materialized as Pig tuples.
    final TupleRecordMaterializer materializer = new TupleRecordMaterializer(parquetSchema, pigSchema, true);
    RecordConsumer consumer = new ConverterConsumer(materializer.getRootConverter(), parquetSchema);
    final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(parquetSchema);
    ParquetWriteProtocol protocol =
            new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(consumer), columnIO, thriftStruct);
    o.write(protocol);

    final Tuple actual = materializer.getCurrentRecord();
    final Tuple expected = elephantBird.getPigTuple(o);
    assertEquals(expected.toString(), actual.toString());

    final MessageType filtered = new PigSchemaConverter().filter(parquetSchema, pigSchema);
    assertEquals(parquetSchema.toString(), filtered.toString());
}
示例11
/**
 * Wraps the input schema in a single BAG field named "dimensions".
 *
 * @param input the input schema
 * @return a schema with one bag field whose inner schema is {@code input}
 */
@Override
public Schema outputSchema(Schema input) {
    // "dimensions" is the default namespace assigned to the output schema.
    // It can be overridden by specifying user-defined schema names in the
    // foreach operator; without them, every field of the tuple produced by a
    // foreach using this UDF carries the "dimensions::" namespace.
    try {
        FieldSchema dimensionsBag = new FieldSchema("dimensions", input, DataType.BAG);
        return new Schema(dimensionsBag);
    } catch (FrontendException unexpected) {
        // BAG is specified explicitly, so this should not happen.
        throw new RuntimeException(unexpected);
    }
}
示例12
/**
 * Orders a schema-less load by the open-ended range "$3 .." descending and
 * checks the null schema, the single sort expression plan and the
 * resulting row order.
 */
@Test
public void testRangeOrderByStartNOSchema() throws IOException, ParserException{
    String query = " l1 = load '" + INP_FILE_5FIELDS + "';"
            + " o = order l1 by $3 .. DESC;";
    compileAndCompareSchema((Schema)null, query, "o");

    //check number of sort expression plans
    LogicalPlan plan = createAndProcessLPlan(query);
    boolean[] ascending = {false};
    checkNumExpressionPlansForSort(plan, 1, ascending);

    Util.registerMultiLineQuery(pigServer, query);
    pigServer.explain("o", System.err);
    Iterator<Tuple> actual = pigServer.openIterator("o");

    String[] expectedRows = {
        "(11,21,31,41,51)",
        "(10,20,30,40,50)",
    };
    List<Tuple> expected = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputs(actual, expected);
}
示例13
/**
 * Writes partial-update or remove entries for a map field.
 *
 * {@code value} is cast to a DataBag. Each two-element tuple in it whose
 * first element is the map key and whose second element is a TUPLE produces
 * one JSON field named {@code name{key}}. Tuples of any other size, or with
 * a non-TUPLE second element, are skipped.
 *
 * @throws IOException if writing to the generator fails
 */
private static void writePartialUpdateAndRemoveMap(String name, Object value, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth, String operation) throws IOException {
    // Descend into the schema of the first field, then take the schema of that
    // level's second field as the schema of the map values (null-safe).
    Schema elementSchema = null;
    Schema valueSchema = null;
    if (schema != null) {
        elementSchema = schema.getField(0).schema;
        if (elementSchema != null) {
            valueSchema = elementSchema.getField(1).schema;
        }
    }

    // data format { ( key; id, value: (abc,123,(123234,bbaa))) }
    // the first element of each tuple in the bag will be the map to update
    // the second element of each tuple in the bag will be the new value of the map
    DataBag entries = (DataBag) value;
    for (Tuple entry : entries) {
        if (entry.size() != 2) {
            continue;
        }
        String mapKey = (String) entry.get(0);
        Object newValue = entry.get(1);
        Byte newValueType = DataType.findType(newValue);
        if (newValueType != DataType.TUPLE) {
            continue;
        }

        g.writeFieldName(name + "{" + mapKey + "}");
        if (!operation.equals(PARTIAL_UPDATE_REMOVE)) {
            writePartialUpdate(newValue, newValueType, g, name, properties, valueSchema, op, depth);
            continue;
        }
        // Remove operation: emit an object holding the remove field set to 0.
        g.writeStartObject();
        g.writeFieldName(PARTIAL_UPDATE_REMOVE);
        g.writeNumber(0);
        g.writeEndObject();
    }
}
示例14
/**
 * Loads a map declared as {@code [int]}, casts it to {@code [long]} and
 * checks both the reported schema and that every map value is a Long.
 */
@Test
public void testComplexCast2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
        "[key#1,key2#2]",
    };
    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexCast2", input);
    String query = "a = load '" + tmpDirName + "/testComplexCast2' as (m:[int]);" +
        "b = foreach a generate ([long])m;";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("b");
    // assertEquals takes (message, expected, actual); the expected literal goes
    // first so a failure reports the two values the right way round.
    assertEquals("Checking expected schema", "{m: map[long]}", sch.toString());

    Iterator<Tuple> it = pig.openIterator("b");
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());

    // Verify the type before casting, then reuse the map for all value checks.
    Object field = t.get(0);
    Assert.assertTrue(field instanceof Map);
    Map<?, ?> m = (Map<?, ?>) field;
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertTrue(m.get("key") instanceof Long);
    Assert.assertEquals("1", m.get("key").toString());
    Assert.assertTrue(m.get("key2") instanceof Long);
    Assert.assertEquals("2", m.get("key2").toString());

    Assert.assertFalse(it.hasNext());
}
示例15
/**
 * Builds the output schema from the third field of the input schema.
 *
 * @param input the input schema
 * @return a schema holding only the input's field at index 2, or
 *         {@code null} when the input has fewer than three fields or any
 *         exception occurs while reading it
 */
@Override
public Schema outputSchema(Schema input) {
    Schema result = null;
    try {
        if (input.size() >= 3) {
            result = new Schema(input.getField(2));
        }
    } catch (Exception ignored) {
        // Any failure is reported as a null schema, same as too few fields.
        result = null;
    }
    return result;
}
示例16
/**
 * Declares the single supported signature: (double, double).
 *
 * @return a one-element list binding that signature to this class
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<Schema.FieldSchema> arguments = new ArrayList<Schema.FieldSchema>();
    arguments.add(new Schema.FieldSchema(null, DataType.DOUBLE));
    arguments.add(new Schema.FieldSchema(null, DataType.DOUBLE));
    Schema signature = new Schema(arguments);

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), signature));
    return mappings;
}
示例17
/**
 * Declares the single supported signature: one double argument.
 *
 * @return a one-element list binding that signature to this class
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema doubleArg = new Schema.FieldSchema(null, DataType.DOUBLE);
    FuncSpec spec = new FuncSpec(this.getClass().getName(), new Schema(doubleArg));

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(spec);
    return mappings;
}
示例18
/**
 * Null-tolerant schema comparison.
 *
 * @param currentSchema the first schema, possibly null
 * @param newSchema     the second schema, possibly null
 * @return when {@code currentSchema} is null, whether {@code newSchema} is
 *         also null; otherwise the result of
 *         {@code Schema.equals(currentSchema, newSchema, false, true)}
 */
private boolean nullEquals(Schema currentSchema, Schema newSchema) {
    if (currentSchema != null) {
        return Schema.equals(currentSchema, newSchema, false, true);
    }
    return newSchema == null;
}
示例19
/**
 * Records one map value in this summary.
 *
 * Passes the map to the superclass, records its size, lazily creates the
 * key and value summaries on the first non-empty map, then adds every
 * entry's key and value to them.
 *
 * @param schema the schema from which the first field is looked up
 * @param m      the map to record
 */
public void add(Schema schema, Map<?, ?> m) {
    super.add(m);
    size.add(m.size());
    FieldSchema mapField = getField(schema, 0);

    // The summaries are created on first use; both are named after the map's field.
    boolean needsInit = m.size() > 0 && key == null;
    if (needsInit) {
        key = new FieldSummaryData();
        key.setName(getName(mapField));
        value = new FieldSummaryData();
        value.setName(getName(mapField));
    }

    for (Map.Entry<?, ?> pair : m.entrySet()) {
        key.add(null, pair.getKey());
        value.add(getSchema(mapField), pair.getValue());
    }
}
示例20
/**
 * Builds a flat schema by pairing each alias with the type at the same index.
 *
 * @param aliases field aliases, in order
 * @param types   field types, parallel to {@code aliases}
 * @return a schema with one field per alias
 * @throws AssertionError if the two arrays differ in length
 */
public static Schema genFlatSchema(String[] aliases, byte[] types) {
    final int fieldCount = aliases.length;
    if (fieldCount != types.length) {
        throw new AssertionError(" aliase number and type number don't match");
    }

    List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>();
    int position = 0;
    for (String alias : aliases) {
        fields.add(new Schema.FieldSchema(alias, types[position]));
        position++;
    }
    return new Schema(fields);
}
示例21
/**
 * Runs a query against a mocked server whose schema has one NULL-typed
 * column "a" and expects the query to succeed with a single null value in
 * that column.
 */
@Test
public void testNullTypeInTuple() throws IOException {
    Query emptyQuery = new Query();
    emptyQuery.value = "";

    Schema nullTypedSchema = getSchema(makeFieldSchema("a", DataType.NULL));
    Tuple mockRow = makeTuple("something");
    sty = getSty(withMockResult(withMockSchema(getServer(), nullTypedSchema), mockRow));

    runWithoutOutput(() -> sty.execute(emptyQuery));

    Assert.assertFalse(emptyQuery.failed());
    List<TypedObject> columnValues = emptyQuery.getResult().getColumn("a").getValues();
    Assert.assertNotNull(columnValues);
    Assert.assertEquals(columnValues.size(), 1);
    Assert.assertNull(columnValues.get(0));
}
示例22
/**
 * Orders a schema-less load by a mix of ranges and single columns
 * ("$1 .. $2 DESC, $0 , $4 .. DESC") and checks the null schema, the four
 * sort expression plans with their directions, and the resulting row order.
 */
@Test
public void testRangeOrderByMixNOSchema() throws IOException, ParserException{
    String query = " l1 = load '" + INP_FILE_5FIELDS + "';"
            + " o = order l1 by $1 .. $2 DESC, $0 , $4 .. DESC;";
    compileAndCompareSchema((Schema)null, query, "o");

    //check number of sort expression plans
    LogicalPlan plan = createAndProcessLPlan(query);
    boolean[] ascending = {false, false,true,false};
    checkNumExpressionPlansForSort(plan, 4, ascending);

    Util.registerMultiLineQuery(pigServer, query);
    pigServer.explain("o", System.err);
    Iterator<Tuple> actual = pigServer.openIterator("o");

    String[] expectedRows = {
        "(11,21,31,41,51)",
        "(10,20,30,40,50)",
    };
    List<Tuple> expected = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputs(actual, expected);
}
示例23
/**
 * Declares the single supported signature: one datetime argument.
 *
 * @return a one-element list binding that signature to this class
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema dateTimeArg = new Schema.FieldSchema(null, DataType.DATETIME);
    FuncSpec spec = new FuncSpec(this.getClass().getName(), new Schema(dateTimeArg));

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(spec);
    return mappings;
}
示例24
/**
 * Validates that the first input field is a BAG whose first inner field is
 * a TUPLE, then declares a single DOUBLE output field.
 *
 * The output field name comes from {@code getSchemaName}, called with this
 * class's lower-cased name and the input schema.
 *
 * @param input the input schema
 * @return a schema with one DOUBLE field
 * @throws RuntimeException if the input is not a bag of tuples, or wrapping
 *         a FrontendException raised while reading the schema
 */
@Override
public Schema outputSchema(Schema input)
{
    try {
        Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG)
        {
            throw new RuntimeException("Expected a BAG as input");
        }

        Schema.FieldSchema innerField = bagField.schema.getField(0);
        if (innerField.type != DataType.TUPLE)
        {
            String foundType = DataType.findTypeName(innerField.type);
            throw new RuntimeException(
                    String.format("Expected input bag to contain a TUPLE, but instead found %s", foundType));
        }

        String lowerCasedClassName = this.getClass().getName().toLowerCase();
        Schema.FieldSchema outputField =
                new Schema.FieldSchema(getSchemaName(lowerCasedClassName, input), DataType.DOUBLE);
        return new Schema(outputField);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
示例25
/**
 * Declares the single supported signature: (chararray, chararray).
 *
 * @return a one-element list binding that signature to this class
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema signature = new Schema();
    signature.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    signature.add(new Schema.FieldSchema(null, DataType.CHARARRAY));

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), signature));
    return mappings;
}
示例26
/**
 * Loads a map declared without a value type ({@code m:[]}) and checks both
 * the reported schema and that every map value is a DataByteArray.
 */
@Test
public void testUnTypedMap() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
        "[key#1,key2#2]",
    };
    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testUnTypedMap", input);
    String query = "a = load '" + tmpDirName + "/testUnTypedMap' as (m:[]);";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("a");
    // assertEquals takes (message, expected, actual); the expected literal goes
    // first so a failure reports the two values the right way round.
    assertEquals("Checking expected schema", "{m: map[]}", sch.toString());

    Iterator<Tuple> it = pig.openIterator("a");
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());

    // Verify the type before casting, then reuse the map for all value checks.
    Object field = t.get(0);
    Assert.assertTrue(field instanceof Map);
    Map<?, ?> m = (Map<?, ?>) field;
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertTrue(m.get("key") instanceof DataByteArray);
    Assert.assertEquals("1", m.get("key").toString());
    Assert.assertTrue(m.get("key2") instanceof DataByteArray);
    Assert.assertEquals("2", m.get("key2").toString());

    Assert.assertFalse(it.hasNext());
}
示例27
/**
 * End-to-end check of a nested LIMIT followed by a nested ORDER inside a
 * FOREACH over grouped data, compared against the expected grouped rows.
 */
@Test
public void testNestedSortEndToEnd1() throws Exception {
    File localInput = Util.createTempFileDelOnExit("test", "txt");
    String[] inputRows = {
        "1\t2\t3",
        "1\t3\t4",
        "1\t2\t4",
        "1\t2\t4",
        "1\t2\t4",
        "2\t3\t4"
    };
    PrintStream writer = new PrintStream(new FileOutputStream(localInput));
    for (String row : inputRows) {
        writer.println(row);
    }
    writer.close();

    String expected[] = {
        "(2,{(2,3,4)})",
        "(1,{(1,2,3),(1,2,4),(1,2,4),(1,2,4),(1,3,4)})"
    };

    // Copy the local file to the cluster under the path returned by removeColon.
    String clusterPath = Util.removeColon(localInput.getCanonicalPath());
    Util.copyFromLocalToCluster(cluster, localInput.getCanonicalPath(), clusterPath);

    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(clusterPath) + "' AS (a0, a1, a2);");
    pigServer.registerQuery("B = group A by $0 parallel 2;");
    pigServer.registerQuery("C = foreach B { D = limit A 10; E = order D by $1; generate group, E;};");

    Iterator<Tuple> actual = pigServer.openIterator("C");
    Schema resultSchema = pigServer.dumpSchema("C");
    Util.checkQueryOutputsAfterSortRecursive(actual, expected,
            org.apache.pig.newplan.logical.Util.translateSchema(resultSchema));
    Util.deleteFile(cluster, clusterPath);
}
示例28
/**
 * Joins two schema-less loads on ($0,$1), first with 'merge' and then with
 * the default join, and asserts that the default join reports a null schema.
 */
@Test
public void testMergeJoinSch2() throws IOException{
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");

    // The merge-join schema is dumped but not asserted on.
    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1) using 'merge';");
    Schema mergeJoinSchema = pigServer.dumpSchema("C");

    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
    Schema defaultJoinSchema = pigServer.dumpSchema("C");
    Assert.assertTrue(defaultJoinSchema == null);
}
示例29
/**
 * Keeps only the fields of {@code schemaToFilter} that the requested Pig
 * schema asks for, in the order the Pig schema lists them.
 *
 * Each Pig field is resolved to a column name via {@code name}, passing its
 * alias and "field_" + index. Columns absent from {@code schemaToFilter}
 * are skipped; present ones are passed through {@code filter}.
 *
 * @param schemaToFilter     the group type to select fields from
 * @param requestedPigSchema the Pig schema listing the wanted fields
 * @param requiredFieldsList not used by this implementation
 * @return the filtered types
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
    List<FieldSchema> requestedFields = requestedPigSchema.getFields();
    List<Type> selected = new ArrayList<Type>();
    int position = 0;
    for (FieldSchema requested : requestedFields) {
        String columnName = name(requested.alias, "field_"+position);
        position++;
        if (!schemaToFilter.containsField(columnName)) {
            continue;
        }
        selected.add(filter(schemaToFilter.getType(columnName), requested));
    }
    return selected;
}
示例30
/**
 * Declares the single supported signature: one datetime argument.
 *
 * @return a one-element list binding that signature to this class
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema signature = new Schema(new Schema.FieldSchema(null, DataType.DATETIME));
    String implementation = this.getClass().getName();

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(implementation, signature));
    return mappings;
}