Java source code examples: org.apache.pig.impl.logicalLayer.schema.Schema

Example 1
@Before
public void setUp() throws Exception {
    ArrayList<Tuple> tuples = new ArrayList<Tuple>();

    log.info("Setting up");

    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    Random r = new Random();
    for (int i = 0; i < MAX; i++) {
        tuples.add(tuple(i,GenRandomData.genRandString(r)));
    }

    Schema s = new Schema();
    s.add(new Schema.FieldSchema("index", DataType.INTEGER));
    s.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
    data.set("test", s, tuples);
}
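A minimal follow-up sketch (not part of the original example) of how such mocked data is typically read back, assuming the mock Storage loader from org.apache.pig.builtin.mock that resetData()/data.set() come from; the "query" and "result" alias names are illustrative:

// Load the mocked "test" alias, run a query, and capture the output through
// the same mock Storage instance.
pigServer.registerQuery("query = LOAD 'test' USING mock.Storage();");
pigServer.registerQuery("STORE query INTO 'result' USING mock.Storage();");
List<Tuple> result = data.get("result");  // tuples captured by the mocked store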
 
Example 2
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {

	final List<FuncSpec> funcList = new ArrayList<FuncSpec>();

	/*either two chararray arguments*/
	List<FieldSchema> fields = new ArrayList<FieldSchema>();
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));

	Schema twoArgInSchema = new Schema(fields);

	funcList.add(new FuncSpec(this.getClass().getName(), twoArgInSchema));

	/* or two chararrays and a boolean argument */
	fields = new ArrayList<FieldSchema>();
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));

	Schema threeArgInSchema = new Schema(fields);

	funcList.add(new FuncSpec(this.getClass().getName(), threeArgInSchema));

	return funcList;
}
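An illustrative sketch (not from the original page) of the exec() side that such a mapping is typically paired with: Pig routes (chararray, chararray) and (chararray, chararray, boolean) calls to this UDF, so exec() only needs to inspect the tuple size. The comparison logic and the meaning of the third argument below are assumptions, not the actual UDF:

@Override
public Boolean exec(Tuple input) throws IOException {
    // two or three arguments, as declared in getArgToFuncMapping() above
    if (input == null || input.size() < 2) {
        return null;
    }
    String left = (String) input.get(0);
    String right = (String) input.get(1);
    if (left == null || right == null) {
        return null;
    }
    // optional third boolean argument, e.g. an ignore-case flag (assumption)
    boolean ignoreCase = input.size() > 2 && Boolean.TRUE.equals(input.get(2));
    return ignoreCase ? left.equalsIgnoreCase(right) : left.equals(right);
}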
 
Example 3
@SuppressWarnings("unchecked")
private static void writeField(String name, Object value, Byte type, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth) throws IOException {
    if (shouldWriteField(name, properties, depth)) {
        String operation = getPartialOperation(mapPartialOperationMap, name, properties);
        // check if the field name has the update-map-fields/remove-map-fields property;
        // if so, it needs special treatment because we have to loop through the tuple.
        // Note that the operation here is not a Vespa document operation such as "put"
        // or "update"; it is the field-level action we wish to apply, such as "assign" or "remove".
        if (operation != null) {
            writePartialUpdateAndRemoveMap(name, value, g, properties, schema, op, depth, operation);
        } else {
            g.writeFieldName(name);
            if (shouldWritePartialUpdate(op, depth)) {
                writePartialUpdate(value, type, g, name, properties, schema, op, depth);
            } else {
                writeValue(value, type, g, name, properties, schema, op, depth);
            }
        }

    }
}
 
Example 4
@Override
public Schema outputSchema(Schema input) {
  if (null != this.schemaFunction) {
    try {
      Tuple t = TupleFactory.getInstance().newTuple(1);
      // Strip enclosing '{}' from schema
      t.set(0, input.toString().replaceAll("^\\{", "").replaceAll("\\}$", ""));
      return Utils.getSchemaFromString((String) this.schemaFunction.exec(t));
    } catch (ParserException pe) {
      throw new RuntimeException(pe);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  } else {
    return this.schema;
  }
}
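A small hedged helper (not from the original page) spelling out the brace-stripping step above: Schema.toString() wraps the field list in '{...}' (the same format visible in Example 6), so the UDF removes the braces before handing the string to schemaFunction and parses the result with Utils.getSchemaFromString:

// Illustrative helper, assuming org.apache.pig.impl.util.Utils is available.
private static String stripEnclosingBraces(Schema s) {
    // e.g. "{a0: bytearray,a1: bytearray}" becomes "a0: bytearray,a1: bytearray"
    return s.toString().replaceAll("^\\{", "").replaceAll("\\}$", "");
}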
 
Example 5
/**
 * Test an invalid one-level Pig Schema: a bag whose inner schema has multiple fields instead of a single tuple
 */
@Test
public void testResourceSchemaWithInvalidPigSchema() throws FrontendException {
    String[] aliases = {"f1", "f2"};
    byte[] types = {DataType.CHARARRAY, DataType.INTEGER};
    Schema level0 = TypeCheckingTestUtil.genFlatSchema(
            aliases,types);
    Schema.FieldSchema fld0 = 
        new Schema.FieldSchema("f0", level0, DataType.BAG);
    Schema level1 = new Schema(fld0);
    try {
        Schema.getPigSchema(new ResourceSchema(level1));
        Assert.fail();
    } catch(FrontendException e) {
        assertTrue(e.getErrorCode()==2218);
    }
}
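For contrast, a hedged sketch (not part of the original test) of the valid shape: a bag field whose inner schema is a single tuple converts without error 2218. It assumes the same helpers and a test method declared throws FrontendException, as above:

// Wrap the flat fields in a TUPLE before placing them inside the BAG.
Schema flat = TypeCheckingTestUtil.genFlatSchema(
        new String[] {"f1", "f2"},
        new byte[] {DataType.CHARARRAY, DataType.INTEGER});
Schema tupleSchema = new Schema(new Schema.FieldSchema("t", flat, DataType.TUPLE));
Schema validBag = new Schema(new Schema.FieldSchema("f0", tupleSchema, DataType.BAG));
// Round-tripping through ResourceSchema now succeeds.
Schema converted = Schema.getPigSchema(new ResourceSchema(validBag));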
 
Example 6
@Test
public void testDescribeNestedAlias() throws Exception{
    String[] input = {
            "1\t3",
            "2\t4",
            "3\t5"
    };

    Util.createInputFile(cluster, "table_testDescribeNestedAlias", input);
    pigServer.registerQuery("A = LOAD 'table_testDescribeNestedAlias' as (a0, a1);");
    pigServer.registerQuery("P = GROUP A by a1;");
    // Test RelationalOperator
    pigServer.registerQuery("B = FOREACH P { D = ORDER A by $0; generate group, D.$0; };");

    // Test ExpressionOperator - negative test case
    pigServer.registerQuery("C = FOREACH A { D = a0/a1; E=a1/a0; generate E as newcol; };");
    Schema schema = pigServer.dumpSchemaNested("B", "D");
    Assert.assertTrue(schema.toString().equalsIgnoreCase("{a0: bytearray,a1: bytearray}"));
    try {
        schema = pigServer.dumpSchemaNested("C", "E");
        Assert.fail("Expected a FrontendException for the nested expression alias");
    } catch (FrontendException e) {
        Assert.assertTrue(e.getErrorCode() == 1113);
    }
}
 
Example 7
@Test
public void getBagTest() throws Exception
{
   ReportBuilder udf = new ReportBuilder();
   udf.setUDFContextSignature("test");
   List<Schema.FieldSchema> fieldSchemaList = new ArrayList<Schema.FieldSchema>();
   fieldSchemaList.add(new Schema.FieldSchema("msisdn", DataType.LONG));
   fieldSchemaList.add(new Schema.FieldSchema("ts", DataType.INTEGER));
   fieldSchemaList.add(new Schema.FieldSchema("center_lon", DataType.DOUBLE));
   fieldSchemaList.add(new Schema.FieldSchema("center_lat", DataType.DOUBLE));
   Schema schemaTuple = new Schema(fieldSchemaList);
   Schema schemaBag = new Schema(new Schema.FieldSchema(ReportBuilder.ORDERED_ROUTES, schemaTuple, DataType.BAG));
   udf.outputSchema(schemaBag);

   Tuple inputTuple = TupleFactory.getInstance().newTuple();
   DataBag inputBag = BagFactory.getInstance().newDefaultBag();
   inputBag.add(TupleFactory.getInstance().newTuple(Arrays.asList(71230000000L, 1382351612, 10.697, 20.713)));
   inputTuple.append(inputBag);
   DataBag outputBag = udf.exec(inputTuple);
   Assert.assertEquals(inputBag, outputBag);
}
 
Example 8
@Test // end-to-end test
public void testLimitStoreSchema1() throws Exception{
    Util.createLocalInputFile("student", new String[]{"joe smith:18:3.5","amy brown:25:2.5","jim fox:20:4.0","leo fu:55:3.0"});
    
    pigServer.registerQuery("a = load 'student' using " + PigStorage.class.getName() + "(':') as (name, age, gpa);");
    pigServer.registerQuery("d = distinct a;");
    pigServer.registerQuery("lim = limit d 1;");
    String outFile = "limitSchemaOut";
    Util.deleteDirectory(new File(outFile));
    pigServer.store("lim", outFile,  "PigStorage('\\t', '-schema')");
    pigServer.dumpSchema("lim");
    
    pigServer.registerQuery("b = LOAD '" + outFile + "' using PigStorage('\\t', '-schema');");
    Schema genSchema = pigServer.dumpSchema("b");
    System.err.println(genSchema);
    Assert.assertNotNull(genSchema);
    
}
 
Example 9
private Schema buildElNinoInputSchema() throws FrontendException {
    // Build Field Schema
    List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
    fieldSchemas.add(new Schema.FieldSchema("buoy_day_ID", DataType.CHARARRAY));
    fieldSchemas.add(new Schema.FieldSchema("buoy", DataType.CHARARRAY));
    fieldSchemas.add(new Schema.FieldSchema("day", DataType.CHARARRAY));
    fieldSchemas.add(new Schema.FieldSchema("latitude", DataType.DOUBLE));
    fieldSchemas.add(new Schema.FieldSchema("longitude", DataType.DOUBLE));
    fieldSchemas.add(new Schema.FieldSchema("zon_winds", DataType.DOUBLE));
    fieldSchemas.add(new Schema.FieldSchema("mer_winds", DataType.DOUBLE));
    fieldSchemas.add(new Schema.FieldSchema("humidity", DataType.DOUBLE));
    fieldSchemas.add(new Schema.FieldSchema("airtemp", DataType.DOUBLE));
    fieldSchemas.add(new Schema.FieldSchema("s_s_temp", DataType.DOUBLE));

    return new Schema(fieldSchemas);
}
 
Example 10
/**
 * Steps:
 * <ul>
 * <li>writes using the Thrift mapping
 * <li>reads using the Pig mapping
 * <li>uses Elephant Bird to convert from Thrift to Pig
 * <li>checks that both transformations give the same result
 * </ul>
 * @param o the object to convert
 * @throws TException
 */
public static <T extends TBase<?,?>> void validateSameTupleAsEB(T o) throws TException {
  final ThriftSchemaConverter thriftSchemaConverter = new ThriftSchemaConverter();
  @SuppressWarnings("unchecked")
  final Class<T> class1 = (Class<T>) o.getClass();
  final MessageType schema = thriftSchemaConverter.convert(class1);

  final StructType structType = ThriftSchemaConverter.toStructType(class1);
  final ThriftToPig<T> thriftToPig = new ThriftToPig<T>(class1);
  final Schema pigSchema = thriftToPig.toSchema();
  final TupleRecordMaterializer tupleRecordConverter = new TupleRecordMaterializer(schema, pigSchema, true);
  RecordConsumer recordConsumer = new ConverterConsumer(tupleRecordConverter.getRootConverter(), schema);
  final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
  ParquetWriteProtocol p = new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType);
  o.write(p);
  final Tuple t = tupleRecordConverter.getCurrentRecord();
  final Tuple expected = thriftToPig.getPigTuple(o);
  assertEquals(expected.toString(), t.toString());
  final MessageType filtered = new PigSchemaConverter().filter(schema, pigSchema);
  assertEquals(schema.toString(), filtered.toString());
}
 
Example 11
@Override
public Schema outputSchema(Schema input) {
    // "dimensions" is the default namespace assigned to the output schema.
    // It can be overridden by specifying user-defined schema names in the
    // FOREACH operator; if no user-defined names are given, the output schema
    // of a FOREACH using this UDF carries the "dimensions::" namespace for all
    // fields in the tuple.
    try {
        return new Schema(new FieldSchema("dimensions", input, DataType.BAG));
    } catch (FrontendException e) {
        // we are specifying BAG explicitly, so this should not happen
        throw new RuntimeException(e);
    }
}
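An illustrative usage sketch (not from the original page) of the namespace behaviour described in the comment above; the UDF name, relation aliases, and field names are assumptions:

// After flattening the bag returned by this UDF, fields carry the default
// "dimensions::" prefix:
pigServer.registerQuery("B = FOREACH A GENERATE FLATTEN(MyDimensionsUDF(*));");
// ... fields are then referenced as dimensions::<field_name>
// Supplying explicit names in the FOREACH overrides that namespace:
pigServer.registerQuery("C = FOREACH A GENERATE FLATTEN(MyDimensionsUDF(*)) AS (d1, d2);");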
 
Example 12
@Test
public void testRangeOrderByStartNOSchema() throws IOException, ParserException{
    String query;

    query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + " o = order l1 by $3 .. DESC;"
        ;
    compileAndCompareSchema((Schema)null, query, "o");

    //check number of sort expression plans

    LogicalPlan lp = createAndProcessLPlan(query);
    boolean[] isAsc = {false};
    checkNumExpressionPlansForSort(lp, 1, isAsc);

    Util.registerMultiLineQuery(pigServer, query);

    pigServer.explain("o", System.err);
    Iterator<Tuple> it = pigServer.openIterator("o");

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(11,21,31,41,51)",
                        "(10,20,30,40,50)",
                });
    Util.checkQueryOutputs(it, expectedRes);
}
 
Example 13
private static void writePartialUpdateAndRemoveMap(String name, Object value, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth, String operation) throws IOException {
    schema = (schema != null) ? schema.getField(0).schema : null;
    // drill down to the bag's inner tuple schema, then pull out the schema of the
    // value field; it is needed when writing JSON for a partial map update
    Schema valueSchema = (schema != null) ? schema.getField(1).schema : null;
    // data format: { (key: id, value: (abc,123,(123234,bbaa))) }
    // the first element of each tuple in the bag is the key of the map entry to update
    // the second element of each tuple in the bag is the new value for that entry
    DataBag bag = (DataBag) value;
    for (Tuple element : bag) {
        if (element.size() != 2) {
            continue;
        }
        String k = (String) element.get(0);
        Object v = element.get(1);
        Byte t = DataType.findType(v);
        if (t == DataType.TUPLE) {
            g.writeFieldName(name + "{" + k + "}");
            if (operation.equals(PARTIAL_UPDATE_REMOVE)) {
                g.writeStartObject();
                g.writeFieldName(PARTIAL_UPDATE_REMOVE);
                g.writeNumber(0);
                g.writeEndObject();
            } else {
                writePartialUpdate(v, t, g, name, properties, valueSchema, op, depth);
            }
        }
    }
}
 
Example 14
@Test
public void testComplexCast2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexCast2", input);

    String query = "a = load '" + tmpDirName + "/testComplexCast2' as (m:[int]);" +
        "b = foreach a generate ([long])m;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    assertEquals("Checking expected schema",sch.toString(), "{m: map[long]}");
    Iterator<Tuple> it = pig.openIterator("b");

    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertTrue(t.size()==1);
    Assert.assertTrue(t.get(0) instanceof Map);
    Assert.assertTrue(((Map)t.get(0)).containsKey("key"));
    Assert.assertTrue(((Map)t.get(0)).containsKey("key2"));
    Assert.assertTrue(((Map)t.get(0)).get("key") instanceof Long);
    Assert.assertTrue(((Map)t.get(0)).get("key").toString().equals("1"));
    Assert.assertTrue(((Map)t.get(0)).get("key2") instanceof Long);
    Assert.assertTrue(((Map)t.get(0)).get("key2").toString().equals("2"));

    Assert.assertFalse(it.hasNext());
}
 
Example 15
@Override
public Schema outputSchema(Schema input) {
    try {
        if (input.size() < 3) {
            return null;
        }
        return new Schema(input.getField(2));
    } catch (Exception e) {
        return null;
    }
}
 
Example 16
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>();
    fields.add(new Schema.FieldSchema(null, DataType.DOUBLE));
    fields.add(new Schema.FieldSchema(null, DataType.DOUBLE));
    funcList.add(new FuncSpec(this.getClass().getName(), new Schema(fields)));

    return funcList;
}
 
Example 17
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.DOUBLE))));

    return funcList;
}
 
Example 18
private boolean nullEquals(Schema currentSchema, Schema newSchema) {
    if (currentSchema == null) {
        return newSchema == null;
    }
    // compare inner schemas strictly but ignore aliases (relaxInner=false, relaxAlias=true)
    return Schema.equals(currentSchema, newSchema, false, true);
}
 
Example 19
public void add(Schema schema, Map<?, ?> m) {
  super.add(m);
  size.add(m.size());
  FieldSchema field = getField(schema, 0);
  if (m.size() > 0 && key == null) {
    key = new FieldSummaryData();
    key.setName(getName(field));
    value = new FieldSummaryData();
    value.setName(getName(field));
  }
  for (Map.Entry<?, ?> entry : m.entrySet()) {
    key.add(null, entry.getKey());
    value.add(getSchema(field), entry.getValue());
  }
}
 
Example 20
public static Schema genFlatSchema(String[] aliases, byte[] types) {
    if (aliases.length != types.length) {
        throw new AssertionError("alias count and type count don't match");
    }
    List<Schema.FieldSchema> fsList = new ArrayList<Schema.FieldSchema>();
    for (int i = 0; i < aliases.length; i++) {
        fsList.add(new Schema.FieldSchema(aliases[i], types[i]));
    }
    return new Schema(fsList);
}
 
Example 21
@Test
public void testNullTypeInTuple() throws IOException {
    Query query = new Query();
    query.value = "";
    Schema fakeSchema = getSchema(makeFieldSchema("a", DataType.NULL));
    Tuple fakeTuple = makeTuple("something");

    sty = getSty(withMockResult(withMockSchema(getServer(), fakeSchema), fakeTuple));
    runWithoutOutput(() -> sty.execute(query));
    Assert.assertFalse(query.failed());
    List<TypedObject> result = query.getResult().getColumn("a").getValues();
    Assert.assertNotNull(result);
    Assert.assertEquals(result.size(), 1);
    Assert.assertNull(result.get(0));
}
 
Example 22
@Test
public void testRangeOrderByMixNOSchema() throws IOException, ParserException{
    String query;

    query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + " o = order l1 by  $1 .. $2 DESC,  $0 , $4 .. DESC;"
        ;
    compileAndCompareSchema((Schema)null, query, "o");

    //check number of sort expression plans

    LogicalPlan lp = createAndProcessLPlan(query);
    boolean[] isAsc = {false, false,true,false};
    checkNumExpressionPlansForSort(lp, 4, isAsc);

    Util.registerMultiLineQuery(pigServer, query);

    pigServer.explain("o", System.err);
    Iterator<Tuple> it = pigServer.openIterator("o");

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(11,21,31,41,51)",
                        "(10,20,30,40,50)",
                });
    Util.checkQueryOutputs(it, expectedRes);
}
 
Example 23
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.DATETIME))));

    return funcList;
}
 
Example 24
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema.FieldSchema inputFieldSchema = input.getField(0);

        if (inputFieldSchema.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }

        Schema inputBagSchema = inputFieldSchema.schema;

        if (inputBagSchema.getField(0).type != DataType.TUPLE) {
            throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                    DataType.findTypeName(inputBagSchema.getField(0).type)));
        }

        return new Schema(new Schema.FieldSchema(
                getSchemaName(this.getClass().getName().toLowerCase(), input),
                DataType.DOUBLE));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
 
Example 25
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    Schema s = new Schema();
    s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    funcList.add(new FuncSpec(this.getClass().getName(), s));
    return funcList;
}
 
Example 26
@Test
public void testUnTypedMap() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testUnTypedMap", input);

    String query = "a = load '" + tmpDirName + "/testUnTypedMap' as (m:[]);";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("a");
    assertEquals("Checking expected schema",sch.toString(), "{m: map[]}");
    Iterator<Tuple> it = pig.openIterator("a");

    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertTrue(t.size()==1);
    Assert.assertTrue(t.get(0) instanceof Map);
    Assert.assertTrue(((Map)t.get(0)).containsKey("key"));
    Assert.assertTrue(((Map)t.get(0)).containsKey("key2"));
    Assert.assertTrue(((Map)t.get(0)).get("key") instanceof DataByteArray);
    Assert.assertTrue(((Map)t.get(0)).get("key").toString().equals("1"));
    Assert.assertTrue(((Map)t.get(0)).get("key2") instanceof DataByteArray);
    Assert.assertTrue(((Map)t.get(0)).get("key2").toString().equals("2"));

    Assert.assertFalse(it.hasNext());
}
 
Example 27
@Test
public void testNestedSortEndToEnd1() throws Exception {
    File tmpFile1 = Util.createTempFileDelOnExit("test", "txt");
    PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile1));
    ps1.println("1\t2\t3");
    ps1.println("1\t3\t4");
    ps1.println("1\t2\t4");
    ps1.println("1\t2\t4");
    ps1.println("1\t2\t4");
    ps1.println("2\t3\t4");
    ps1.close();

    String expected[] = {
            "(2,{(2,3,4)})",
            "(1,{(1,2,3),(1,2,4),(1,2,4),(1,2,4),(1,3,4)})"
    };

    String clusterPath = Util.removeColon(tmpFile1.getCanonicalPath());

    Util.copyFromLocalToCluster(cluster, tmpFile1.getCanonicalPath(), clusterPath);
    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(clusterPath) + "' AS (a0, a1, a2);");
    pigServer.registerQuery("B = group A by $0 parallel 2;");
    pigServer.registerQuery("C = foreach B { D = limit A 10; E = order D by $1; generate group, E;};");
    Iterator<Tuple> iter = pigServer.openIterator("C");
    Schema s = pigServer.dumpSchema("C");

    Util.checkQueryOutputsAfterSortRecursive(iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(s));
    Util.deleteFile(cluster, clusterPath);
}
 
Example 28
@Test
public void testMergeJoinSch2() throws IOException{
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");
    Schema mjSch = null, shjSch = null;
    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1) using 'merge';");
    mjSch = pigServer.dumpSchema("C");
    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
    shjSch = pigServer.dumpSchema("C");
    Assert.assertTrue(shjSch == null);
}
 
Example 29
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
  List<FieldSchema> fields = requestedPigSchema.getFields();
  List<Type> newFields = new ArrayList<Type>();
  for (int i = 0; i < fields.size(); i++) {
    FieldSchema fieldSchema = fields.get(i);
    String name = name(fieldSchema.alias, "field_"+i);
    if (schemaToFilter.containsField(name)) {
      newFields.add(filter(schemaToFilter.getType(name), fieldSchema));
    }
  }
  return newFields;
}
 