Java源码示例:org.apache.arrow.vector.util.Text

示例1
/**
 * Maps a handful of Arrow value types onto plain Java values so later conversion steps have less to handle.
 *
 * <ul>
 *   <li>{@code Text} becomes its {@code String} form.</li>
 *   <li>{@code LocalDateTime} becomes a string rendered with the column's datetime format when the metadata
 *       supplies one; otherwise it becomes the epoch milliseconds of that local time in the default time zone.</li>
 *   <li>Any other value (including {@code null}) is handed back untouched.</li>
 * </ul>
 *
 * @param columnName name of the column the value was read from
 * @param object the value to convert
 * @param recordMetadata glue table metadata supplying the per-column datetime format and the default time zone
 * @return the converted value, or {@code object} itself when no conversion applies
 */
public static Object convertArrowTypeIfNecessary(String columnName, Object object, DDBRecordMetadata recordMetadata)
{
    if (object instanceof Text) {
        return object.toString();
    }
    if (!(object instanceof LocalDateTime)) {
        return object;
    }

    LocalDateTime localDateTime = (LocalDateTime) object;
    String pattern = recordMetadata.getDateTimeFormat(columnName);
    DateTimeZone zone = DateTimeZone.forID(recordMetadata.getDefaultTimeZone().toString());
    if (pattern == null) {
        // no format configured for this column: fall back to epoch milliseconds
        return localDateTime.toDateTime(zone).getMillis();
    }
    return localDateTime.toDateTime(zone).toLocalDateTime().toString(pattern);
}
 
示例2
/**
 * Helper added for DRILL-951 so that TextRecordReader can obtain the field names.
 * Only a first-line header is supported, so a single record (the one at index
 * {@code recordCount - 1}) is read from the list vector.
 *
 * @return the first {@code fieldIndex + 1} entries of that record as strings, or {@code null}
 *         when {@code recordCount} is 0 or {@code fieldIndex} is -1
 * @throws ExecutionSetupException wrapping a {@link SchemaChangeException} raised while reading the output
 */
public String [] getTextOutput () throws ExecutionSetupException {
  final boolean nothingToReturn = recordCount == 0 || fieldIndex == -1;
  if (nothingToReturn) {
    return null;
  }

  final int fieldCount = fieldIndex+1;
  final String [] fields = new String [fieldCount];

  try {
    final ListVector vector = output.addField(new Field(COL_NAME, true, MinorType.LIST.getType(), null), ListVector.class);
    final List row = (List) vector.getObject((int)(recordCount-1));

    int pos = 0;
    while (pos < fieldCount) {
      fields[pos] = ((Text) row.get(pos)).toString();
      pos++;
    }
    return fields;
  } catch (SchemaChangeException e) {
    throw new ExecutionSetupException(e);
  }
}
 
示例3
@Test
/**
 * Exercises reading of nullable variable-length columns in two passes; the second pass targets a
 * file whose converted type is UTF-8 to confirm that it can be read as well.
 */
public void testNullableColumnsVarLen() throws Exception {
  // first pass: expected values supplied as raw byte arrays
  HashMap<String, FieldInfo> byteFields = new HashMap<>();
  ParquetTestProperties byteProps = new ParquetTestProperties(1, 300000, DEFAULT_BYTES_PER_PAGE, byteFields);
  byte[] b = {'b'};
  byte[] b2 = {'b', '2'};
  byte[] b3 = {'b', '3'}; // declared but not part of the expected values below
  byte[] longer = { 'l','o','n','g','e','r',' ','s','t','r','i','n','g'};
  Object[] expectedBytes = { b, b2, longer};
  byteProps.fields.put("a", new FieldInfo("boolean", "a", 1, expectedBytes, TypeProtos.MinorType.BIT, byteProps));
  testParquetFullEngineEventBased(false, "/parquet/parquet_nullable_varlen.json", "/tmp/nullable_varlen.parquet", 1, byteProps);

  // second pass: expected values supplied as Text instead of byte arrays
  HashMap<String, FieldInfo> textFields = new HashMap<>();
  Object[] expectedTexts = {
      new org.apache.arrow.vector.util.Text("b"),
      new org.apache.arrow.vector.util.Text("b2"),
      new org.apache.arrow.vector.util.Text("b3") };
  ParquetTestProperties textProps = new ParquetTestProperties(1, 30000, DEFAULT_BYTES_PER_PAGE, textFields);
  textProps.fields.put("a", new FieldInfo("boolean", "a", 1, expectedTexts, TypeProtos.MinorType.BIT, textProps));
  testParquetFullEngineEventBased(false, "/parquet/parquet_scan_screen_read_entry_replace.json",
      "\"/tmp/varLen.parquet/a\"", "unused", 1, textProps);
}
 
示例4
/**
 * Derives the refresh path of a job from the path recorded in the writer's metadata.
 *
 * @param jobId job whose output is inspected
 * @param accelerationBasePath base path against which the written path is relativized
 * @param jobsService service used to fetch the job data
 * @param allocator allocator used while fetching the job data
 * @return the accelerator storage plugin name followed by the first two components of the relative path
 */
public static List<String> getRefreshPath(final JobId jobId, final Path accelerationBasePath, JobsService jobsService, BufferAllocator allocator) {
  try (JobDataFragment fragment = JobDataClientUtils.getJobData(jobsService, allocator, jobId, 0, 1)) {
    // the writer's metadata carries the path that was written
    final Object rawPath = Preconditions.checkNotNull(fragment.extractValue(RecordWriter.PATH_COLUMN, 0),
      "Empty write path for job %s", jobId.getId());
    final Text writtenPath = (Text) rawPath;

    // make the written path relative to the acceleration base path
    final String relative = PathUtils.relativePath(Path.of(writtenPath.toString()), accelerationBasePath);

    // only the first two path components are kept: "<reflection-id>"."<modified-materialization-id>"
    final List<String> parts = PathUtils.toPathComponents(relative);
    Preconditions.checkState(parts.size() >= 2, "Refresh path %s is incomplete", relative);

    final String reflectionId = parts.get(0);
    final String materializationId = parts.get(1);
    return ImmutableList.of(ACCELERATOR_STORAGEPLUGIN_NAME, reflectionId, materializationId);
  }
}
 
示例5
/**
 * Collects the values at positions {@code [start, end)} of the VarChar vector registered under id 1
 * in the given container. Positions that are unset, or whose text reads as null, yield a null entry.
 */
private static List<String> getVarCharValues(VectorContainer container, int start, int end) {
  final FieldReader varCharReader =
      container.getValueAccessorById(VarCharVector.class, 1).getValueVector().getReader();
  final List<String> result = Lists.newArrayList();

  int position = start;
  while (position < end) {
    varCharReader.setPosition(position);
    final Text text = varCharReader.isSet() ? varCharReader.readText() : null;
    result.add(text == null ? null : text.toString());
    position++;
  }

  return result;
}
 
示例6
/**
 * Writes the reader's current VarChar value, truncated to the space the context still has left;
 * writes a null when the reader is unset or yields no text.
 *
 * @param reader reader positioned on the value to write
 * @param context output budget; marked truncated when the value is cut, and charged with the written length
 * @throws IOException declared for the underlying write calls
 */
public void writeVarChar(FieldReader reader, JsonOutputContext context) throws IOException {
  // NB: For UnionReader(s), reader.isSet() checks if the reader has a type set, not if the data itself is null
  final Text text = reader.isSet() ? reader.readText() : null;
  if (text == null) {
    // Either the type or the value is not set
    writeNull(context);
    return;
  }

  String value = text.toString();
  final boolean exceedsBudget = value.length() > context.getRemaining();
  if (exceedsBudget) {
    // Truncate the string if the space is limited
    value = value.substring(0, context.getRemaining());
    context.setTruncated();
  }
  writeVarChar(value);
  context.used(value.length());
}
 
示例7
/**
 * Attempts to cast the reader's current object into a long stored in {@code out.value}.
 * Numbers use {@code longValue()}, booleans map to 1/0, {@code LocalDateTime} goes through
 * {@code toMillis}, and {@code Text} is decoded and handed to {@code parseLong}.
 *
 * @param reader source of the object to cast
 * @param out holder that receives the result
 * @return {@code true} when a value was written; {@code false} for unsupported types or undecodable text
 */
public static boolean castToInteger(FieldReader reader, NullableBigIntHolder out) {
  final Object value = reader.readObject();

  if (value instanceof Number) {
    out.value = ((Number) value).longValue();
    return true;
  }
  if (value instanceof Boolean) {
    out.value = ((Boolean) value) ? 1 : 0;
    return true;
  }
  if (value instanceof LocalDateTime) {
    out.value = toMillis((LocalDateTime) value);
    return true;
  }
  if (value instanceof Text) {
    final Text text = (Text) value;
    try {
      return parseLong(Text.decode(text.getBytes(), 0, text.getLength()), out);
    } catch (CharacterCodingException e) {
      // TODO: is this the best way?
      logger.warn("Can't decode text", e);
      return false;
    }
  }

  // TODO: byte[] is not handled yet; it is rejected like every other unsupported type
  return false;
}
 
示例8
/**
 * Turns a {@code Text} value into its {@code String} form; any other value is returned as is.
 */
private static Object convert(Object value)
{
    return (value instanceof Text) ? value.toString() : value;
}
 
示例9
/**
 * Reads the VarChar field {@code fieldName} at {@code row} of the block as a String.
 *
 * @return the string value, or {@code null} when the position is unset or the text reads as null
 */
private String getValue(Block block, int row, String fieldName)
{
    VarCharReader varCharReader = block.getFieldReader(fieldName);
    varCharReader.setPosition(row);
    if (!varCharReader.isSet()) {
        return null;
    }

    Text text = varCharReader.readText();
    if (text == null) {
        return null;
    }
    return text.toString();
}
 
示例10
/**
 * Returns the variable length element stored at the given index wrapped in a Text.
 *
 * @param index position of element to get
 * @return a new Text holding the element's bytes, or null when {@code get(index)} signals
 *         an {@link IllegalStateException}
 */
public Text getObject(int index) {
  final byte[] bytes;
  try {
    bytes = get(index);
  } catch (IllegalStateException e) {
    return null;
  }

  final Text text = new Text();
  text.set(bytes);
  return text;
}
 
示例11
/**
 * For each of the TOTAL_STRINGS positions: writes the value from l1, then immediately overwrites
 * it with the value from l2. Afterwards forces a compaction and checks that the buffer offset
 * equals the summed length of the l2 strings and that every position reads back its l2 value.
 */
private void TestInterLeaved(MutableVarcharVector m1, LinkedList<String> l1, LinkedList<String> l2) {

  int expectedLength = 0;
  for (int pos = 0; pos < TOTAL_STRINGS; ++pos) {
    // first write comes from l1 ...
    m1.setSafe(pos, new Text(l1.get(pos)));
    // ... and is replaced right away by the l2 value
    final String replacement = l2.get(pos);
    m1.setSafe(pos, new Text(replacement));

    expectedLength += replacement.length();
  }

  final int offsetBefore = m1.getCurrentOffset();
  System.out.println("TestInterLeaved: buffer capacity: " + m1.getByteCapacity());
  System.out.println("TestInterLeaved: Offset in buffer: " + offsetBefore);
  System.out.println("TestInterLeaved: Sum of all valid strings: " + expectedLength);

  final Stopwatch compactionTimer = Stopwatch.createUnstarted();
  compactionTimer.start();
  m1.forceCompact();
  compactionTimer.stop();

  final int offsetAfter = m1.getCurrentOffset();
  System.out.println("TestInterLeaved: Post Compaction: Offset in buffer: " + offsetAfter);
  System.out.println("TestInterLeaved: Compaction time: " + compactionTimer.toString() + "\n");
  Assert.assertTrue(offsetAfter <= offsetBefore);

  // once compacted, only the valid data should remain, so the offset is the sum of its lengths
  Assert.assertEquals(expectedLength, offsetAfter);

  // every position must hold the value taken from l2
  for (int pos = 0; pos < TOTAL_STRINGS; ++pos) {
    final String expected = l2.get(pos);
    final String actual = m1.getObject(pos).toString();
    Assert.assertEquals(expected, actual);
  }
}
 
示例12
/**
 * Runs the event-based parquet engine test against a file whose expected values include a null entry.
 */
@Test
public void testFileWithNulls() throws Exception {
  // the expected values deliberately contain a null
  final Object[] expectedValues = {new Text(""), new Text("longer string"), null};

  final HashMap<String, FieldInfo> fieldMap = new HashMap<>();
  final ParquetTestProperties properties = new ParquetTestProperties(1, 3000, DEFAULT_BYTES_PER_PAGE, fieldMap);
  properties.fields.put("a", new FieldInfo("boolean", "a", 1, expectedValues, TypeProtos.MinorType.BIT, properties));

  testParquetFullEngineEventBased(false, "/parquet/parquet_scan_screen_read_entry_replace.json",
      "\"/tmp/nullable_with_nulls.parquet\"", "unused", 1, properties);
}
 
示例13
/**
 * Loads every batch in {@code records} and appends one map per row to {@code materializedRecords}.
 *
 * <p>Each row map is keyed by the column's simple-path expression, in the loader's iteration order.
 * {@code Text} values are stored as {@code String}; null values are stored as null, so every row has
 * an entry for every column. Batches are consumed from the head of {@code records}: after a batch has
 * been materialized it is removed from the list and released, and the loader is cleared.
 *
 * @param materializedRecords destination list receiving one map per row
 * @param records batches to materialize; entries are removed as they are processed
 * @param loader loader used to decode each batch
 * @throws SchemaChangeException declared by the signature (see the TODO below)
 * @throws UnsupportedEncodingException declared by the signature
 */
public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords,
                                        List<QueryDataBatch> records,
                                        RecordBatchLoader loader)
    throws SchemaChangeException, UnsupportedEncodingException {
  long totalRecords = 0;
  QueryDataBatch batch;
  int size = records.size();
  for (int i = 0; i < size; i++) {
    // always index 0: the previously processed batch was removed from the list
    batch = records.get(0);
    loader.load(batch.getHeader().getDef(), batch.getData());
    // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
    // SchemaChangeException, so check/clean throws clause above.
    logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
    totalRecords += loader.getRecordCount();
    for (int j = 0; j < loader.getRecordCount(); j++) {
      Map<String, Object> record = new LinkedHashMap<>();
      for (VectorWrapper<?> w : loader) {
        Object obj = getVectorObject(w.getValueVector(), j);
        if (obj instanceof Text) {
          obj = obj.toString();
        }
        // single put for null and non-null values alike (the key used to be computed and stored twice)
        record.put(SchemaPath.getSimplePath(w.getField().getName()).toExpr(), obj);
      }
      materializedRecords.add(record);
    }
    records.remove(0);
    batch.release();
    loader.clear();
  }
}
 
示例14
/**
 * Builds a {@link JsonStringArrayList list} out of the given values, in order. Any value that is a
 * {@link CharSequence} is stored as a {@code Text} of its string form; all other values are stored as given.
 */
public static JsonStringArrayList<Object> listOf(Object... values) {
  final JsonStringArrayList<Object> result = new JsonStringArrayList<>();
  for (int i = 0; i < values.length; i++) {
    final Object element = values[i];
    final boolean isCharacters = element instanceof CharSequence;
    result.add(isCharacters ? new Text(element.toString()) : element);
  }
  return result;
}
 
示例15
/**
 * Attempts to cast the reader's current object into a double stored in {@code out.value}.
 * Numbers use {@code doubleValue()}, booleans map to 1/0, {@code LocalDateTime} goes through
 * {@code toMillis}, and {@code Text} is decoded and parsed when {@code isNumeric} accepts it.
 *
 * @param reader source of the object to cast
 * @param out holder that receives the result
 * @return {@code true} when a value was written; {@code false} for unsupported types, non-numeric
 *         text, or any exception raised while decoding/parsing the text
 */
public static boolean castToFloat(FieldReader reader, NullableFloat8Holder out) {
  final Object value = reader.readObject();

  if (value instanceof Number) {
    out.value = ((Number) value).doubleValue();
    return true;
  }
  if (value instanceof Boolean) {
    out.value = ((Boolean) value) ? 1 : 0;
    return true;
  }
  if (value instanceof LocalDateTime) {
    out.value = toMillis((LocalDateTime) value);
    return true;
  }
  if (value instanceof Text) {
    final Text text = (Text) value;
    try {
      final String decoded = Text.decode(text.getBytes(), 0, text.getLength());
      if (isNumeric(decoded)) {
        out.value = Double.parseDouble(decoded);
        return true;
      }
      return false;
    } catch (Exception e) {
      // TODO: is this the best way?
      logger.warn("Can't decode text to FLOAT", e);
      return false;
    }
  }

  // TODO: byte[] is not handled yet; it is rejected like every other unsupported type
  return false;
}
 
示例16
/**
 * Attempts to cast the reader's current object into a 1/0 flag stored in {@code out.value}.
 * Numbers and {@code LocalDateTime} millis are 1 when non-zero, booleans map directly, and
 * {@code Text} is 0 for the empty string, "false"/"f" (case-insensitive), "0" and "0.0", and 1 otherwise.
 *
 * @param reader source of the object to cast
 * @param out holder that receives the result
 * @return {@code true} when a value was written; {@code false} for unsupported types or undecodable text
 */
public static boolean castToBoolean(FieldReader reader, NullableBitHolder out) {
  final Object value = reader.readObject();

  if (value instanceof Number) {
    out.value = ((Number) value).doubleValue() != 0 ? 1 : 0;
    return true;
  }
  if (value instanceof Boolean) {
    out.value = ((Boolean) value) ? 1 : 0;
    return true;
  }
  if (value instanceof LocalDateTime) {
    out.value = toMillis((LocalDateTime) value) != 0 ? 1 : 0;
    return true;
  }
  if (value instanceof Text) {
    final Text text = (Text) value;
    try {
      final String decoded = Text.decode(text.getBytes(), 0, text.getLength());
      final boolean readsAsFalse = decoded == null
        || decoded.isEmpty()
        || "false".equalsIgnoreCase(decoded)
        || "f".equalsIgnoreCase(decoded)
        || "0".equals(decoded)
        || "0.0".equals(decoded);
      out.value = readsAsFalse ? 0 : 1;
      return true;
    } catch (CharacterCodingException e) {
      logger.warn("Can't decode text", e);
      return false;
    }
  }

  return false;
}
 
示例17
/**
 * Verifies that Dremio reads a Hive struct column made of primitive fields, both as a whole struct
 * and through individual field accessors, at a fixed set of row offsets.
 *
 * @param table name of the Hive table to query
 * @throws Exception if running a query or comparing it against its baseline fails
 */
private void readStructHiveDataTypes(String table) throws Exception {
  int[] testrows = {0, 500, 1022, 1023, 1024, 4094, 4095, 4096, 4999};
  for (int row : testrows) {
    // valueOf replaces the deprecated boxed-primitive constructors; the boxed values are equal
    final Integer index = Integer.valueOf(row);
    final String rowText = Integer.toString(row);

    JsonStringHashMap<String, Object> structrow1 = new JsonStringHashMap<>();
    structrow1.put("tinyint_field", Integer.valueOf(1));
    structrow1.put("smallint_field", Integer.valueOf(1024));
    structrow1.put("int_field", index);
    structrow1.put("bigint_field", Long.valueOf(90000000000L));
    structrow1.put("float_field", Float.valueOf((float) row));
    structrow1.put("double_field", Double.valueOf((double) row));
    structrow1.put("string_field", new Text(rowText));

    // the whole struct is expected at the given row offset
    testBuilder().sqlQuery("SELECT * FROM hive." + table +
      " order by rownum limit 1 offset " + rowText)
      .ordered()
      .baselineColumns("rownum", "struct_field")
      .baselineValues(index, structrow1)
      .go();

    // individual struct fields are expected to be addressable by name
    testBuilder().sqlQuery("SELECT rownum, struct_field['string_field'] AS string_field, struct_field['int_field'] AS int_field FROM hive." + table +
      " order by rownum limit 1 offset " + rowText)
      .ordered()
      .baselineColumns("rownum", "string_field", "int_field")
      .baselineValues(index, rowText, index)
      .go();
  }
}
 
示例18
/**
 * Loads two rows holding arrays of IPv4 addresses into the {@code ip_field} column and checks
 * that selecting the column returns both arrays as lists of Text.
 */
@Test
public void testArrayOfIPv4() throws Exception {

  final String ip1 = "10.0.0.1";
  final String ip2 = "192.168.0.1";
  final String ip3 = "10.0.8.6";
  final String ip4 = "10.0.8.5";

  final Object[][] rows = {
          {ip1, ip2, ip3},
          {ip4, ip1}
  };
  ElasticsearchCluster.ColumnData[] data = {
          new ElasticsearchCluster.ColumnData("ip_field", IP, null, rows)
  };

  elastic.load(schema, table, data);

  // expected result for the first loaded row
  JsonStringArrayList<Text> expectedFirstRow = new JsonStringArrayList<>();
  expectedFirstRow.add(new Text(ip1));
  expectedFirstRow.add(new Text(ip2));
  expectedFirstRow.add(new Text(ip3));

  // expected result for the second loaded row
  JsonStringArrayList<Text> expectedSecondRow = new JsonStringArrayList<>();
  expectedSecondRow.add(new Text(ip4));
  expectedSecondRow.add(new Text(ip1));

  final String sql = "select ip_field from elasticsearch." + schema + "." + table;
  testBuilder()
          .sqlQuery(sql)
          .baselineColumns("ip_field")
          .unOrdered()
          .baselineValues(expectedFirstRow)
          .baselineValues(expectedSecondRow)
          .go();
}
 
示例19
/**
 * Builds the projection that reads one value of the given simple (non-complex) Arrow type from a field reader.
 * DATEMILLI values are returned as a UTC-based local date-time, DATEDAY values as a local date, and
 * VARCHAR values as a String; the remaining supported types are returned as read.
 *
 * @param minorType Arrow minor type of the field
 * @return projection producing the Java value for that type
 * @throws IllegalArgumentException when the type is not one of the supported simple types
 */
private Projection createSimpleValueProjection(Types.MinorType minorType)
{
    switch (minorType) {
        case DATEMILLI:
            return reader -> {
                if (reader.readLocalDateTime() == null) {
                    return null;
                }
                long epochMillis = reader.readLocalDateTime().toDateTime(org.joda.time.DateTimeZone.UTC).getMillis();
                return Instant.ofEpochMilli(epochMillis).atZone(BlockUtils.UTC_ZONE_ID).toLocalDateTime();
            };
        case TINYINT:
        case UINT1:
            return reader -> reader.readByte();
        case UINT2:
            return reader -> reader.readCharacter();
        case SMALLINT:
            return reader -> reader.readShort();
        case DATEDAY:
            return reader -> {
                Integer epochDay = reader.readInteger();
                if (epochDay == null) {
                    return null;
                }
                return LocalDate.ofEpochDay(epochDay);
            };
        case INT:
        case UINT4:
            return reader -> reader.readInteger();
        case UINT8:
        case BIGINT:
            return reader -> reader.readLong();
        case DECIMAL:
            return reader -> reader.readBigDecimal();
        case FLOAT4:
            return reader -> reader.readFloat();
        case FLOAT8:
            return reader -> reader.readDouble();
        case VARCHAR:
            return reader -> {
                Text value = reader.readText();
                if (value == null) {
                    return null;
                }
                return value.toString();
            };
        case VARBINARY:
            return reader -> reader.readByteArray();
        case BIT:
            return reader -> reader.readBoolean();
        default:
            throw new IllegalArgumentException("Unsupported type " + minorType);
    }
}
 
示例20
/**
 * Reads the Arrow value at the given position of the field reader as a Java value
 * (Integer, Double, String, etc), dispatching on the reader's minor type.
 *
 * @param fieldReader the field reader containing the arrow value
 * @param pos position the reader is moved to before reading
 * @return the value object in java type, or null for a null DATEMILLI, DATEDAY or VARCHAR value
 * @throws IllegalArgumentException when the reader's minor type is not supported
 */
public static Object getValue(FieldReader fieldReader, int pos)
{
    fieldReader.setPosition(pos);

    Types.MinorType type = fieldReader.getMinorType();
    switch (type) {
        case DATEMILLI: {
            if (fieldReader.readLocalDateTime() == null) {
                return null;
            }
            long epochMillis = fieldReader.readLocalDateTime().toDateTime(org.joda.time.DateTimeZone.UTC).getMillis();
            return Instant.ofEpochMilli(epochMillis).atZone(UTC_ZONE_ID).toLocalDateTime();
        }
        case TINYINT:
        case UINT1:
            return fieldReader.readByte();
        case UINT2:
            return fieldReader.readCharacter();
        case SMALLINT:
            return fieldReader.readShort();
        case DATEDAY: {
            Integer epochDay = fieldReader.readInteger();
            if (epochDay == null) {
                return null;
            }
            return LocalDate.ofEpochDay(epochDay);
        }
        case INT:
        case UINT4:
            return fieldReader.readInteger();
        case UINT8:
        case BIGINT:
            return fieldReader.readLong();
        case DECIMAL:
            return fieldReader.readBigDecimal();
        case FLOAT4:
            return fieldReader.readFloat();
        case FLOAT8:
            return fieldReader.readDouble();
        case VARCHAR: {
            Text value = fieldReader.readText();
            if (value == null) {
                return null;
            }
            return value.toString();
        }
        case VARBINARY:
            return fieldReader.readByteArray();
        case BIT:
            return fieldReader.readBoolean();
        case LIST:
            return readList(fieldReader);
        case STRUCT:
            return readStruct(fieldReader);
        default:
            throw new IllegalArgumentException("Unsupported type " + type);
    }
}
 
示例21
/**
 * Writes a value derived from {@code idx} into slot {@code idx} of the vector, choosing the
 * value by vector type: int, float, double, varchar, a three-element list, or a two-field struct.
 *
 * @param fieldVector vector to write into
 * @param idx slot to write, also used to derive the written value
 * @throws IllegalArgumentException when the vector type is not one of the handled types
 */
private void writeColumn(FieldVector fieldVector, int idx)
{
    if (fieldVector instanceof IntVector) {
        ((IntVector) fieldVector).setSafe(idx, idx + 100);
    }
    else if (fieldVector instanceof Float4Vector) {
        ((Float4Vector) fieldVector).setSafe(idx, idx + 100.1f);
    }
    else if (fieldVector instanceof Float8Vector) {
        ((Float8Vector) fieldVector).setSafe(idx, idx + 100.2);
    }
    else if (fieldVector instanceof VarCharVector) {
        ((VarCharVector) fieldVector).setSafe(idx, new Text(idx + "-my-varchar"));
    }
    else if (fieldVector instanceof ListVector) {
        BlockUtils.setComplexValue(fieldVector,
                idx,
                FieldResolver.DEFAULT,
                ImmutableList.of(idx + 100, idx + 200, idx + 300));
    }
    else if (fieldVector instanceof StructVector) {
        Map<String, Object> structValue = ImmutableMap.of("intVal", idx + 100, "doubleVal", idx + 200.2);
        BlockUtils.setComplexValue(fieldVector,
                idx,
                FieldResolver.DEFAULT,
                structValue);
    }
    else {
        throw new IllegalArgumentException("Unsupported fieldVector " + fieldVector.getClass().getCanonicalName());
    }
}
 
示例22
/**
 * Used to convert from Apache Arrow typed values to HBase values.
 *
 * <p>{@code null} and {@code byte[]} are returned as is. {@code String} and {@code Text} values, and
 * every value when {@code isNative} is false, are encoded as the UTF-8 bytes of their string form; an
 * explicit charset is used so the result does not depend on the JVM's default charset. With
 * {@code isNative} true, Integer/Long/Float/Double are written through a {@link ByteBuffer} of the
 * matching width and Boolean as a single byte (1 or 0).
 *
 * @param isNative If True, the HBase value should be stored using native bytes.
 * If False, the value should be serialized as a String before storing it.
 * @param value The value to convert.
 * @return The HBase byte representation of the value.
 * @throws RuntimeException if {@code isNative} is true and the value's type is not supported
 * @note This is commonly used when attempting to push constraints into HBase which requires converting a small
 * number of values from Apache Arrow's Type system to HBase compatible representations for comparisons.
 */
public static byte[] toBytes(boolean isNative, Object value)
{
    if (value == null || value instanceof byte[]) {
        return (byte[]) value;
    }

    if (value instanceof String) {
        return ((String) value).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    if (value instanceof Text) {
        return ((Text) value).toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    if (!isNative) {
        return String.valueOf(value).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    if (value instanceof Integer) {
        return ByteBuffer.allocate(4).putInt((int) value).array();
    }

    if (value instanceof Long) {
        return ByteBuffer.allocate(8).putLong((long) value).array();
    }

    if (value instanceof Float) {
        return ByteBuffer.allocate(4).putFloat((float) value).array();
    }

    if (value instanceof Double) {
        return ByteBuffer.allocate(8).putDouble((double) value).array();
    }

    if (value instanceof Boolean) {
        return ByteBuffer.allocate(1).put((byte) ((boolean) value ? 1 : 0)).array();
    }

    // value is non-null here (null returned above), so its class name is always available
    throw new RuntimeException("Unsupported object type for " + value + " with class name " + value.getClass().getName());
}
 
示例23
/**
 * Scenario:
 * 1. insert TOTAL_STRINGS strings generated with the small average size
 * 2. overwrite each of them with a string generated with the mid average size
 * 3. force a compaction
 * 4. validate that every slot holds the overwriting value and the offset did not grow
 */
@Test
public void TestUpdateAfterInsert()
{
  final double threshold = 1.0D;
  MutableVarcharVector vector = new MutableVarcharVector("TestUpdateAfterInsert", testAllocator, threshold /*only force compact*/);
  try {
    LinkedList<String> initialValues = GetRandomStringList(TOTAL_STRINGS, smallAvgSize);
    LinkedList<String> updatedValues = GetRandomStringList(TOTAL_STRINGS, midAvgSize);

    // step 1: populate every slot from the first list
    for (int slot = 0; slot < TOTAL_STRINGS; ++slot) {
      vector.setSafe(slot, new Text(initialValues.get(slot)));
    }
    vector.setValueCount(TOTAL_STRINGS);

    System.out.println("TestUpdateAfterInsert: threshold: " + threshold + " (only forcecompact)");
    System.out.println("TestUpdateAfterInsert: buffer capacity: " + vector.getByteCapacity());
    System.out.println("TestUpdateAfterInsert: Inserted " + TOTAL_STRINGS  + " strings with avg size: " + smallAvgSize);
    System.out.println("TestUpdateAfterInsert: Offset in buffer: " + vector.getCurrentOffset());

    // step 2: overwrite every slot from the second list
    for (int slot = 0; slot < TOTAL_STRINGS; ++slot) {
      vector.setSafe(slot, new Text(updatedValues.get(slot)));
    }

    System.out.println("TestUpdateAfterInsert: Updated " + TOTAL_STRINGS  + " strings with avg size: " + midAvgSize);
    System.out.println("TestUpdateAfterInsert: Offset in buffer: " + vector.getCurrentOffset());

    // step 3: compact, timing the operation
    final int offsetBefore = vector.getCurrentOffset();
    final Stopwatch compactionTimer = Stopwatch.createUnstarted();
    compactionTimer.start();
    vector.forceCompact();
    compactionTimer.stop();
    final int offsetAfter = vector.getCurrentOffset();

    System.out.println("TestUpdateAfterInsert: Post Compaction: Offset in buffer: " + offsetAfter);
    System.out.println("TestUpdateAfterInsert: Compaction time: " + compactionTimer.toString() + "\n");
    Assert.assertTrue(offsetAfter <= offsetBefore);

    // step 4: every slot must read back the value from the second list
    for (int slot = 0; slot < TOTAL_STRINGS; ++slot) {
      final String expected = updatedValues.get(slot);
      final String actual = vector.getObject(slot).toString();
      Assert.assertEquals(expected, actual);
    }
  } finally {
    vector.close();
  }

}
 
示例24
/**
 * Writes TOTAL_STRINGS values (each inserted from l1, then overwritten from l2) into every second
 * slot starting at {@code firstIndex}, copies the mutable vector into a VarCharVector without
 * compacting, and validates the copied values and the null count.
 */
@Test
public void TestCopyOut()
{
  MutableVarcharVector m1 = new MutableVarcharVector("TestCopyOut", testAllocator, 1.0D /* never compact */);
  VarCharVector v1 = new VarCharVector("TestCopyOutVarchar", testAllocator);
  try {
    //insert the values from l1
    LinkedList<String> l1 = GetRandomStringList(TOTAL_STRINGS, smallAvgSize);
    //update values from l2
    LinkedList<String> l2 = GetRandomStringList(TOTAL_STRINGS, midAvgSize);

    final int firstIndex = 10; //beginning firstIndex records to be null

    //insert TOTAL_STRINGS nulls and values
    int startIdx = firstIndex;
    for (int i = 0; i < TOTAL_STRINGS; ++i) {
      //insert
      m1.setSafe(startIdx, new Text(l1.get(i)));
      //update
      m1.setSafe(startIdx, new Text(l2.get(i)));

      //skip one slot so that it stays null
      startIdx += 2;
    }

    //allocate with twice the index
    v1.allocateNew(m1.getUsedByteCapacity(), startIdx * 2);

    //copy records
    m1.copyToVarchar(v1, 0, startIdx * 3);

    //reset the value count back to actual strings copied
    v1.setValueCount(startIdx);

    //validate the records, skip null values
    int j = firstIndex;
    for (int i = 0; i < TOTAL_STRINGS; ++i) {
      Assert.assertEquals(l2.get(i) /*expected*/, v1.getObject(j).toString() /*actual*/);
      j += 2; //skip nulls
    }

    //counters must match
    Assert.assertEquals(startIdx, j);
    Assert.assertEquals(TOTAL_STRINGS + firstIndex, v1.getNullCount());
  }
  finally {
    m1.close();
    v1.close();
  }

}
 
示例25
/**
 * Writes TOTAL_STRINGS values (each inserted from l1, then overwritten from l2) into every second
 * slot starting at {@code firstIndex}, forces a compaction, copies the mutable vector into a
 * VarCharVector, and validates the copied values and the null count.
 */
@Test
public void TestCopyOutPostCompaction()
{
  MutableVarcharVector m1 = new MutableVarcharVector("TestCopyOut", testAllocator, 1.0D /* never compact */);
  VarCharVector v2 = new VarCharVector("TestCopyOutVarcharPostCompaction", testAllocator);
  try {
    //insert the values from l1
    LinkedList<String> l1 = GetRandomStringList(TOTAL_STRINGS, smallAvgSize);
    //update values from l2
    LinkedList<String> l2 = GetRandomStringList(TOTAL_STRINGS, midAvgSize);

    final int firstIndex = 10; //beginning firstIndex records to be null

    //insert TOTAL_STRINGS nulls and values
    int startIdx = firstIndex;
    for (int i = 0; i < TOTAL_STRINGS; ++i) {
      //insert
      m1.setSafe(startIdx, new Text(l1.get(i)));
      //update
      m1.setSafe(startIdx, new Text(l2.get(i)));

      //skip one slot so that it stays null
      startIdx += 2;
    }

    //Reduce garbage data
    m1.forceCompact();

    //allocate with twice the index
    v2.allocateNew(m1.getUsedByteCapacity(), startIdx * 2);

    //copy records
    m1.copyToVarchar(v2, 0, startIdx * 3);

    //reset the value count back to actual strings copied
    v2.setValueCount(startIdx);

    //verify that post compaction also data matches
    int j = firstIndex;
    for (int i = 0; i < TOTAL_STRINGS; ++i) {
      Assert.assertEquals(l2.get(i) /*expected*/, v2.getObject(j).toString() /*actual*/);
      j += 2; //skip nulls
    }

    Assert.assertEquals(startIdx, j);
    Assert.assertEquals(TOTAL_STRINGS + firstIndex, v2.getNullCount());

  }
  finally {
    m1.close();
    v2.close();
  }

}
 
示例26
/**
 * For every constant-folding test config, checks that the plan for a {@code dir0 = func('dfs', path)}
 * filter mentions only the expected folder; then runs the query for the first config and compares
 * its result against a fixed baseline row.
 */
@Test
public void testConstExprFolding_maxDir0() throws Exception {

  test("use dfs_root");

  final List<String> allFiles = ImmutableList.of("smallfile", "SMALLFILE_2", "bigfile", "BIGFILE_2");

  final String query = "select * from dfs_root.\"" + path + "/*/*.csv\" where dir0 = %s('dfs','" + path + "')";
  for (ConstantFoldingTestConfig config : tests) {
    // every folder other than the one expected for this config must not show up in the plan
    final List<String> excludedPatterns = Lists.newArrayList(allFiles);
    excludedPatterns.remove(config.expectedFolderName);

    // building a list is easier programmatically, while the API below takes arrays so that
    // typical test definitions can pass literal arrays
    final String[] expectedPatterns = {config.expectedFolderName};
    final String[] excludedArray = excludedPatterns.toArray(new String[excludedPatterns.size()]);

    testPlanMatchingPatterns(String.format(query, config.funcName), expectedPatterns, excludedArray);
  }

  final JsonStringArrayList<Text> expectedColumns = new JsonStringArrayList<>();
  expectedColumns.add(new Text("1"));
  expectedColumns.add(new Text("2"));
  expectedColumns.add(new Text("3"));

  testBuilder()
      .sqlQuery(String.format(query, tests.get(0).funcName))
      .unOrdered()
      .baselineColumns("columns", "dir0")
      .baselineValues(expectedColumns, tests.get(0).expectedFolderName)
      .go();
}
 
示例27
/**
 * Queries a parquet file holding a list of lists with null entries at every nesting level and
 * compares the whole column, its first-level elements and its second-level elements (with and
 * without an "is null" filter) against the expected nested lists.
 */
@Test
public void testListofListWithNulls() throws Exception {
  // innermost list: one value followed by a null
  JsonStringArrayList<Text> innerList = new JsonStringArrayList<>();
  innerList.add(new Text("a"));
  innerList.add(null);

  // middle list: the innermost list followed by a null
  JsonStringArrayList<JsonStringArrayList> middleList = new JsonStringArrayList<>();
  middleList.add(innerList);
  middleList.add(null);

  // outermost list: the middle list followed by a null
  JsonStringArrayList<JsonStringArrayList> outerList = new JsonStringArrayList<>();
  outerList.add(middleList);
  outerList.add(null);

  // whole column
  final String wholeColumn = "SELECT " +
    "col1 " +
    "FROM cp.\"/parquet/list_list_null_test.parquet\"";
  testBuilder()
    .sqlQuery(wholeColumn)
    .ordered()
    .baselineColumns("col1")
    .baselineValues(outerList)
    .build()
    .run();

  // first-level elements
  final String firstLevel = "SELECT " +
    "col1[0] as c1, col1[1] as c2 " +
    "FROM cp.\"/parquet/list_list_null_test.parquet\"";
  testBuilder()
    .sqlQuery(firstLevel)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(middleList, null)
    .build()
    .run();

  // first-level elements, filtered on the null element
  final String firstLevelFiltered = "SELECT " +
    "col1[0] as c1, col1[1] as c2 " +
    "FROM cp.\"/parquet/list_list_null_test.parquet\" where col1[1] is null";
  testBuilder()
    .sqlQuery(firstLevelFiltered)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(middleList, null)
    .build()
    .run();

  // second-level elements
  final String secondLevel = "SELECT " +
    "col1[0][0] as c1, col1[0][1] as c2 " +
    "FROM cp.\"/parquet/list_list_null_test.parquet\"";
  testBuilder()
    .sqlQuery(secondLevel)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(innerList, null)
    .build()
    .run();

  // second-level elements, filtered on the null element
  final String secondLevelFiltered = "SELECT " +
    "col1[0][0] as c1, col1[0][1] as c2 " +
    "FROM cp.\"/parquet/list_list_null_test.parquet\" where col1[0][1] is null";
  testBuilder()
    .sqlQuery(secondLevelFiltered)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(innerList, null)
    .build()
    .run();
}
 
示例28
/**
 * Queries {@code /parquet/list_struct_null_test.parquet} and checks that a list-of-struct column
 * comes back with the expected null entries: the whole column, individual list elements, the
 * struct field {@code f1}, and the elements of {@code f1} (with and without an {@code is null}
 * filter).
 */
@Test
public void testListofStructWithNulls() throws Exception {
  final String fromTable = "FROM cp.\"/parquet/list_struct_null_test.parquet\"";

  // Expected value of field f1: ["a", null]
  JsonStringArrayList<Text> f1Values = new JsonStringArrayList<>();
  f1Values.add(new Text("a"));
  f1Values.add(null);

  // Expected struct: {f1: f1Values}
  JsonStringHashMap<String, Object> struct = new JsonStringHashMap<>();
  struct.put("f1", f1Values);

  // Expected value of the whole column: [struct, null]
  JsonStringArrayList<JsonStringHashMap> column = new JsonStringArrayList<>();
  column.add(struct);
  column.add(null);

  // Whole column.
  testBuilder()
    .sqlQuery("SELECT col1 " + fromTable)
    .ordered()
    .baselineColumns("col1")
    .baselineValues(column)
    .build()
    .run();

  // List elements, unfiltered and then filtered on the null second element.
  final String elementSelect = "SELECT col1[0] as c1, col1[1] as c2 " + fromTable;
  for (String sql : new String[] {elementSelect, elementSelect + " where col1[1] is null"}) {
    testBuilder()
      .sqlQuery(sql)
      .ordered()
      .baselineColumns("c1", "c2")
      .baselineValues(struct, null)
      .build()
      .run();
  }

  // Field f1 of the first struct.
  testBuilder()
    .sqlQuery("SELECT col1[0].\"f1\" as c1 " + fromTable)
    .ordered()
    .baselineColumns("c1")
    .baselineValues(f1Values)
    .build()
    .run();

  // Elements of f1, unfiltered and then filtered on the null second element.
  final String fieldElementSelect =
    "SELECT col1[0].\"f1\"[0] as c1, col1[0].\"f1\"[1] as c2 " + fromTable;
  for (String sql : new String[] {
      fieldElementSelect, fieldElementSelect + " where col1[0].\"f1\"[1] is null"}) {
    testBuilder()
      .sqlQuery(sql)
      .ordered()
      .baselineColumns("c1", "c2")
      .baselineValues("a", null)
      .build()
      .run();
  }
}
 
示例29
/**
 * Creates a VarChar from a Java string.
 *
 * @param obj the string value; a {@code null} string is handed to the superclass constructor
 *            as {@code null} rather than being wrapped in a {@link Text}
 */
public VarChar(String obj) {
  super(obj != null ? new Text(obj) : null);
}
 
示例30
/**
 * Copies the value the reader is currently positioned on into the writer, dispatching on the
 * reader's minor type. LIST values are copied element by element through a recursive call.
 *
 * <p>VARBINARY and VARCHAR values are staged in {@code workBuf} before being written. The buffer
 * is passed through {@code reallocIfNeeded} and the result is reassigned, so callers must keep
 * using the returned buffer rather than the one they passed in.
 *
 * @param from reader supplying the value to copy
 * @param to writer receiving the value
 * @param workBuf scratch buffer used to stage variable-width bytes
 * @return the scratch buffer to use for subsequent calls (the result of any reallocation)
 * @throws UnsupportedOperationException if the reader's minor type is STRUCT, NULL, UINT1,
 *         UINT2, UINT4, UINT8, UNION, or any other type not handled here
 */
public static ArrowBuf copy(FieldReader from, FieldWriter to, ArrowBuf workBuf) {
  switch (from.getMinorType()) {
  case BIGINT:
    to.bigInt().writeBigInt(from.readLong());
    break;
  case BIT:
    to.bit().writeBit(from.readBoolean() ? 1 : 0);
    break;
  case DATEMILLI:
    DateMilliHolder dateHolder = new DateMilliHolder();
    from.read(dateHolder);
    to.dateMilli().write(dateHolder);
    break;
  case FLOAT4:
    to.float4().writeFloat4(from.readFloat());
    break;
  case FLOAT8:
    to.float8().writeFloat8(from.readDouble());
    break;
  case INT:
    to.integer().writeInt(from.readInteger());
    break;
  case INTERVALDAY:
    IntervalDayHolder intervalDayHolder = new IntervalDayHolder();
    from.read(intervalDayHolder);
    to.intervalDay().write(intervalDayHolder);
    break;
  case INTERVALYEAR:
    IntervalYearHolder intervalYearHolder = new IntervalYearHolder();
    from.read(intervalYearHolder);
    to.intervalYear().write(intervalYearHolder);
    break;
  case TIMEMILLI:
    TimeMilliHolder timeHolder = new TimeMilliHolder();
    from.read(timeHolder);
    to.timeMilli().write(timeHolder);
    break;
  case TIMESTAMPMILLI:
    TimeStampMilliHolder timeStampHolder = new TimeStampMilliHolder();
    from.read(timeStampHolder);
    to.timeStampMilli().write(timeStampHolder);
    break;
  case VARBINARY:
    byte[] binaryData = from.readByteArray();
    workBuf = workBuf.reallocIfNeeded(binaryData.length);
    workBuf.setBytes(0, binaryData);
    to.varBinary().writeVarBinary(0, binaryData.length, workBuf);
    break;
  case VARCHAR:
    Text text = from.readText();
    int textLength = text.getLength();
    workBuf = workBuf.reallocIfNeeded(textLength);
    // Text.getBytes() exposes the backing array, which can be longer than the valid data.
    // Copy only the first textLength bytes so we never write past the space reserved above.
    workBuf.setBytes(0, text.getBytes(), 0, textLength);
    to.varChar().writeVarChar(0, textLength, workBuf);
    break;
  case SMALLINT:
    to.smallInt().writeSmallInt(from.readShort());
    break;
  case TINYINT:
    to.tinyInt().writeTinyInt(from.readByte());
    break;
  case LIST:
    ListWriter listWriter = to.list();
    listWriter.startList();
    // Each element may grow the scratch buffer, so carry the returned buffer forward.
    while(from.next()) {
      workBuf = copy(from.reader(), (FieldWriter)listWriter, workBuf);
    }
    listWriter.endList();
    break;
  // The types below are listed explicitly to document that they are known but unsupported.
  case STRUCT:
  case NULL:
  case UINT1:
  case UINT2:
  case UINT4:
  case UINT8:
  case UNION:
  default:
    throw new UnsupportedOperationException(from.getMinorType().toString());
  }
  return workBuf;
}