Java源码示例:org.apache.arrow.vector.util.Text
示例1
/**
 * Normalizes selected Arrow value types into plain Java values so that downstream conversion is simpler.
 *
 * <p>A {@link Text} value is returned as its {@code String} form. A {@link LocalDateTime} value is resolved in
 * the record's default time zone and is returned either as a string rendered with the column's configured
 * date-time format, or, when no format is configured for the column, as the millisecond value of that instant.
 * Every other value is handed back unchanged.
 *
 * @param columnName column name where the input object comes from
 * @param object the input object
 * @param recordMetadata metadata object from glue table that contains customer specified information
 *                       such as desired default timezone, or datetime formats
 * @return the converted-to object if convertible, otherwise the original object
 */
public static Object convertArrowTypeIfNecessary(String columnName, Object object, DDBRecordMetadata recordMetadata)
{
    if (object instanceof Text) {
        return object.toString();
    }
    if (!(object instanceof LocalDateTime)) {
        // nothing to normalize for other types
        return object;
    }

    LocalDateTime localDateTime = (LocalDateTime) object;
    String pattern = recordMetadata.getDateTimeFormat(columnName);
    DateTimeZone defaultZone = DateTimeZone.forID(recordMetadata.getDefaultTimeZone().toString());
    if (pattern == null) {
        return localDateTime.toDateTime(defaultZone).getMillis();
    }
    // going through the zone first keeps the zone-resolution step of the conversion before formatting
    return localDateTime.toDateTime(defaultZone).toLocalDateTime().toString(pattern);
}
示例2
/**
 * Helper added for DRILL-951: lets TextRecordReader pull the field names out of this output.
 *
 * <p>Reads the list stored at record index {@code recordCount - 1} of the {@code COL_NAME} list vector
 * and returns its first {@code fieldIndex + 1} entries as strings.
 *
 * @return array of field data strings, or {@code null} when no record has been written
 *         ({@code recordCount == 0}) or no field has been seen ({@code fieldIndex == -1})
 * @throws ExecutionSetupException if obtaining the list vector raises a {@link SchemaChangeException}
 */
public String [] getTextOutput () throws ExecutionSetupException {
  if (recordCount == 0 || fieldIndex == -1) {
    return null;
  }

  // Currently only first line header is supported. Return only first record.
  final int fieldCount = fieldIndex + 1;
  final String[] fieldNames = new String[fieldCount];
  try {
    final ListVector columns =
        output.addField(new Field(COL_NAME, true, MinorType.LIST.getType(), null), ListVector.class);
    final List<?> rowValues = (List<?>) columns.getObject((int) (recordCount - 1));
    int pos = 0;
    while (pos < fieldCount) {
      fieldNames[pos] = ((Text) rowValues.get(pos)).toString();
      pos++;
    }
    return fieldNames;
  } catch (SchemaChangeException e) {
    throw new ExecutionSetupException(e);
  }
}
示例3
/**
 * Tests the reading of nullable var length columns. The check is run twice: once with byte-array
 * expectations, and once with {@code Text} expectations against a file that has a converted type
 * of UTF-8, to make sure it can be read.
 */
@Test
public void testNullableColumnsVarLen() throws Exception {
  // first pass: expected values supplied as raw byte arrays
  byte[] val = {'b'};
  byte[] val2 = {'b', '2'};
  // NOTE(review): val3 is declared but is not part of the expected values below - confirm whether intended
  byte[] val3 = {'b', '3'};
  byte[] val4 = { 'l','o','n','g','e','r',' ','s','t','r','i','n','g'};
  Object[] expectedBytes = { val, val2, val4};

  HashMap<String, FieldInfo> byteFields = new HashMap<>();
  ParquetTestProperties byteProps = new ParquetTestProperties(1, 300000, DEFAULT_BYTES_PER_PAGE, byteFields);
  byteProps.fields.put("a", new FieldInfo("boolean", "a", 1, expectedBytes, TypeProtos.MinorType.BIT, byteProps));
  testParquetFullEngineEventBased(false, "/parquet/parquet_nullable_varlen.json", "/tmp/nullable_varlen.parquet", 1, byteProps);

  // second pass: pass strings instead of byte arrays
  HashMap<String, FieldInfo> textFields = new HashMap<>();
  Object[] expectedText = {
      new org.apache.arrow.vector.util.Text("b"),
      new org.apache.arrow.vector.util.Text("b2"),
      new org.apache.arrow.vector.util.Text("b3") };
  ParquetTestProperties textProps = new ParquetTestProperties(1, 30000, DEFAULT_BYTES_PER_PAGE, textFields);
  textProps.fields.put("a", new FieldInfo("boolean", "a", 1, expectedText, TypeProtos.MinorType.BIT, textProps));
  testParquetFullEngineEventBased(false, "/parquet/parquet_scan_screen_read_entry_replace.json",
      "\"/tmp/varLen.parquet/a\"", "unused", 1, textProps);
}
示例4
/**
 * Derives the refresh path of a job from the path value recorded in the job's output data.
 *
 * <p>The first value of the {@code RecordWriter.PATH_COLUMN} column is made relative to
 * {@code accelerationBasePath}; the first two components of that relative path are returned,
 * prefixed with {@code ACCELERATOR_STORAGEPLUGIN_NAME}.
 *
 * @param jobId job whose output data is inspected
 * @param accelerationBasePath base path the written path is made relative to
 * @param jobsService service used to fetch the job data
 * @param allocator allocator used while fetching the job data
 * @return a three-element list: the storage plugin name followed by the first two path components
 */
public static List<String> getRefreshPath(final JobId jobId, final Path accelerationBasePath, JobsService jobsService, BufferAllocator allocator) {
  try (JobDataFragment data = JobDataClientUtils.getJobData(jobsService, allocator, jobId, 0, 1)) {
    // the written path comes from the writer's metadata
    final Object rawPath = data.extractValue(RecordWriter.PATH_COLUMN, 0);
    final Text writtenPath = (Text) Preconditions.checkNotNull(rawPath, "Empty write path for job %s", jobId.getId());

    // make it relative to the acceleration base path
    final String relative = PathUtils.relativePath(Path.of(writtenPath.toString()), accelerationBasePath);

    // keep the first 2 components of the path: "<reflection-id>"."<modified-materialization-id>"
    final List<String> parts = PathUtils.toPathComponents(relative);
    Preconditions.checkState(parts.size() >= 2, "Refresh path %s is incomplete", relative);
    return ImmutableList.of(ACCELERATOR_STORAGEPLUGIN_NAME, parts.get(0), parts.get(1));
  }
}
示例5
/**
 * Helper method that collects the values of the colVarChar vector used in this test class for the
 * positions in {@code [start, end)}. Positions that are not set, or whose text reads as {@code null},
 * contribute a {@code null} entry.
 */
private static List<String> getVarCharValues(VectorContainer container, int start, int end) {
  final List<String> collected = Lists.newArrayList();
  final FieldReader varCharReader =
      container.getValueAccessorById(VarCharVector.class, 1).getValueVector().getReader();
  for (int position = start; position < end; position++) {
    varCharReader.setPosition(position);
    String asString = null;
    if (varCharReader.isSet()) {
      final Text text = varCharReader.readText();
      if (text != null) {
        asString = text.toString();
      }
    }
    collected.add(asString);
  }
  return collected;
}
示例6
/**
 * Writes the reader's current varchar value, truncating it to the space remaining in {@code context},
 * or writes a null when either the type or the value is not set.
 *
 * @param reader reader positioned on the value to write
 * @param context output context tracking the remaining space and the truncation flag
 * @throws IOException propagated from the underlying write calls
 */
public void writeVarChar(FieldReader reader, JsonOutputContext context) throws IOException {
  // NB: For UnionReader(s), reader.isSet() checks if the reader has a type set, not if the data itself is null
  final Text text = reader.isSet() ? reader.readText() : null;
  if (text == null) {
    // Either the type or the value is not set
    writeNull(context);
    return;
  }

  String output = text.toString();
  if (output.length() > context.getRemaining()) {
    // Truncate the string if the space is limited
    output = output.substring(0, context.getRemaining());
    context.setTruncated();
  }
  writeVarChar(output);
  context.used(output.length());
}
示例7
/**
 * Attempts to convert the reader's current object into a long stored in {@code out.value}.
 *
 * <p>Numbers use their {@code longValue()}, booleans map to 1/0, {@link LocalDateTime} values go through
 * {@code toMillis}, and {@link Text} values are decoded and handed to {@code parseLong}.
 *
 * @param reader source of the value
 * @param out holder receiving the converted value
 * @return {@code true} when a value was produced, {@code false} otherwise
 */
public static boolean castToInteger(FieldReader reader, NullableBigIntHolder out) {
  final Object value = reader.readObject();
  if (value instanceof Number) {
    out.value = ((Number) value).longValue();
    return true;
  }
  if (value instanceof Boolean) {
    out.value = ((Boolean) value).booleanValue() ? 1 : 0;
    return true;
  }
  if (value instanceof LocalDateTime) {
    out.value = toMillis((LocalDateTime) value);
    return true;
  }
  if (value instanceof Text) {
    final Text text = (Text) value;
    try {
      // only the first getLength() bytes of the backing array are decoded
      final String decoded = Text.decode(text.getBytes(), 0, text.getLength());
      return parseLong(decoded, out);
    } catch (CharacterCodingException e) {
      // TODO: is this the best way?
      logger.warn("Can't decode text", e);
      return false;
    }
  }
  // TODO: byte[] values are not converted yet; they fall through with everything else
  return false;
}
示例8
/**
 * Returns the {@code String} form of a {@link Text} value; any other value is returned as-is.
 */
private static Object convert(Object value)
{
    return (value instanceof Text) ? ((Text) value).toString() : value;
}
示例9
/**
 * Reads the varchar field {@code fieldName} at {@code row} from the block.
 *
 * @return the string value, or {@code null} when the position is not set or the text reads as null
 */
private String getValue(Block block, int row, String fieldName)
{
    VarCharReader fieldReader = block.getFieldReader(fieldName);
    fieldReader.setPosition(row);
    if (!fieldReader.isSet()) {
        return null;
    }
    Text text = fieldReader.readText();
    return (text != null) ? text.toString() : null;
}
示例10
/**
 * Get the variable length element at specified index as Text.
 *
 * <p>An {@link IllegalStateException} raised by {@code get(index)} is treated as "no value" and mapped
 * to {@code null} (presumably that is how a null element is signalled; confirm against {@code get}).
 *
 * @param index position of element to get
 * @return Text object for non-null element, null otherwise
 */
public Text getObject(int index) {
  final byte[] bytes;
  try {
    bytes = get(index);
  } catch (IllegalStateException e) {
    return null;
  }
  final Text text = new Text();
  text.set(bytes);
  return text;
}
示例11
/**
 * Insert at a position from l1 and immediately overwrite it with a new value from l2, repeated
 * {@code TOTAL_STRINGS} times; then force a compaction and verify both the resulting offset and
 * the stored values.
 */
private void TestInterLeaved(MutableVarcharVector m1, LinkedList<String> l1, LinkedList<String> l2) {
  int validLength = 0;
  for (int idx = 0; idx < TOTAL_STRINGS; ++idx) {
    // insert at idx from l1
    m1.setSafe(idx, new Text(l1.get(idx)));
    // update at idx from l2
    final String replacement = l2.get(idx);
    m1.setSafe(idx, new Text(replacement));
    validLength += replacement.length();
  }

  final int offsetBefore = m1.getCurrentOffset();

  System.out.println("TestInterLeaved: buffer capacity: " + m1.getByteCapacity());
  System.out.println("TestInterLeaved: Offset in buffer: " + offsetBefore);
  System.out.println("TestInterLeaved: Sum of all valid strings: " + validLength);

  // time the compaction only
  final Stopwatch compactionTimer = Stopwatch.createUnstarted();
  compactionTimer.start();
  m1.forceCompact();
  compactionTimer.stop();

  final int offsetAfter = m1.getCurrentOffset();

  System.out.println("TestInterLeaved: Post Compaction: Offset in buffer: " + offsetAfter);
  System.out.println("TestInterLeaved: Compaction time: " + compactionTimer.toString() + "\n");

  Assert.assertTrue(offsetAfter <= offsetBefore);
  // post compaction, the offset should be just the sum of lengths of all valid data.
  Assert.assertEquals(validLength, offsetAfter);

  // every slot must hold the value from l2
  for (int idx = 0; idx < TOTAL_STRINGS; ++idx) {
    final String expected = l2.get(idx);
    final String actual = m1.getObject(idx).toString();
    Assert.assertEquals(expected, actual);
  }
}
示例12
/**
 * Reads a parquet file whose expected column values include an actual {@code null}.
 */
@Test
public void testFileWithNulls() throws Exception {
  // actually include null values
  Object[] expectedValues = {new Text(""), new Text("longer string"), null};

  HashMap<String, FieldInfo> fieldMap = new HashMap<>();
  ParquetTestProperties properties = new ParquetTestProperties(1, 3000, DEFAULT_BYTES_PER_PAGE, fieldMap);
  properties.fields.put("a", new FieldInfo("boolean", "a", 1, expectedValues, TypeProtos.MinorType.BIT, properties));

  testParquetFullEngineEventBased(false, "/parquet/parquet_scan_screen_read_entry_replace.json",
      "\"/tmp/nullable_with_nulls.parquet\"", "unused", 1, properties);
}
示例13
/**
 * Loads each batch in {@code records} and appends one map per row to {@code materializedRecords}.
 *
 * <p>Each row map is keyed by the column's simple-path expression and keeps the columns in loader
 * iteration order. {@link Text} values are stored as {@code String}; null values are stored as
 * {@code null} entries. Batches are consumed from the head of {@code records}: after a batch has been
 * materialized it is removed from the list and released, and the loader is cleared.
 *
 * @param materializedRecords destination list receiving one map per row
 * @param records batches to materialize; emptied as a side effect
 * @param loader loader used to decode each batch
 * @throws SchemaChangeException declared for callers; see the TODO on {@code load(...)} below
 * @throws UnsupportedEncodingException declared for callers
 */
public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords,
                                            List<QueryDataBatch> records,
                                            RecordBatchLoader loader)
    throws SchemaChangeException, UnsupportedEncodingException {
  long totalRecords = 0;
  QueryDataBatch batch;
  int size = records.size();
  for (int i = 0; i < size; i++) {
    // always take the head: the processed batch is removed at the end of each iteration
    batch = records.get(0);
    loader.load(batch.getHeader().getDef(), batch.getData());
    // TODO: Clean: DRILL-2933: That load(...) no longer throws
    // SchemaChangeException, so check/clean throws clause above.
    logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
    totalRecords += loader.getRecordCount();
    for (int j = 0; j < loader.getRecordCount(); j++) {
      Map<String, Object> record = new LinkedHashMap<>();
      for (VectorWrapper<?> w : loader) {
        Object obj = getVectorObject(w.getValueVector(), j);
        // instanceof is false for null, so no separate null check is needed
        if (obj instanceof Text) {
          obj = obj.toString();
        }
        // single put covers both null and non-null values (the key used to be computed and put twice)
        record.put(SchemaPath.getSimplePath(w.getField().getName()).toExpr(), obj);
      }
      materializedRecords.add(record);
    }
    records.remove(0);
    batch.release();
    loader.clear();
  }
}
示例14
/**
 * Convenience method to create a {@link JsonStringArrayList list} from the given values.
 * Any {@link CharSequence} value is added as a {@link Text}; all other values are added unchanged.
 */
public static JsonStringArrayList<Object> listOf(Object... values) {
  final JsonStringArrayList<Object> result = new JsonStringArrayList<>();
  for (final Object element : values) {
    result.add(element instanceof CharSequence ? new Text(element.toString()) : element);
  }
  return result;
}
示例15
/**
 * Attempts to convert the reader's current object into a double stored in {@code out.value}.
 *
 * <p>Numbers use their {@code doubleValue()}, booleans map to 1/0, {@link LocalDateTime} values go
 * through {@code toMillis}, and {@link Text} values are decoded and parsed when {@code isNumeric}
 * accepts them.
 *
 * @param reader source of the value
 * @param out holder receiving the converted value
 * @return {@code true} when a value was produced, {@code false} otherwise
 */
public static boolean castToFloat(FieldReader reader, NullableFloat8Holder out) {
  final Object value = reader.readObject();
  if (value instanceof Number) {
    out.value = ((Number) value).doubleValue();
    return true;
  }
  if (value instanceof Boolean) {
    out.value = ((Boolean) value).booleanValue() ? 1 : 0;
    return true;
  }
  if (value instanceof LocalDateTime) {
    out.value = toMillis((LocalDateTime) value);
    return true;
  }
  if (value instanceof Text) {
    final Text text = (Text) value;
    try {
      final String decoded = Text.decode(text.getBytes(), 0, text.getLength());
      if (isNumeric(decoded)) {
        out.value = Double.parseDouble(decoded);
        return true;
      }
      return false;
    } catch (Exception e) {
      // TODO: is this the best way?
      logger.warn("Can't decode text to FLOAT", e);
      return false;
    }
  }
  // TODO: byte[] values are not converted yet; they fall through with everything else
  return false;
}
示例16
/**
 * Attempts to convert the reader's current object into a bit (1/0) stored in {@code out.value}.
 *
 * <p>Numbers and {@link LocalDateTime} millis are true when non-zero. A {@link Text} value is false when
 * it decodes to null, the empty string, "false" or "f" (case-insensitive), "0" or "0.0"; any other text
 * is true.
 *
 * @param reader source of the value
 * @param out holder receiving the converted value
 * @return {@code true} when a value was produced, {@code false} otherwise
 */
public static boolean castToBoolean(FieldReader reader, NullableBitHolder out) {
  final Object value = reader.readObject();
  if (value instanceof Number) {
    out.value = ((Number) value).doubleValue() != 0 ? 1 : 0;
    return true;
  }
  if (value instanceof Boolean) {
    out.value = ((Boolean) value).booleanValue() ? 1 : 0;
    return true;
  }
  if (value instanceof LocalDateTime) {
    out.value = toMillis((LocalDateTime) value) != 0 ? 1 : 0;
    return true;
  }
  if (value instanceof Text) {
    final Text text = (Text) value;
    try {
      final String decoded = Text.decode(text.getBytes(), 0, text.getLength());
      final boolean falsy = (decoded == null)
          || (decoded.length() == 0)
          || ("false".equalsIgnoreCase(decoded))
          || ("f".equalsIgnoreCase(decoded))
          || ("0".equals(decoded))
          || ("0.0".equals(decoded));
      out.value = falsy ? 0 : 1;
      return true;
    } catch (CharacterCodingException e) {
      logger.warn("Can't decode text", e);
      return false;
    }
  }
  return false;
}
示例17
/**
 * Test to ensure Dremio reads struct columns made of primitive data types.
 *
 * <p>For a set of sample row offsets, the full struct value is checked first, then individual struct
 * members selected by name.
 *
 * @param table name of the hive table to query
 * @throws Exception if running a query or comparing against the baseline fails
 */
private void readStructHiveDataTypes(String table) throws Exception {
  int[] testrows = {0, 500, 1022, 1023, 1024, 4094, 4095, 4096, 4999};
  for (int i = 0; i < testrows.length; ++i) {
    final int row = testrows[i];
    // valueOf replaces the deprecated boxing constructors; the boxed values compare equal either way
    final Integer index = Integer.valueOf(row);
    JsonStringHashMap<String, Object> structrow1 = new JsonStringHashMap<String, Object>();
    structrow1.put("tinyint_field", Integer.valueOf(1));
    structrow1.put("smallint_field", Integer.valueOf(1024));
    structrow1.put("int_field", Integer.valueOf(row));
    structrow1.put("bigint_field", Long.valueOf(90000000000L));
    structrow1.put("float_field", Float.valueOf((float) row));
    structrow1.put("double_field", Double.valueOf((double) row));
    structrow1.put("string_field", new Text(Integer.toString(row)));

    // whole struct
    testBuilder().sqlQuery("SELECT * FROM hive." + table +
      " order by rownum limit 1 offset " + index.toString())
      .ordered()
      .baselineColumns("rownum", "struct_field")
      .baselineValues(index, structrow1)
      .go();

    // individual struct members
    testBuilder().sqlQuery("SELECT rownum, struct_field['string_field'] AS string_field, struct_field['int_field'] AS int_field FROM hive." + table +
      " order by rownum limit 1 offset " + index.toString())
      .ordered()
      .baselineColumns("rownum", "string_field", "int_field")
      .baselineValues(index, Integer.toString(row), index)
      .go();
  }
}
示例18
/**
 * Loads two documents whose {@code ip_field} holds arrays of IPv4 strings and checks that the query
 * returns them as lists of {@link Text}.
 */
@Test
public void testArrayOfIPv4() throws Exception {
  final String ip1 = "10.0.0.1";
  final String ip2 = "192.168.0.1";
  final String ip3 = "10.0.8.6";
  final String ip4 = "10.0.8.5";

  final Object[][] rows = new Object[][]{
      new Object[]{ip1, ip2, ip3},
      new Object[]{ip4, ip1}
  };
  ElasticsearchCluster.ColumnData[] data = new ElasticsearchCluster.ColumnData[]{
      new ElasticsearchCluster.ColumnData("ip_field", IP, null, rows)
  };

  elastic.load(schema, table, data);

  // expected values, one list per loaded document
  JsonStringArrayList<Text> firstRow = new JsonStringArrayList<>();
  firstRow.add(new Text(ip1));
  firstRow.add(new Text(ip2));
  firstRow.add(new Text(ip3));

  JsonStringArrayList<Text> secondRow = new JsonStringArrayList<>();
  secondRow.add(new Text(ip4));
  secondRow.add(new Text(ip1));

  testBuilder()
      .sqlQuery("select ip_field from elasticsearch." + schema + "." + table)
      .baselineColumns("ip_field")
      .unOrdered()
      .baselineValues(firstRow)
      .baselineValues(secondRow)
      .go();
}
示例19
/**
 * Builds the projection that reads a single non-complex value of the given Arrow minor type from a
 * field reader.
 *
 * <p>Most types map directly onto the matching {@code read*} call. DATEMILLI values are turned into a
 * {@code java.time} local date-time via their UTC millis, DATEDAY values into a {@link LocalDate} from
 * the epoch-day count, and VARCHAR values into a {@code String}; each of these yields {@code null} for
 * a null input.
 *
 * @param minorType the Arrow minor type to create a projection for
 * @return the projection for that type
 * @throws IllegalArgumentException if the type is not one of the handled simple types
 */
private Projection createSimpleValueProjection(Types.MinorType minorType)
{
    switch (minorType) {
        case DATEMILLI:
            return (reader) -> {
                if (Objects.isNull(reader.readLocalDateTime())) {
                    return null;
                }
                long epochMillis = reader.readLocalDateTime().toDateTime(org.joda.time.DateTimeZone.UTC).getMillis();
                return Instant.ofEpochMilli(epochMillis).atZone(BlockUtils.UTC_ZONE_ID).toLocalDateTime();
            };
        case TINYINT:
        case UINT1:
            return (reader) -> reader.readByte();
        case UINT2:
            return (reader) -> reader.readCharacter();
        case SMALLINT:
            return (reader) -> reader.readShort();
        case DATEDAY:
            return (reader) -> {
                Integer epochDay = reader.readInteger();
                return Objects.isNull(epochDay) ? null : LocalDate.ofEpochDay(epochDay);
            };
        case INT:
        case UINT4:
            return (reader) -> reader.readInteger();
        case UINT8:
        case BIGINT:
            return (reader) -> reader.readLong();
        case DECIMAL:
            return (reader) -> reader.readBigDecimal();
        case FLOAT4:
            return (reader) -> reader.readFloat();
        case FLOAT8:
            return (reader) -> reader.readDouble();
        case VARCHAR:
            return (reader) -> {
                Text varchar = reader.readText();
                return Objects.isNull(varchar) ? null : varchar.toString();
            };
        case VARBINARY:
            return (reader) -> reader.readByteArray();
        case BIT:
            return (reader) -> reader.readBoolean();
        default:
            throw new IllegalArgumentException("Unsupported type " + minorType);
    }
}
示例20
/**
 * Used to get values (Integer, Double, String, etc) from the Arrow values in the fieldReader.
 *
 * <p>The reader is first moved to {@code pos}; the value is then read according to the reader's minor
 * type. LIST and STRUCT values are delegated to {@code readList} and {@code readStruct}.
 *
 * @param fieldReader the field reader containing the arrow value
 * @param pos the position the reader is set to before reading
 * @return the value object in java type
 * @throws IllegalArgumentException if the reader's minor type is not handled
 */
public static Object getValue(FieldReader fieldReader, int pos)
{
    fieldReader.setPosition(pos);

    Types.MinorType type = fieldReader.getMinorType();
    switch (type) {
        case DATEMILLI: {
            if (Objects.isNull(fieldReader.readLocalDateTime())) {
                return null;
            }
            long epochMillis = fieldReader.readLocalDateTime().toDateTime(org.joda.time.DateTimeZone.UTC).getMillis();
            return Instant.ofEpochMilli(epochMillis).atZone(UTC_ZONE_ID).toLocalDateTime();
        }
        case TINYINT:
        case UINT1:
            return fieldReader.readByte();
        case UINT2:
            return fieldReader.readCharacter();
        case SMALLINT:
            return fieldReader.readShort();
        case DATEDAY: {
            Integer epochDay = fieldReader.readInteger();
            if (Objects.isNull(epochDay)) {
                return null;
            }
            return LocalDate.ofEpochDay(epochDay);
        }
        case INT:
        case UINT4:
            return fieldReader.readInteger();
        case UINT8:
        case BIGINT:
            return fieldReader.readLong();
        case DECIMAL:
            return fieldReader.readBigDecimal();
        case FLOAT4:
            return fieldReader.readFloat();
        case FLOAT8:
            return fieldReader.readDouble();
        case VARCHAR: {
            Text varchar = fieldReader.readText();
            if (Objects.isNull(varchar)) {
                return null;
            }
            return varchar.toString();
        }
        case VARBINARY:
            return fieldReader.readByteArray();
        case BIT:
            return fieldReader.readBoolean();
        case LIST:
            return readList(fieldReader);
        case STRUCT:
            return readStruct(fieldReader);
        default:
            throw new IllegalArgumentException("Unsupported type " + type);
    }
}
示例21
/**
 * Writes a value derived from {@code idx} into the vector at position {@code idx}; what is written
 * depends on the concrete vector type.
 *
 * @param fieldVector the vector to write to
 * @param idx the position to write, also used to derive the value
 * @throws IllegalArgumentException if the vector type is not one of the handled types
 */
private void writeColumn(FieldVector fieldVector, int idx)
{
    if (fieldVector instanceof IntVector) {
        ((IntVector) fieldVector).setSafe(idx, idx + 100);
    }
    else if (fieldVector instanceof Float4Vector) {
        ((Float4Vector) fieldVector).setSafe(idx, idx + 100.1f);
    }
    else if (fieldVector instanceof Float8Vector) {
        ((Float8Vector) fieldVector).setSafe(idx, idx + 100.2);
    }
    else if (fieldVector instanceof VarCharVector) {
        ((VarCharVector) fieldVector).setSafe(idx, new Text(idx + "-my-varchar"));
    }
    else if (fieldVector instanceof ListVector) {
        // list of three ints derived from idx
        BlockUtils.setComplexValue(fieldVector,
                idx,
                FieldResolver.DEFAULT,
                ImmutableList.of(idx + 100, idx + 200, idx + 300));
    }
    else if (fieldVector instanceof StructVector) {
        // struct with one int member and one double member
        Map<String, Object> struct = ImmutableMap.of("intVal", idx + 100, "doubleVal", idx + 200.2);
        BlockUtils.setComplexValue(fieldVector,
                idx,
                FieldResolver.DEFAULT,
                struct);
    }
    else {
        throw new IllegalArgumentException("Unsupported fieldVector " + fieldVector.getClass().getCanonicalName());
    }
}
示例22
/**
 * Used to convert from Apache Arrow typed values to HBase values.
 *
 * <p>{@code null} and {@code byte[]} inputs are returned as-is. String-like values ({@code String},
 * {@link Text}, and every value when {@code isNative} is false) are encoded as UTF-8 rather than with
 * the JVM's platform-default charset, so the produced bytes do not depend on the host configuration.
 * Native numeric and boolean values are written with {@link ByteBuffer}.
 *
 * @param isNative If True, the HBase value should be stored using native bytes.
 * If False, the value should be serialized as a String before storing it.
 * @param value The value to convert.
 * @return The HBase byte representation of the value.
 * @throws RuntimeException if {@code isNative} is true and the value's type is not supported
 * @note This is commonly used when attempting to push constraints into HBase which requires converting a small
 * number of values from Apache Arrow's Type system to HBase compatible representations for comparisons.
 */
public static byte[] toBytes(boolean isNative, Object value)
{
    if (value == null || value instanceof byte[]) {
        return (byte[]) value;
    }

    if (value instanceof String) {
        return ((String) value).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    if (value instanceof Text) {
        return ((Text) value).toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    if (!isNative) {
        return String.valueOf(value).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    if (value instanceof Integer) {
        return ByteBuffer.allocate(4).putInt((int) value).array();
    }

    if (value instanceof Long) {
        return ByteBuffer.allocate(8).putLong((long) value).array();
    }

    if (value instanceof Float) {
        return ByteBuffer.allocate(4).putFloat((float) value).array();
    }

    if (value instanceof Double) {
        return ByteBuffer.allocate(8).putDouble((double) value).array();
    }

    if (value instanceof Boolean) {
        return ByteBuffer.allocate(1).put((byte) ((boolean) value ? 1 : 0)).array();
    }

    // value is known to be non-null here (null returned at the top), so its class is always available
    throw new RuntimeException("Unsupported object type for " + value + " with class name " + value.getClass().getName());
}
示例23
/**
 * Insert TOTAL_STRINGS strings of average size smallAvgSize,
 * update all of them with strings of average size midAvgSize,
 * force a compaction,
 * then validate that every slot holds the updated value.
 */
@Test
public void TestUpdateAfterInsert()
{
  final double threshold = 1.0D;
  MutableVarcharVector m1 = new MutableVarcharVector("TestUpdateAfterInsert", testAllocator, threshold /*only force compact*/);
  try {
    LinkedList<String> initialValues = GetRandomStringList(TOTAL_STRINGS, smallAvgSize);
    LinkedList<String> updatedValues = GetRandomStringList(TOTAL_STRINGS, midAvgSize);

    // insert the initial values
    for (int idx = 0; idx < TOTAL_STRINGS; ++idx) {
      m1.setSafe(idx, new Text(initialValues.get(idx)));
    }
    m1.setValueCount(TOTAL_STRINGS);

    System.out.println("TestUpdateAfterInsert: threshold: " + threshold + " (only forcecompact)");
    System.out.println("TestUpdateAfterInsert: buffer capacity: " + m1.getByteCapacity());
    System.out.println("TestUpdateAfterInsert: Inserted " + TOTAL_STRINGS + " strings with avg size: " + smallAvgSize);
    System.out.println("TestUpdateAfterInsert: Offset in buffer: " + m1.getCurrentOffset());

    // overwrite every slot with the updated values
    for (int idx = 0; idx < TOTAL_STRINGS; ++idx) {
      m1.setSafe(idx, new Text(updatedValues.get(idx)));
    }

    System.out.println("TestUpdateAfterInsert: Updated " + TOTAL_STRINGS + " strings with avg size: " + midAvgSize);
    System.out.println("TestUpdateAfterInsert: Offset in buffer: " + m1.getCurrentOffset());

    // compact, timing only the compaction itself
    final int offsetBefore = m1.getCurrentOffset();
    final Stopwatch compactionTimer = Stopwatch.createUnstarted();
    compactionTimer.start();
    m1.forceCompact();
    compactionTimer.stop();
    final int offsetAfter = m1.getCurrentOffset();

    System.out.println("TestUpdateAfterInsert: Post Compaction: Offset in buffer: " + offsetAfter);
    System.out.println("TestUpdateAfterInsert: Compaction time: " + compactionTimer.toString() + "\n");
    Assert.assertTrue(offsetAfter <= offsetBefore);

    // every slot must hold the updated value
    for (int idx = 0; idx < TOTAL_STRINGS; ++idx) {
      final String expected = updatedValues.get(idx);
      final String actual = m1.getObject(idx).toString();
      Assert.assertEquals(expected, actual);
    }
  } finally {
    m1.close();
  }
}
示例24
/**
 * Writes values into every other slot of a mutable varchar vector (leaving the first
 * {@code firstIndex} slots and the slots in between untouched), copies the records out to a plain
 * {@link VarCharVector} without compacting, and checks the copied values and the null count.
 */
@Test
public void TestCopyOut()
{
  MutableVarcharVector m1 = new MutableVarcharVector("TestCopyOut", testAllocator, 1.0D /* never compact */);
  // only one target vector is created: this test never used (or closed) a second one
  VarCharVector v1 = new VarCharVector("TestCopyOutVarchar", testAllocator);
  try {
    //insert the values from l1
    LinkedList<String> l1 = GetRandomStringList(TOTAL_STRINGS, smallAvgSize);
    //update values from l2
    LinkedList<String> l2 = GetRandomStringList(TOTAL_STRINGS, midAvgSize);
    int i = 0;
    final int firstIndex = 10; //beginning firstIndex records to be null
    //insert TOTAL_STRINGS nulls and values
    int startIdx = firstIndex;
    while (i < TOTAL_STRINGS) {
      //insert
      m1.setSafe(startIdx, new Text(l1.get(i)));
      //update
      m1.setSafe(startIdx, new Text(l2.get(i)));
      ++i;
      //create null values
      startIdx += 2;
    }
    //allocate with twice the index
    v1.allocateNew(m1.getUsedByteCapacity(), startIdx * 2);
    //copy records
    m1.copyToVarchar(v1, 0, startIdx * 3);
    //reset the value count back to actual strings copied
    v1.setValueCount(startIdx);
    //validate the records, skip null values
    i = 0;
    int j = firstIndex;
    while (i < TOTAL_STRINGS) {
      Assert.assertEquals(l2.get(i) /*expected*/, v1.getObject(j).toString() /*actual*/);
      ++i;
      j += 2; //skip nulls
    }
    //counters must match
    Assert.assertEquals(startIdx, j);
    Assert.assertEquals(TOTAL_STRINGS + firstIndex, v1.getNullCount());
  }
  finally {
    m1.close();
    v1.close();
  }
}
示例25
/**
 * Same scenario as the plain copy-out test, but forces a compaction before copying the records out,
 * to verify that the data still matches after compaction.
 */
@Test
public void TestCopyOutPostCompaction()
{
  MutableVarcharVector m1 = new MutableVarcharVector("TestCopyOut", testAllocator, 1.0D /* never compact */);
  VarCharVector v2 = new VarCharVector("TestCopyOutVarcharPostCompaction", testAllocator);
  try {
    //insert the values from l1
    LinkedList<String> l1 = GetRandomStringList(TOTAL_STRINGS, smallAvgSize);
    //update values from l2
    LinkedList<String> l2 = GetRandomStringList(TOTAL_STRINGS, midAvgSize);
    int i = 0;
    final int firstIndex = 10; //beginning firstIndex records to be null
    //insert TOTAL_STRINGS nulls and values
    int startIdx = firstIndex;
    while (i < TOTAL_STRINGS) {
      //insert
      m1.setSafe(startIdx, new Text(l1.get(i)));
      //update
      m1.setSafe(startIdx, new Text(l2.get(i)));
      ++i;
      //create null values
      startIdx += 2;
    }
    //Reduce garbage data
    m1.forceCompact();
    //allocate with twice the index
    v2.allocateNew(m1.getUsedByteCapacity(), startIdx * 2);
    //copy records
    m1.copyToVarchar(v2, 0, startIdx * 3);
    //reset the value count back to actual strings copied
    v2.setValueCount(startIdx);
    //verify that post compaction also data matches
    i = 0;
    int j = firstIndex;
    while (i < TOTAL_STRINGS) {
      Assert.assertEquals(l2.get(i) /*expected*/, v2.getObject(j).toString() /*actual*/);
      ++i;
      j += 2; //skip nulls
    }
    //counters must match
    Assert.assertEquals(startIdx, j);
    Assert.assertEquals(TOTAL_STRINGS + firstIndex, v2.getNullCount());
  }
  finally {
    m1.close();
    v2.close();
  }
}
示例26
/**
 * For every constant-folding test config, checks that the plan mentions only the expected folder and
 * none of the other known folders; then runs the query for the first config and compares the result.
 */
@Test
public void testConstExprFolding_maxDir0() throws Exception {

  test("use dfs_root");

  final List<String> knownFolders = ImmutableList.<String>builder()
    .add("smallfile")
    .add("SMALLFILE_2")
    .add("bigfile")
    .add("BIGFILE_2")
    .build();

  final String queryTemplate = "select * from dfs_root.\"" + path + "/*/*.csv\" where dir0 = %s('dfs','" + path + "')";
  for (ConstantFoldingTestConfig config : tests) {
    // make all of the other folders unexpected patterns, except for the one expected in this case
    final List<String> unexpected = Lists.newArrayList();
    unexpected.addAll(knownFolders);
    unexpected.remove(config.expectedFolderName);
    // The list is easier to construct programmatically, but the API below takes an array to make it easier
    // to write a list as a literal array in a typical test definition
    final String[] expectedPatterns = new String[] {config.expectedFolderName};
    final String[] unexpectedPatterns = unexpected.toArray(new String[unexpected.size()]);
    testPlanMatchingPatterns(String.format(queryTemplate, config.funcName), expectedPatterns, unexpectedPatterns);
  }

  // expected content of the "columns" column
  final JsonStringArrayList<Text> expectedColumns = new JsonStringArrayList<>();
  expectedColumns.add(new Text("1"));
  expectedColumns.add(new Text("2"));
  expectedColumns.add(new Text("3"));

  testBuilder()
    .sqlQuery(String.format(queryTemplate, tests.get(0).funcName))
    .unOrdered()
    .baselineColumns("columns", "dir0")
    .baselineValues(expectedColumns, tests.get(0).expectedFolderName)
    .go();
}
示例27
/**
 * Reads a three-level nested list that contains a null at every level, and checks the whole column
 * as well as element access at the first and second nesting levels, with and without an
 * {@code is null} filter.
 */
@Test
public void testListofListWithNulls() throws Exception {
  // expected structure: [[["a", null], null], null]
  JsonStringArrayList<Text> thirdLevelList = new JsonStringArrayList<>();
  thirdLevelList.add(new Text("a"));
  thirdLevelList.add(null);
  JsonStringArrayList<JsonStringArrayList> secondLevelList = new JsonStringArrayList<>();
  secondLevelList.add(thirdLevelList);
  secondLevelList.add(null);
  JsonStringArrayList<JsonStringArrayList> topLevelList = new JsonStringArrayList<>();
  topLevelList.add(secondLevelList);
  topLevelList.add(null);

  final String fromClause = "FROM cp.\"/parquet/list_list_null_test.parquet\"";

  // whole column
  final String wholeColumn = "SELECT col1 " + fromClause;
  testBuilder()
    .sqlQuery(wholeColumn)
    .ordered()
    .baselineColumns("col1")
    .baselineValues(topLevelList)
    .build()
    .run();

  // first-level elements
  final String firstLevel = "SELECT col1[0] as c1, col1[1] as c2 " + fromClause;
  testBuilder()
    .sqlQuery(firstLevel)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(secondLevelList, null)
    .build()
    .run();

  // first-level elements, filtered on the null element
  final String firstLevelFiltered = firstLevel + " where col1[1] is null";
  testBuilder()
    .sqlQuery(firstLevelFiltered)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(secondLevelList, null)
    .build()
    .run();

  // second-level elements
  final String secondLevel = "SELECT col1[0][0] as c1, col1[0][1] as c2 " + fromClause;
  testBuilder()
    .sqlQuery(secondLevel)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(thirdLevelList, null)
    .build()
    .run();

  // second-level elements, filtered on the null element
  final String secondLevelFiltered = secondLevel + " where col1[0][1] is null";
  testBuilder()
    .sqlQuery(secondLevelFiltered)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(thirdLevelList, null)
    .build()
    .run();
}
示例28
/**
 * Reads a list of structs whose struct member is itself a list containing a null, and checks the
 * whole column, list elements, the struct member, and the member's elements, with and without an
 * {@code is null} filter.
 */
@Test
public void testListofStructWithNulls() throws Exception {
  // expected structure: [{f1: ["a", null]}, null]
  JsonStringArrayList<Text> thirdLevelList = new JsonStringArrayList<>();
  thirdLevelList.add(new Text("a"));
  thirdLevelList.add(null);
  JsonStringHashMap<String, Object> secondLevelStruct = new JsonStringHashMap<String, Object>();
  secondLevelStruct.put("f1", thirdLevelList);
  JsonStringArrayList<JsonStringHashMap> topLevelList = new JsonStringArrayList<>();
  topLevelList.add(secondLevelStruct);
  topLevelList.add(null);

  final String fromClause = "FROM cp.\"/parquet/list_struct_null_test.parquet\"";

  // whole column
  final String wholeColumn = "SELECT col1 " + fromClause;
  testBuilder()
    .sqlQuery(wholeColumn)
    .ordered()
    .baselineColumns("col1")
    .baselineValues(topLevelList)
    .build()
    .run();

  // list elements
  final String listElements = "SELECT col1[0] as c1, col1[1] as c2 " + fromClause;
  testBuilder()
    .sqlQuery(listElements)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(secondLevelStruct, null)
    .build()
    .run();

  // list elements, filtered on the null element
  final String listElementsFiltered = listElements + " where col1[1] is null";
  testBuilder()
    .sqlQuery(listElementsFiltered)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues(secondLevelStruct, null)
    .build()
    .run();

  // struct member of the first element
  final String structMember = "SELECT col1[0].\"f1\" as c1 " + fromClause;
  testBuilder()
    .sqlQuery(structMember)
    .ordered()
    .baselineColumns("c1")
    .baselineValues(thirdLevelList)
    .build()
    .run();

  // elements of that struct member
  final String memberElements = "SELECT col1[0].\"f1\"[0] as c1, col1[0].\"f1\"[1] as c2 " + fromClause;
  testBuilder()
    .sqlQuery(memberElements)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues("a", null)
    .build()
    .run();

  // elements of that struct member, filtered on the null element
  final String memberElementsFiltered = memberElements + " where col1[0].\"f1\"[1] is null";
  testBuilder()
    .sqlQuery(memberElementsFiltered)
    .ordered()
    .baselineColumns("c1", "c2")
    .baselineValues("a", null)
    .build()
    .run();
}
示例29
/**
 * Wraps the given string as a {@link Text} and passes it to the superclass constructor;
 * a {@code null} string is passed on as {@code null}.
 *
 * @param obj the string value, may be {@code null}
 */
public VarChar(String obj) {
  super(obj != null ? new Text(obj) : null);
}
示例30
/**
 * Copies the value currently under {@code from} to {@code to}, dispatching on the reader's minor type.
 *
 * <p>Variable-width values (VARBINARY, VARCHAR) are staged through {@code workBuf}, which is
 * reallocated when it is too small; callers must therefore continue with the returned buffer. LIST
 * values are copied element by element through a recursive call.
 *
 * @param from reader positioned on the value to copy
 * @param to writer receiving the value
 * @param workBuf scratch buffer used for variable-width values
 * @return the scratch buffer to use from now on (may differ from {@code workBuf} after a reallocation)
 * @throws UnsupportedOperationException for STRUCT, NULL, UINT1/2/4/8, UNION and any other unhandled type
 */
public static ArrowBuf copy(FieldReader from, FieldWriter to, ArrowBuf workBuf) {
  switch (from.getMinorType()) {
  case BIGINT:
    to.bigInt().writeBigInt(from.readLong());
    break;
  case BIT:
    to.bit().writeBit(from.readBoolean() ? 1 : 0);
    break;
  case DATEMILLI:
    DateMilliHolder dateHolder = new DateMilliHolder();
    from.read(dateHolder);
    to.dateMilli().write(dateHolder);
    break;
  case FLOAT4:
    to.float4().writeFloat4(from.readFloat());
    break;
  case FLOAT8:
    to.float8().writeFloat8(from.readDouble());
    break;
  case INT:
    to.integer().writeInt(from.readInteger());
    break;
  case INTERVALDAY:
    IntervalDayHolder intervalDayHolder = new IntervalDayHolder();
    from.read(intervalDayHolder);
    to.intervalDay().write(intervalDayHolder);
    break;
  case INTERVALYEAR:
    IntervalYearHolder intervalYearHolder = new IntervalYearHolder();
    from.read(intervalYearHolder);
    to.intervalYear().write(intervalYearHolder);
    break;
  case TIMEMILLI:
    TimeMilliHolder timeHolder = new TimeMilliHolder();
    from.read(timeHolder);
    to.timeMilli().write(timeHolder);
    break;
  case TIMESTAMPMILLI:
    TimeStampMilliHolder timeStampHolder = new TimeStampMilliHolder();
    from.read(timeStampHolder);
    to.timeStampMilli().write(timeStampHolder);
    break;
  case VARBINARY:
    byte[] binaryData = from.readByteArray();
    workBuf = workBuf.reallocIfNeeded(binaryData.length);
    workBuf.setBytes(0, binaryData);
    to.varBinary().writeVarBinary(0, binaryData.length, workBuf);
    break;
  case VARCHAR:
    Text text = from.readText();
    final int textLength = text.getLength();
    workBuf = workBuf.reallocIfNeeded(textLength);
    // Text's backing array may be longer than its valid length, so copy only the valid bytes;
    // copying the whole array could write past the capacity reserved just above
    workBuf.setBytes(0, text.getBytes(), 0, textLength);
    to.varChar().writeVarChar(0, textLength, workBuf);
    break;
  case SMALLINT:
    to.smallInt().writeSmallInt(from.readShort());
    break;
  case TINYINT:
    to.tinyInt().writeTinyInt(from.readByte());
    break;
  case LIST:
    ListWriter listWriter = to.list();
    listWriter.startList();
    while(from.next()) {
      // the recursive call may reallocate the scratch buffer, so keep the returned one
      workBuf = copy(from.reader(), (FieldWriter)listWriter, workBuf);
    }
    listWriter.endList();
    break;
  case STRUCT:
  case NULL:
  case UINT1:
  case UINT2:
  case UINT4:
  case UINT8:
  case UNION:
  default:
    throw new UnsupportedOperationException(from.getMinorType().toString());
  }
  return workBuf;
}