Java源码示例:org.datavec.api.records.reader.impl.regex.RegexSequenceRecordReader
示例1
/**
 * Reads the two numbered log files (index 0 and 1) through a {@code RegexSequenceRecordReader}
 * and checks that each call to {@code sequenceRecord()} yields one sequence of three time steps,
 * each holding the four regex capture groups as {@code Text} values. Also verifies that
 * {@code reset()} lets the same two sequences be read again.
 *
 * @throws Exception if the classpath resource cannot be resolved or the reader fails
 */
@Test
public void testRegexSequenceRecordReader() throws Exception {
    // Capture groups: timestamp, numeric id, upper-case level, free-text message.
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    String path = new ClassPathResource("/logtestdata/logtestfile0.txt").getFile().toURI().toString();
    // Swap only the file index for the placeholder. Replacing every "0" would also rewrite any
    // other zero in the absolute path (e.g. a version number in a directory name) and produce
    // a pattern with more than one %d.
    path = path.replace("logtestfile0.txt", "logtestfile%d.txt");

    InputSplit is = new NumberedFileInputSplit(path, 0, 1);

    // NOTE(review): the second constructor argument is presumably the number of leading lines
    // to skip per file - confirm against the RegexSequenceRecordReader documentation.
    SequenceRecordReader rr = new RegexSequenceRecordReader(regex, 1);
    rr.initialize(is);

    // Expected content of the sequence read first.
    List<List<Writable>> exp0 = new ArrayList<>();
    exp0.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.001"), new Text("1"), new Text("DEBUG"),
                    new Text("First entry message!")));
    exp0.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.002"), new Text("2"), new Text("INFO"),
                    new Text("Second entry message!")));
    exp0.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.003"), new Text("3"), new Text("WARN"),
                    new Text("Third entry message!")));

    // Expected content of the sequence read second.
    List<List<Writable>> exp1 = new ArrayList<>();
    exp1.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.011"), new Text("11"), new Text("DEBUG"),
                    new Text("First entry message!")));
    exp1.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.012"), new Text("12"), new Text("INFO"),
                    new Text("Second entry message!")));
    exp1.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.013"), new Text("13"), new Text("WARN"),
                    new Text("Third entry message!")));

    assertEquals(exp0, rr.sequenceRecord());
    assertEquals(exp1, rr.sequenceRecord());
    assertFalse(rr.hasNext());

    //Test resetting:
    rr.reset();
    assertEquals(exp0, rr.sequenceRecord());
    assertEquals(exp1, rr.sequenceRecord());
    assertFalse(rr.hasNext());
}
示例2
/**
 * Checks that sequences obtained through {@code nextSequence()} match those obtained through
 * {@code sequenceRecord()}, and that the metadata collected from each {@code SequenceRecord}
 * can be passed to {@code loadSequenceFromMetaData} to load equal records again.
 *
 * @throws Exception if the classpath resource cannot be resolved or the reader fails
 */
@Test
public void testRegexSequenceRecordReaderMeta() throws Exception {
    // Capture groups: timestamp, numeric id, upper-case level, free-text message.
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    String path = new ClassPathResource("/logtestdata/logtestfile0.txt").getFile().toURI().toString();
    // Swap only the file index for the placeholder. Replacing every "0" would also rewrite any
    // other zero in the absolute path (e.g. a version number in a directory name) and produce
    // a pattern with more than one %d.
    path = path.replace("logtestfile0.txt", "logtestfile%d.txt");

    InputSplit is = new NumberedFileInputSplit(path, 0, 1);

    SequenceRecordReader rr = new RegexSequenceRecordReader(regex, 1);
    rr.initialize(is);

    // First pass: plain sequence data only.
    List<List<List<Writable>>> out = new ArrayList<>();
    while (rr.hasNext()) {
        out.add(rr.sequenceRecord());
    }
    assertEquals(2, out.size());

    // Second pass: same data, but keep the full records and their metadata as well.
    List<List<List<Writable>>> out2 = new ArrayList<>();
    List<SequenceRecord> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    rr.reset();
    while (rr.hasNext()) {
        SequenceRecord seqr = rr.nextSequence();
        out2.add(seqr.getSequenceRecord());
        out3.add(seqr);
        meta.add(seqr.getMetaData());
    }

    List<SequenceRecord> fromMeta = rr.loadSequenceFromMetaData(meta);

    assertEquals(out, out2);
    assertEquals(out3, fromMeta);
}
示例3
/**
 * Round-trips one instance of each listed record reader type through Java serialization.
 * Each reader is written to an in-memory byte array and read back; the test fails if
 * serialization throws or if the object read back is null or of a different class.
 *
 * @throws Exception if any reader cannot be serialized or deserialized
 */
@Test
public void testRR() throws Exception {
    List<RecordReader> rrs = new ArrayList<>();

    rrs.add(new CSVNLinesSequenceRecordReader(10));
    rrs.add(new CSVRecordReader(10, ','));
    rrs.add(new CSVSequenceRecordReader(1, ","));
    rrs.add(new CSVVariableSlidingWindowRecordReader(5));
    rrs.add(new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"}));
    rrs.add(new JacksonRecordReader(new FieldSelection.Builder().addField("a").addField(new Text("MISSING_B"), "b")
            .addField(new Text("MISSING_CX"), "c", "x").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new JacksonLineRecordReader(new FieldSelection.Builder().addField("value1")
            .addField("value2").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new LibSvmRecordReader());
    rrs.add(new SVMLightRecordReader());
    rrs.add(new RegexLineRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new RegexSequenceRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new TransformProcessRecordReader(new CSVRecordReader(), getTp()));
    rrs.add(new TransformProcessSequenceRecordReader(new CSVSequenceRecordReader(), getTp()));
    rrs.add(new LineRecordReader());

    for (RecordReader r : rrs) {
        System.out.println(r.getClass().getName());

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Closing the object stream flushes its internal buffer, so the byte array is
        // guaranteed to be complete before it is read back.
        try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
            os.writeObject(r);
        }
        byte[] bytes = baos.toByteArray();

        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            RecordReader r2 = (RecordReader) ois.readObject();
            // Minimal sanity check so the deserialized instance is actually inspected.
            if (r2 == null || r2.getClass() != r.getClass()) {
                throw new AssertionError("Deserialized reader does not match " + r.getClass().getName());
            }
        }
    }
}
示例4
/**
 * Copies the log test data into a fresh temporary folder, reads the two numbered files as
 * sequences, and verifies that reading via {@code nextSequence()} gives the same data as
 * {@code sequenceRecord()} and that records loaded from the collected metadata equal the
 * records read directly.
 *
 * @throws Exception if copying the test resources or reading them fails
 */
@Test
public void testRegexSequenceRecordReaderMeta() throws Exception {
    // Stage the classpath test files in a temporary folder.
    ClassPathResource resourceDir = new ClassPathResource("datavec-api/logtestdata/");
    File tmpDir = testDir.newFolder();
    resourceDir.copyDirectory(tmpDir);

    // Capture groups: timestamp, numeric id, upper-case level, free-text message.
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    File filePattern = new File(tmpDir, "logtestfile%d.txt");
    InputSplit split = new NumberedFileInputSplit(filePattern.getAbsolutePath(), 0, 1);

    SequenceRecordReader reader = new RegexSequenceRecordReader(regex, 1);
    reader.initialize(split);

    // First pass: collect the raw sequences.
    List<List<List<Writable>>> firstPass = new ArrayList<>();
    while (reader.hasNext()) {
        List<List<Writable>> sequence = reader.sequenceRecord();
        firstPass.add(sequence);
    }
    assertEquals(2, firstPass.size());

    // Second pass: collect the sequences again together with their records and metadata.
    List<List<List<Writable>>> secondPass = new ArrayList<>();
    List<SequenceRecord> records = new ArrayList<>();
    List<RecordMetaData> metaData = new ArrayList<>();
    reader.reset();
    while (reader.hasNext()) {
        SequenceRecord current = reader.nextSequence();
        secondPass.add(current.getSequenceRecord());
        records.add(current);
        metaData.add(current.getMetaData());
    }

    List<SequenceRecord> reloaded = reader.loadSequenceFromMetaData(metaData);

    assertEquals(firstPass, secondPass);
    assertEquals(records, reloaded);
}
示例5
/**
 * Verifies that each listed record reader type survives a Java serialization round trip.
 * Every reader is serialized to an in-memory buffer and deserialized again; the test fails
 * if either step throws or if the result is null or of a different class than the input.
 *
 * @throws Exception if any reader cannot be serialized or deserialized
 */
@Test
public void testRR() throws Exception {
    List<RecordReader> rrs = new ArrayList<>();

    rrs.add(new CSVNLinesSequenceRecordReader(10));
    rrs.add(new CSVRecordReader(10, ','));
    rrs.add(new CSVSequenceRecordReader(1, ","));
    rrs.add(new CSVVariableSlidingWindowRecordReader(5));
    rrs.add(new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"}));
    rrs.add(new JacksonRecordReader(new FieldSelection.Builder().addField("a").addField(new Text("MISSING_B"), "b")
            .addField(new Text("MISSING_CX"), "c", "x").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new JacksonLineRecordReader(new FieldSelection.Builder().addField("value1")
            .addField("value2").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new LibSvmRecordReader());
    rrs.add(new SVMLightRecordReader());
    rrs.add(new RegexLineRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new RegexSequenceRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new TransformProcessRecordReader(new CSVRecordReader(), getTp()));
    rrs.add(new TransformProcessSequenceRecordReader(new CSVSequenceRecordReader(), getTp()));
    rrs.add(new LineRecordReader());

    for (RecordReader r : rrs) {
        System.out.println(r.getClass().getName());

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // try-with-resources closes (and thereby flushes) the object stream before the
        // buffer contents are taken, instead of relying on implicit flushing.
        try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
            os.writeObject(r);
        }
        byte[] bytes = baos.toByteArray();

        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            RecordReader r2 = (RecordReader) ois.readObject();
            // Inspect the result so the round trip is checked, not merely executed.
            if (r2 == null || r2.getClass() != r.getClass()) {
                throw new AssertionError("Deserialized reader does not match " + r.getClass().getName());
            }
        }
    }
}
示例6
/**
 * Copies the log test data into a fresh temporary folder, reads the two numbered files as
 * sequences, and compares each against the expected three time steps of four {@code Text}
 * fields. The same expectations are checked again after {@code reset()}.
 *
 * @throws Exception if copying the test resources or reading them fails
 */
@Test
public void testRegexSequenceRecordReader() throws Exception {
    // Stage the classpath test files in a temporary folder.
    ClassPathResource resourceDir = new ClassPathResource("datavec-api/logtestdata/");
    File tmpDir = testDir.newFolder();
    resourceDir.copyDirectory(tmpDir);

    // Capture groups: timestamp, numeric id, upper-case level, free-text message.
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    File filePattern = new File(tmpDir, "logtestfile%d.txt");
    InputSplit split = new NumberedFileInputSplit(filePattern.getAbsolutePath(), 0, 1);

    SequenceRecordReader reader = new RegexSequenceRecordReader(regex, 1);
    reader.initialize(split);

    // Expected field values, one row per time step, for the sequence read first.
    String[][] firstRows = {
        {"2016-01-01 23:59:59.001", "1", "DEBUG", "First entry message!"},
        {"2016-01-01 23:59:59.002", "2", "INFO", "Second entry message!"},
        {"2016-01-01 23:59:59.003", "3", "WARN", "Third entry message!"},
    };
    List<List<Writable>> expectedFirst = new ArrayList<>();
    for (String[] row : firstRows) {
        expectedFirst.add(Arrays.<Writable>asList(new Text(row[0]), new Text(row[1]), new Text(row[2]),
                new Text(row[3])));
    }

    // Expected field values for the sequence read second.
    String[][] secondRows = {
        {"2016-01-01 23:59:59.011", "11", "DEBUG", "First entry message!"},
        {"2016-01-01 23:59:59.012", "12", "INFO", "Second entry message!"},
        {"2016-01-01 23:59:59.013", "13", "WARN", "Third entry message!"},
    };
    List<List<Writable>> expectedSecond = new ArrayList<>();
    for (String[] row : secondRows) {
        expectedSecond.add(Arrays.<Writable>asList(new Text(row[0]), new Text(row[1]), new Text(row[2]),
                new Text(row[3])));
    }

    assertEquals(expectedFirst, reader.sequenceRecord());
    assertEquals(expectedSecond, reader.sequenceRecord());
    assertFalse(reader.hasNext());

    // After a reset the reader must yield the same two sequences again.
    reader.reset();
    assertEquals(expectedFirst, reader.sequenceRecord());
    assertEquals(expectedSecond, reader.sequenceRecord());
    assertFalse(reader.hasNext());
}