Java源码示例:org.datavec.api.records.reader.impl.regex.RegexSequenceRecordReader

示例1
/**
 * Reads two sequences through a {@code RegexSequenceRecordReader} backed by a
 * {@code NumberedFileInputSplit} and checks every step against the expected capture-group
 * values, first on a fresh reader and then again after {@code reset()}.
 *
 * @throws Exception if the test resource cannot be resolved or reading fails
 */
@Test
public void testRegexSequenceRecordReader() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    // Turn only the file-name index into the %d placeholder. Replacing every "0" in the URI
    // would also rewrite zeros elsewhere in the path (a directory such as "build0", or a
    // percent-escape such as "%20") and produce a pattern that no longer matches the files.
    String path = new ClassPathResource("/logtestdata/logtestfile0.txt").getFile().toURI().toString();
    path = path.replace("logtestfile0.txt", "logtestfile%d.txt");

    InputSplit is = new NumberedFileInputSplit(path, 0, 1);

    SequenceRecordReader rr = new RegexSequenceRecordReader(regex, 1);
    rr.initialize(is);

    // Expected steps of the first sequence: one list of four Text values per matched line.
    List<List<Writable>> exp0 = new ArrayList<>();
    exp0.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.001"), new Text("1"), new Text("DEBUG"),
                    new Text("First entry message!")));
    exp0.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.002"), new Text("2"), new Text("INFO"),
                    new Text("Second entry message!")));
    exp0.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.003"), new Text("3"), new Text("WARN"),
                    new Text("Third entry message!")));

    // Expected steps of the second sequence.
    List<List<Writable>> exp1 = new ArrayList<>();
    exp1.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.011"), new Text("11"), new Text("DEBUG"),
                    new Text("First entry message!")));
    exp1.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.012"), new Text("12"), new Text("INFO"),
                    new Text("Second entry message!")));
    exp1.add(Arrays.asList((Writable) new Text("2016-01-01 23:59:59.013"), new Text("13"), new Text("WARN"),
                    new Text("Third entry message!")));

    assertEquals(exp0, rr.sequenceRecord());
    assertEquals(exp1, rr.sequenceRecord());
    assertFalse(rr.hasNext());

    //Test resetting:
    rr.reset();
    assertEquals(exp0, rr.sequenceRecord());
    assertEquals(exp1, rr.sequenceRecord());
    assertFalse(rr.hasNext());
}
 
示例2
/**
 * Checks the metadata round trip of a {@code RegexSequenceRecordReader}: the sequences read
 * with {@code sequenceRecord()} must equal those read with {@code nextSequence()} after a
 * reset, and {@code loadSequenceFromMetaData} must return records equal to the ones whose
 * metadata was collected.
 *
 * @throws Exception if the test resource cannot be resolved or reading fails
 */
@Test
public void testRegexSequenceRecordReaderMeta() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    // Turn only the file-name index into the %d placeholder. Replacing every "0" in the URI
    // would also rewrite zeros elsewhere in the path (a directory such as "build0", or a
    // percent-escape such as "%20") and produce a pattern that no longer matches the files.
    String path = new ClassPathResource("/logtestdata/logtestfile0.txt").getFile().toURI().toString();
    path = path.replace("logtestfile0.txt", "logtestfile%d.txt");
    InputSplit is = new NumberedFileInputSplit(path, 0, 1);

    SequenceRecordReader rr = new RegexSequenceRecordReader(regex, 1);
    rr.initialize(is);

    // First pass: plain sequence data only.
    List<List<List<Writable>>> out = new ArrayList<>();
    while (rr.hasNext()) {
        out.add(rr.sequenceRecord());
    }

    assertEquals(2, out.size());

    // Second pass: the same data via nextSequence(), keeping the records and their metadata.
    List<List<List<Writable>>> out2 = new ArrayList<>();
    List<SequenceRecord> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    rr.reset();
    while (rr.hasNext()) {
        SequenceRecord seqr = rr.nextSequence();
        out2.add(seqr.getSequenceRecord());
        out3.add(seqr);
        meta.add(seqr.getMetaData());
    }

    List<SequenceRecord> fromMeta = rr.loadSequenceFromMetaData(meta);

    assertEquals(out, out2);
    assertEquals(out3, fromMeta);
}
 
示例3
/**
 * Round-trips one instance of each listed record reader through Java serialization. The test
 * fails if any reader cannot be written, read back, or cast to {@code RecordReader}.
 *
 * @throws Exception if serialization or deserialization of any reader fails
 */
@Test
public void testRR() throws Exception {

    List<RecordReader> rrs = new ArrayList<>();

    rrs.add(new CSVNLinesSequenceRecordReader(10));
    rrs.add(new CSVRecordReader(10, ','));
    rrs.add(new CSVSequenceRecordReader(1, ","));
    rrs.add(new CSVVariableSlidingWindowRecordReader(5));
    rrs.add(new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"}));
    rrs.add(new JacksonRecordReader(new FieldSelection.Builder().addField("a").addField(new Text("MISSING_B"), "b")
            .addField(new Text("MISSING_CX"), "c", "x").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new JacksonLineRecordReader(new FieldSelection.Builder().addField("value1")
            .addField("value2").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new LibSvmRecordReader());
    rrs.add(new SVMLightRecordReader());
    rrs.add(new RegexLineRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new RegexSequenceRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new TransformProcessRecordReader(new CSVRecordReader(), getTp()));
    rrs.add(new TransformProcessSequenceRecordReader(new CSVSequenceRecordReader(), getTp()));
    rrs.add(new LineRecordReader());

    for (RecordReader r : rrs) {
        System.out.println(r.getClass().getName());
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Close the object stream before taking the bytes so that everything it has buffered
        // is guaranteed to have reached the byte array.
        try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
            os.writeObject(r);
        }
        byte[] bytes = baos.toByteArray();

        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            // The cast is the check: it throws if the restored object is not a RecordReader.
            RecordReader r2 = (RecordReader) ois.readObject();
        }
    }
}
 
示例4
/**
 * Verifies that sequences reloaded from their metadata equal the sequences originally read.
 * The reader is drained once with {@code sequenceRecord()}, reset, drained again with
 * {@code nextSequence()}, and finally asked to rebuild the records from the collected metadata.
 *
 * @throws Exception if the test data cannot be copied or reading fails
 */
@Test
public void testRegexSequenceRecordReaderMeta() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    // Copy the bundled log files into a fresh folder and address them by a %d pattern.
    ClassPathResource logResources = new ClassPathResource("datavec-api/logtestdata/");
    File workDir = testDir.newFolder();
    logResources.copyDirectory(workDir);
    File patternFile = new File(workDir, "logtestfile%d.txt");
    String pathPattern = patternFile.getAbsolutePath();

    InputSplit split = new NumberedFileInputSplit(pathPattern, 0, 1);

    SequenceRecordReader reader = new RegexSequenceRecordReader(regex, 1);
    reader.initialize(split);

    // Pass 1: raw sequence data.
    List<List<List<Writable>>> plainData = new ArrayList<>();
    while (reader.hasNext()) {
        List<List<Writable>> sequence = reader.sequenceRecord();
        plainData.add(sequence);
    }
    assertEquals(2, plainData.size());

    // Pass 2: full records, with their data and metadata kept side by side.
    List<List<List<Writable>>> recordData = new ArrayList<>();
    List<SequenceRecord> records = new ArrayList<>();
    List<RecordMetaData> metaData = new ArrayList<>();
    reader.reset();
    while (reader.hasNext()) {
        SequenceRecord current = reader.nextSequence();
        List<List<Writable>> steps = current.getSequenceRecord();
        recordData.add(steps);
        records.add(current);
        RecordMetaData currentMeta = current.getMetaData();
        metaData.add(currentMeta);
    }

    List<SequenceRecord> reloaded = reader.loadSequenceFromMetaData(metaData);

    assertEquals(plainData, recordData);
    assertEquals(records, reloaded);
}
 
示例5
/**
 * Round-trips one instance of each listed record reader through Java serialization. The test
 * fails if any reader cannot be written, read back, or cast to {@code RecordReader}.
 *
 * @throws Exception if serialization or deserialization of any reader fails
 */
@Test
public void testRR() throws Exception {

    List<RecordReader> rrs = new ArrayList<>();

    rrs.add(new CSVNLinesSequenceRecordReader(10));
    rrs.add(new CSVRecordReader(10, ','));
    rrs.add(new CSVSequenceRecordReader(1, ","));
    rrs.add(new CSVVariableSlidingWindowRecordReader(5));
    rrs.add(new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"}));
    rrs.add(new JacksonRecordReader(new FieldSelection.Builder().addField("a").addField(new Text("MISSING_B"), "b")
            .addField(new Text("MISSING_CX"), "c", "x").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new JacksonLineRecordReader(new FieldSelection.Builder().addField("value1")
            .addField("value2").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new LibSvmRecordReader());
    rrs.add(new SVMLightRecordReader());
    rrs.add(new RegexLineRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new RegexSequenceRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new TransformProcessRecordReader(new CSVRecordReader(), getTp()));
    rrs.add(new TransformProcessSequenceRecordReader(new CSVSequenceRecordReader(), getTp()));
    rrs.add(new LineRecordReader());

    for (RecordReader r : rrs) {
        System.out.println(r.getClass().getName());
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Close the object stream before taking the bytes so that everything it has buffered
        // is guaranteed to have reached the byte array.
        try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
            os.writeObject(r);
        }
        byte[] bytes = baos.toByteArray();

        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            // The cast is the check: it throws if the restored object is not a RecordReader.
            RecordReader r2 = (RecordReader) ois.readObject();
        }
    }
}
 
示例6
/**
 * Reads two sequences through a {@code RegexSequenceRecordReader} and compares every step with
 * the expected capture-group values. The comparison runs twice: once on the freshly
 * initialized reader and once after {@code reset()}.
 *
 * @throws Exception if the test data cannot be copied or reading fails
 */
@Test
public void testRegexSequenceRecordReader() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    // Copy the bundled log files into a fresh folder and address them by a %d pattern.
    ClassPathResource logResources = new ClassPathResource("datavec-api/logtestdata/");
    File workDir = testDir.newFolder();
    logResources.copyDirectory(workDir);
    File patternFile = new File(workDir, "logtestfile%d.txt");
    String pathPattern = patternFile.getAbsolutePath();

    InputSplit split = new NumberedFileInputSplit(pathPattern, 0, 1);

    SequenceRecordReader reader = new RegexSequenceRecordReader(regex, 1);
    reader.initialize(split);

    // Expected steps of the first sequence, one inner list of four Text values per step.
    List<List<Writable>> firstExpected = Arrays.asList(
            Arrays.<Writable>asList(new Text("2016-01-01 23:59:59.001"), new Text("1"), new Text("DEBUG"),
                    new Text("First entry message!")),
            Arrays.<Writable>asList(new Text("2016-01-01 23:59:59.002"), new Text("2"), new Text("INFO"),
                    new Text("Second entry message!")),
            Arrays.<Writable>asList(new Text("2016-01-01 23:59:59.003"), new Text("3"), new Text("WARN"),
                    new Text("Third entry message!")));

    // Expected steps of the second sequence.
    List<List<Writable>> secondExpected = Arrays.asList(
            Arrays.<Writable>asList(new Text("2016-01-01 23:59:59.011"), new Text("11"), new Text("DEBUG"),
                    new Text("First entry message!")),
            Arrays.<Writable>asList(new Text("2016-01-01 23:59:59.012"), new Text("12"), new Text("INFO"),
                    new Text("Second entry message!")),
            Arrays.<Writable>asList(new Text("2016-01-01 23:59:59.013"), new Text("13"), new Text("WARN"),
                    new Text("Third entry message!")));

    List<List<List<Writable>>> expectedInOrder = Arrays.asList(firstExpected, secondExpected);

    // Pass 0 checks the fresh reader; pass 1 checks the same output after a reset.
    for (int pass = 0; pass < 2; pass++) {
        if (pass > 0) {
            reader.reset();
        }
        for (List<List<Writable>> expected : expectedInOrder) {
            assertEquals(expected, reader.sequenceRecord());
        }
        assertFalse(reader.hasNext());
    }
}