Java source code examples: org.datavec.api.util.ndarray.RecordConverter
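
RecordConverter converts between DataVec Writable records and ND4J INDArrays. The examples below come from real projects and cover toRecord, toRecords, toArray, toMatrix and toTensor across several library versions. As a primer, here is a minimal round-trip sketch (assuming a DataVec/ND4J version whose toArray takes an explicit DataType, as in Examples 15 and 17 below):

// Minimal round-trip sketch: Writables -> INDArray -> Writables.
// Assumes the usual DataVec (org.datavec.api.writable.*) and ND4J imports are present.
List<Writable> record = Arrays.<Writable>asList(new DoubleWritable(1.0), new DoubleWritable(2.0));
INDArray arr = RecordConverter.toArray(DataType.DOUBLE, record); // 1x2 row vector
List<Writable> back = RecordConverter.toRecord(arr);             // a list holding an NDArrayWritable (cf. Examples 20-21)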

Example 1
@Override
public List<List<Writable>> sequenceRecord() {
    File next = iter.next();
    invokeListeners(next);
    if (!next.isDirectory())
        return Collections.emptyList();
    File[] list = next.listFiles();
    List<List<Writable>> ret = new ArrayList<>();
    for (File f : list) {
        try {
            List<Writable> record = RecordConverter.toRecord(imageLoader.asRowVector(f));
            ret.add(record);
            if (appendLabel)
                record.add(new DoubleWritable(labels.indexOf(next.getName())));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

    }
    return ret;
}
 
Example 2
/**
 * Convert a batch of CSV records to a base64-encoded
 * sequence (time series) ndarray via the {@link TransformProcess}.
 *
 * @param singleCsvRecord the batch of CSV records to convert
 * @return the transformed sequence as a base64-encoded ndarray
 */
public Base64NDArrayBody transformSequenceArrayIncremental(BatchCSVRecord singleCsvRecord) {
    List<List<List<Writable>>> converted = executeToSequence(
            toArrowWritables(
                    toArrowColumnsString(bufferAllocator, transformProcess.getInitialSchema(),
                            singleCsvRecord.getRecordsAsString()),
                    transformProcess.getInitialSchema()),
            transformProcess);
    ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch = (ArrowWritableRecordTimeSeriesBatch) converted;
    INDArray arr = RecordConverter.toTensor(arrowWritableRecordBatch);
    try {
        return new Base64NDArrayBody(Nd4jBase64.base64String(arr));
    } catch (IOException e) {
        e.printStackTrace();
    }

    return null;
}
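
For context, a hypothetical invocation of this helper might look as follows. Everything here is an assumption for illustration: `transform` is a placeholder for the enclosing service object, and the SingleCSVRecord varargs constructor, the BatchCSVRecord add(...) mutator and the Base64NDArrayBody getNdarray() getter are assumed from DataVec's spark transform model classes.

BatchCSVRecord batch = new BatchCSVRecord();                 // assumed no-arg constructor
batch.add(new SingleCSVRecord("1.0", "2.0", "3.0"));         // assumed add(SingleCSVRecord) mutator
batch.add(new SingleCSVRecord("4.0", "5.0", "6.0"));
Base64NDArrayBody body = transform.transformSequenceArrayIncremental(batch);
String base64Tensor = body.getNdarray();                     // assumed Lombok-style getter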
 
Example 3
@Test
public void toRecords_PassInRegressionDataSet_ExpectNDArrayAndDoubleWritables() {
    INDArray feature = Nd4j.create(new double[]{4, -5.7, 10, -0.1});
    INDArray label = Nd4j.create(new double[]{.5, 2, 3, .5});
    DataSet dataSet = new DataSet(feature, label);

    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);
    List<Writable> results = writableList.get(0);
    NDArrayWritable ndArrayWritable = (NDArrayWritable) results.get(0);

    assertEquals(1, writableList.size());
    assertEquals(5, results.size());
    assertEquals(feature, ndArrayWritable.get());
    for (int i = 0; i < label.shape()[1]; i++) {
        DoubleWritable doubleWritable = (DoubleWritable) results.get(i + 1);
        assertEquals(label.getDouble(i), doubleWritable.get(), 0);
    }
}
 
Example 4
@Test
public void testMinMax() {
    INDArray arr = Nd4j.linspace(1, 10, 10).broadcast(10, 10);
    for (int i = 0; i < arr.rows(); i++)
        arr.getRow(i).addi(i);

    List<List<Writable>> records = RecordConverter.toRecords(arr);
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 10;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    Schema schema = builder.build();
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, sc.parallelize(records));
    dataFrame.get().show();
    dataFrame.get().describe(DataFrames.toArray(schema.getColumnNames())).show();
    //        System.out.println(Normalization.minMaxColumns(dataFrame,schema.getColumnNames()));
    //        System.out.println(Normalization.stdDevMeanColumns(dataFrame,schema.getColumnNames()));

}
 
Example 5
/**
 * Convert a batch of CSV records to a base64-encoded
 * sequence (time series) ndarray via the {@link TransformProcess}.
 *
 * @param singleCsvRecord the batch of CSV records to convert
 * @return the transformed sequence as a base64-encoded ndarray
 */
public Base64NDArrayBody transformSequenceArrayIncremental(BatchCSVRecord singleCsvRecord) {
    List<List<List<Writable>>> converted = executeToSequence(
            toArrowWritables(
                    toArrowColumnsString(bufferAllocator, transformProcess.getInitialSchema(),
                            singleCsvRecord.getRecordsAsString()),
                    transformProcess.getInitialSchema()),
            transformProcess);
    ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch = (ArrowWritableRecordTimeSeriesBatch) converted;
    INDArray arr = RecordConverter.toTensor(arrowWritableRecordBatch);
    try {
        return new Base64NDArrayBody(Nd4jBase64.base64String(arr));
    } catch (IOException e) {
        log.error("",e);
    }

    return null;
}
 
Example 6
@Test
public void toRecords_PassInRegressionDataSet_ExpectNDArrayAndDoubleWritables() {
    INDArray feature = Nd4j.create(new double[]{4, -5.7, 10, -0.1}, new long[]{1, 4}, DataType.FLOAT);
    INDArray label = Nd4j.create(new double[]{.5, 2, 3, .5}, new long[]{1, 4}, DataType.FLOAT);
    DataSet dataSet = new DataSet(feature, label);

    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);
    List<Writable> results = writableList.get(0);
    NDArrayWritable ndArrayWritable = (NDArrayWritable) results.get(0);

    assertEquals(1, writableList.size());
    assertEquals(5, results.size());
    assertEquals(feature, ndArrayWritable.get());
    for (int i = 0; i < label.shape()[1]; i++) {
        DoubleWritable doubleWritable = (DoubleWritable) results.get(i + 1);
        assertEquals(label.getDouble(i), doubleWritable.get(), 0);
    }
}
 
Example 7
@Test
public void testMinMax() {
    INDArray arr = Nd4j.linspace(1, 10, 10).broadcast(10, 10);
    for (int i = 0; i < arr.rows(); i++)
        arr.getRow(i).addi(i);

    List<List<Writable>> records = RecordConverter.toRecords(arr);
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 10;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    Schema schema = builder.build();
    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, sc.parallelize(records));
    dataFrame.show();
    dataFrame.describe(DataFrames.toArray(schema.getColumnNames())).show();
    //        System.out.println(Normalization.minMaxColumns(dataFrame,schema.getColumnNames()));
    //        System.out.println(Normalization.stdDevMeanColumns(dataFrame,schema.getColumnNames()));

}
 
Example 8
@Override
public List<List<Writable>> next(int num) {
    int numExamples = Math.min(num, limit - counter.get());
    //counter.addAndGet(numExamples);

    INDArray features = zFeatures;
    for (int i = 0; i < numExamples; i++) {
        fillNDArray(features.tensorAlongDimension(i, 1, 2, 3), counter.getAndIncrement());
    }

    INDArray labels = Nd4j.create(numExamples, numClasses);
    for (int i = 0; i < numExamples; i++) {
        labels.getRow(i).assign(labelsCounter.getAndIncrement());
    }

    List<Writable> ret = RecordConverter.toRecord(features);
    ret.add(new NDArrayWritable(labels));

    return Collections.singletonList(ret);
}
 
Example 9
/**
 * Convert a raw record to a base64-encoded ndarray
 * via the {@link TransformProcess}.
 *
 * @param record the record to convert
 * @return the base64-encoded ndarray
 * @throws IOException if the resulting ndarray cannot be base64-serialized
 */
public Base64NDArrayBody toArray(SingleCSVRecord record) throws IOException {
    List<Writable> record2 = toArrowWritablesSingle(
            toArrowColumnsStringSingle(bufferAllocator,
                    transformProcess.getInitialSchema(), record.getValues()),
            transformProcess.getInitialSchema());
    List<Writable> finalRecord = execute(Arrays.asList(record2), transformProcess).get(0);
    INDArray convert = RecordConverter.toArray(finalRecord);
    INDArray convert = RecordConverter.toArray(finalRecord);
    return new Base64NDArrayBody(Nd4jBase64.base64String(convert));
}
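
A hypothetical single-record call, under the same assumptions as the batch sketch above (varargs SingleCSVRecord constructor and a getNdarray() accessor on Base64NDArrayBody; `transform` again names the enclosing object):

SingleCSVRecord csv = new SingleCSVRecord("5.1", "3.5", "1.4", "0.2"); // assumed varargs constructor
Base64NDArrayBody body = transform.toArray(csv);
String base64 = body.getNdarray(); // assumed getter; decodable back to an INDArray via Nd4jBase64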
 
Example 10
@Test
public void toRecords_PassInClassificationDataSet_ExpectNDArrayAndIntWritables() {
    INDArray feature1 = Nd4j.create(new double[]{4, -5.7, 10, -0.1});
    INDArray feature2 = Nd4j.create(new double[]{11, .7, -1.3, 4});
    INDArray label1 = Nd4j.create(new double[]{0, 0, 1, 0});
    INDArray label2 = Nd4j.create(new double[]{0, 1, 0, 0});
    DataSet dataSet = new DataSet(Nd4j.vstack(Lists.newArrayList(feature1, feature2)),
            Nd4j.vstack(Lists.newArrayList(label1, label2)));

    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);

    assertEquals(2, writableList.size());
    testClassificationWritables(feature1, 2, writableList.get(0));
    testClassificationWritables(feature2, 1, writableList.get(1));
}
 
Example 11
@Test
public void testNDArrayWritableConcat() {
    List<Writable> l = Arrays.<Writable>asList(new DoubleWritable(1),
            new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4})), new DoubleWritable(5),
            new NDArrayWritable(Nd4j.create(new double[]{6, 7, 8})), new IntWritable(9),
            new IntWritable(1));

    INDArray exp = Nd4j.create(new double[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 1});
    INDArray act = RecordConverter.toArray(l);

    assertEquals(exp, act);
}
 
Example 12
@Test
public void testNDArrayWritableConcatToMatrix(){

    List<Writable> l1 = Arrays.<Writable>asList(new DoubleWritable(1), new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4})), new DoubleWritable(5));
    List<Writable> l2 = Arrays.<Writable>asList(new DoubleWritable(6), new NDArrayWritable(Nd4j.create(new double[]{7, 8, 9})), new DoubleWritable(10));

    INDArray exp = Nd4j.create(new double[][]{
            {1,2,3,4,5},
            {6,7,8,9,10}});

    INDArray act = RecordConverter.toMatrix(Arrays.asList(l1,l2));

    assertEquals(exp, act);
}
 
Example 13
@Test
public void testMeanStdZeros() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));
    List<Row> rows = Normalization.stdDevMeanColumns(dataFrame, dataFrame.get().columns());
    INDArray assertion = DataFrames.toMatrix(rows);
    //compare standard deviation
    assertTrue(standardScaler.getStd().equalsWithEps(assertion.getRow(0), 1e-1));
    //compare mean
    assertTrue(standardScaler.getMean().equalsWithEps(assertion.getRow(1), 1e-1));

}
 
Example 14
@Test
public void testAnalysisBasic() throws Exception {

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));

    Schema s = new Schema.Builder()
            .addColumnsDouble("0", "1", "2", "3")
            .addColumnInteger("label")
            .build();

    DataAnalysis da = AnalyzeLocal.analyze(s, rr);

    System.out.println(da);

    //Compare:
    List<List<Writable>> list = new ArrayList<>();
    rr.reset();
    while(rr.hasNext()){
        list.add(rr.next());
    }

    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, list);
    INDArray mean = arr.mean(0);
    INDArray std = arr.std(0);

    for( int i=0; i<5; i++ ){
        double m = ((NumericalColumnAnalysis)da.getColumnAnalysis().get(i)).getMean();
        double stddev = ((NumericalColumnAnalysis)da.getColumnAnalysis().get(i)).getSampleStdev();
        assertEquals(mean.getDouble(i), m, 1e-3);
        assertEquals(std.getDouble(i), stddev, 1e-3);
    }

}
 
Example 15
/**
 * Convert a raw record to a base64-encoded ndarray
 * via the {@link TransformProcess}.
 *
 * @param record the record to convert
 * @return the base64-encoded ndarray
 * @throws IOException if the resulting ndarray cannot be base64-serialized
 */
public Base64NDArrayBody toArray(SingleCSVRecord record) throws IOException {
    List<Writable> record2 = toArrowWritablesSingle(
            toArrowColumnsStringSingle(bufferAllocator,
                    transformProcess.getInitialSchema(), record.getValues()),
            transformProcess.getInitialSchema());
    List<Writable> finalRecord = execute(Arrays.asList(record2), transformProcess).get(0);
    INDArray convert = RecordConverter.toArray(DataType.DOUBLE, finalRecord);
    INDArray convert = RecordConverter.toArray(DataType.DOUBLE, finalRecord);
    return new Base64NDArrayBody(Nd4jBase64.base64String(convert));
}
 
Example 16
@Test
public void toRecords_PassInClassificationDataSet_ExpectNDArrayAndIntWritables() {
    INDArray feature1 = Nd4j.create(new double[]{4, -5.7, 10, -0.1}, new long[]{1, 4}, DataType.FLOAT);
    INDArray feature2 = Nd4j.create(new double[]{11, .7, -1.3, 4}, new long[]{1, 4}, DataType.FLOAT);
    INDArray label1 = Nd4j.create(new double[]{0, 0, 1, 0}, new long[]{1, 4}, DataType.FLOAT);
    INDArray label2 = Nd4j.create(new double[]{0, 1, 0, 0}, new long[]{1, 4}, DataType.FLOAT);
    DataSet dataSet = new DataSet(Nd4j.vstack(Lists.newArrayList(feature1, feature2)),
            Nd4j.vstack(Lists.newArrayList(label1, label2)));

    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);

    assertEquals(2, writableList.size());
    testClassificationWritables(feature1, 2, writableList.get(0));
    testClassificationWritables(feature2, 1, writableList.get(1));
}
 
Example 17
@Test
public void testNDArrayWritableConcat() {
    List<Writable> l = Arrays.<Writable>asList(new DoubleWritable(1),
            new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4}, new long[]{1, 3}, DataType.FLOAT)), new DoubleWritable(5),
            new NDArrayWritable(Nd4j.create(new double[]{6, 7, 8}, new long[]{1, 3}, DataType.FLOAT)), new IntWritable(9),
            new IntWritable(1));

    INDArray exp = Nd4j.create(new double[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 1}, new long[]{1, 10}, DataType.FLOAT);
    INDArray act = RecordConverter.toArray(DataType.FLOAT, l);

    assertEquals(exp, act);
}
 
Example 18
@Test
public void testNDArrayWritableConcatToMatrix(){

    List<Writable> l1 = Arrays.<Writable>asList(new DoubleWritable(1), new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4}, new long[]{1,3}, DataType.FLOAT)), new DoubleWritable(5));
    List<Writable> l2 = Arrays.<Writable>asList(new DoubleWritable(6), new NDArrayWritable(Nd4j.create(new double[]{7, 8, 9}, new long[]{1,3}, DataType.FLOAT)), new DoubleWritable(10));

    INDArray exp = Nd4j.create(new double[][]{
            {1,2,3,4,5},
            {6,7,8,9,10}}).castTo(DataType.FLOAT);

    INDArray act = RecordConverter.toMatrix(DataType.FLOAT, Arrays.asList(l1,l2));

    assertEquals(exp, act);
}
 
Example 19
@Test
public void testToRecordWithListOfObject(){
    final List<Object> list = Arrays.asList((Object)3, 7.0f, "Foo", "Bar", 1.0, 3f, 3L, 7, 0L);
    final Schema schema = new Schema.Builder()
            .addColumnInteger("a")
            .addColumnFloat("b")
            .addColumnString("c")
            .addColumnCategorical("d", "Bar", "Baz")
            .addColumnDouble("e")
            .addColumnFloat("f")
            .addColumnLong("g")
            .addColumnInteger("h")
            .addColumnTime("i", TimeZone.getDefault())
            .build();

    final List<Writable> record = RecordConverter.toRecord(schema, list);

    assertEquals(record.get(0).toInt(), 3);
    assertEquals(record.get(1).toFloat(), 7f, 1e-6);
    assertEquals(record.get(2).toString(), "Foo");
    assertEquals(record.get(3).toString(), "Bar");
    assertEquals(record.get(4).toDouble(), 1.0, 1e-6);
    assertEquals(record.get(5).toFloat(), 3f, 1e-6);
    assertEquals(record.get(6).toLong(), 3L);
    assertEquals(record.get(7).toInt(), 7);
    assertEquals(record.get(8).toLong(), 0);
}
 
Example 20
@Override
public List<Writable> next() {
    INDArray nd = Nd4j.create(new float[nZ*nY*nX], new int[] {1, 1, nZ, nY, nX }, 'c').assign(n);
    final List<Writable> res = RecordConverter.toRecord(nd);
    res.add(new IntWritable(0));
    n++;
    return res;
}
 
Example 21
@Override
public List<Writable> next() {
    INDArray features = Nd4j.create(channels, height, width);
    fillNDArray(features, counter.getAndIncrement());
    features = features.reshape(1, channels, height, width);
    List<Writable> ret = RecordConverter.toRecord(features);
    ret.add(new IntWritable(RandomUtils.nextInt(0, numClasses)));
    return ret;
}
 
Example 22
public static INDArray toArray(ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch) {
    return RecordConverter.toTensor(arrowWritableRecordBatch);
}
 
Example 23
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    Normalization.zeromeanUnitVariance(dataFrame).get().show();
    Normalization.normalize(dataFrame).get().show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}
 
Example 24
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));

    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }

    }

    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, data);

    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());

    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.show();
    Normalization.zeromeanUnitVariance(dataFrame).show();
    Normalization.normalize(dataFrame).show();

    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));

    INDArray zeroMeanUnitVarianceDataFrame =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
                    RecordConverter.toMatrix(DataType.DOUBLE, Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));

}
 
Example 25
/**
 * Create an ndarray from a time-series record batch.
 * Every sequence in the batch must have the same number of rows,
 * because an {@link INDArray} requires uniform dimensions.
 * Note that the input columns must also be numerical. If they aren't numerical already,
 * consider using an {@link org.datavec.api.transform.TransformProcess} to transform the data
 * output from the {@link org.datavec.arrow.recordreader.ArrowRecordReader} into the proper
 * format for direct conversion with this method.
 *
 * @param arrowWritableRecordBatch the incoming batch, typically output from
 *                                 an {@link org.datavec.arrow.recordreader.ArrowRecordReader}
 * @return an {@link INDArray} representation of the input data
 */
public static INDArray toArray(ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch) {
    return RecordConverter.toTensor(arrowWritableRecordBatch);
}
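
The Arrow batch is ultimately just a nested list of writables: the cast in Example 2 suggests ArrowWritableRecordTimeSeriesBatch implements List<List<List<Writable>>>, and RecordConverter.toTensor appears to accept that shape of input. A minimal hand-built sketch under that assumption:

// Hedged sketch: a batch of 2 sequences, each with 2 time steps of 2 numeric columns.
// Assumes RecordConverter.toTensor(List<List<List<Writable>>>) per the usage above.
List<List<List<Writable>>> seqBatch = Arrays.asList(
        Arrays.asList(
                Arrays.<Writable>asList(new DoubleWritable(1), new DoubleWritable(2)),
                Arrays.<Writable>asList(new DoubleWritable(3), new DoubleWritable(4))),
        Arrays.asList(
                Arrays.<Writable>asList(new DoubleWritable(5), new DoubleWritable(6)),
                Arrays.<Writable>asList(new DoubleWritable(7), new DoubleWritable(8))));
INDArray tensor = RecordConverter.toTensor(seqBatch); // one slice per sequence; axis order follows the library's convention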
 