Java source code examples: org.datavec.api.util.ndarray.RecordConverter
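The examples below, drawn from the DataVec and Deeplearning4j codebases, show how RecordConverter converts between Writable-based records and ND4J INDArrays: toRecord/toRecords for arrays and DataSets, toArray/toMatrix for single records and batches, and toTensor for time-series data.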
Example 1
@Override
public List<List<Writable>> sequenceRecord() {
    File next = iter.next();
    invokeListeners(next);
    if (!next.isDirectory())
        return Collections.emptyList();
    File[] list = next.listFiles();
    List<List<Writable>> ret = new ArrayList<>();
    for (File f : list) {
        try {
            List<Writable> record = RecordConverter.toRecord(imageLoader.asRowVector(f));
            ret.add(record);
            if (appendLabel)
                record.add(new DoubleWritable(labels.indexOf(next.getName())));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    return ret;
}
Example 2
/**
 * Incrementally transform a batch of CSV records into a sequence, returning
 * the result as a base64-encoded ndarray.
 *
 * @param singleCsvRecord the batch of CSV records to transform
 * @return the transformed sequence as a base64-encoded ndarray
 */
public Base64NDArrayBody transformSequenceArrayIncremental(BatchCSVRecord singleCsvRecord) {
    List<List<List<Writable>>> converted = executeToSequence(toArrowWritables(toArrowColumnsString(
            bufferAllocator, transformProcess.getInitialSchema(),
            singleCsvRecord.getRecordsAsString()),
            transformProcess.getInitialSchema()), transformProcess);
    ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch = (ArrowWritableRecordTimeSeriesBatch) converted;
    INDArray arr = RecordConverter.toTensor(arrowWritableRecordBatch);
    try {
        return new Base64NDArrayBody(Nd4jBase64.base64String(arr));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
Example 3
@Test
public void toRecords_PassInRegressionDataSet_ExpectNDArrayAndDoubleWritables() {
    INDArray feature = Nd4j.create(new double[]{4, -5.7, 10, -0.1});
    INDArray label = Nd4j.create(new double[]{.5, 2, 3, .5});
    DataSet dataSet = new DataSet(feature, label);
    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);
    List<Writable> results = writableList.get(0);
    NDArrayWritable ndArrayWritable = (NDArrayWritable) results.get(0);
    assertEquals(1, writableList.size());
    assertEquals(5, results.size());
    assertEquals(feature, ndArrayWritable.get());
    for (int i = 0; i < label.shape()[1]; i++) {
        DoubleWritable doubleWritable = (DoubleWritable) results.get(i + 1);
        assertEquals(label.getDouble(i), doubleWritable.get(), 0);
    }
}
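As the assertions show, RecordConverter.toRecords(DataSet) emits one record per example: the features wrapped in a single NDArrayWritable, followed by one DoubleWritable per label column (hence the expected record size of 5 here).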
Example 4
@Test
public void testMinMax() {
    INDArray arr = Nd4j.linspace(1, 10, 10).broadcast(10, 10);
    for (int i = 0; i < arr.rows(); i++)
        arr.getRow(i).addi(i);
    List<List<Writable>> records = RecordConverter.toRecords(arr);
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 10;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    Schema schema = builder.build();
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, sc.parallelize(records));
    dataFrame.get().show();
    dataFrame.get().describe(DataFrames.toArray(schema.getColumnNames())).show();
    // System.out.println(Normalization.minMaxColumns(dataFrame, schema.getColumnNames()));
    // System.out.println(Normalization.stdDevMeanColumns(dataFrame, schema.getColumnNames()));
}
Example 5
/**
 * Incrementally transform a batch of CSV records into a sequence, returning
 * the result as a base64-encoded ndarray.
 *
 * @param singleCsvRecord the batch of CSV records to transform
 * @return the transformed sequence as a base64-encoded ndarray
 */
public Base64NDArrayBody transformSequenceArrayIncremental(BatchCSVRecord singleCsvRecord) {
    List<List<List<Writable>>> converted = executeToSequence(toArrowWritables(toArrowColumnsString(
            bufferAllocator, transformProcess.getInitialSchema(),
            singleCsvRecord.getRecordsAsString()),
            transformProcess.getInitialSchema()), transformProcess);
    ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch = (ArrowWritableRecordTimeSeriesBatch) converted;
    INDArray arr = RecordConverter.toTensor(arrowWritableRecordBatch);
    try {
        return new Base64NDArrayBody(Nd4jBase64.base64String(arr));
    } catch (IOException e) {
        log.error("", e);
    }
    return null;
}
Example 6
@Test
public void toRecords_PassInRegressionDataSet_ExpectNDArrayAndDoubleWritables() {
    INDArray feature = Nd4j.create(new double[]{4, -5.7, 10, -0.1}, new long[]{1, 4}, DataType.FLOAT);
    INDArray label = Nd4j.create(new double[]{.5, 2, 3, .5}, new long[]{1, 4}, DataType.FLOAT);
    DataSet dataSet = new DataSet(feature, label);
    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);
    List<Writable> results = writableList.get(0);
    NDArrayWritable ndArrayWritable = (NDArrayWritable) results.get(0);
    assertEquals(1, writableList.size());
    assertEquals(5, results.size());
    assertEquals(feature, ndArrayWritable.get());
    for (int i = 0; i < label.shape()[1]; i++) {
        DoubleWritable doubleWritable = (DoubleWritable) results.get(i + 1);
        assertEquals(label.getDouble(i), doubleWritable.get(), 0);
    }
}
Example 7
@Test
public void testMinMax() {
    INDArray arr = Nd4j.linspace(1, 10, 10).broadcast(10, 10);
    for (int i = 0; i < arr.rows(); i++)
        arr.getRow(i).addi(i);
    List<List<Writable>> records = RecordConverter.toRecords(arr);
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 10;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    Schema schema = builder.build();
    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, sc.parallelize(records));
    dataFrame.show();
    dataFrame.describe(DataFrames.toArray(schema.getColumnNames())).show();
    // System.out.println(Normalization.minMaxColumns(dataFrame, schema.getColumnNames()));
    // System.out.println(Normalization.stdDevMeanColumns(dataFrame, schema.getColumnNames()));
}
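Examples 6 and 7 repeat the tests from Examples 3 and 4 against newer library versions: row vectors are now created with an explicit shape and DataType, and the DataRowsFacade wrapper around the Spark DataFrame has been replaced by a plain Dataset<Row>.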
Example 8
@Override
public List<List<Writable>> next(int num) {
    int numExamples = Math.min(num, limit - counter.get());
    //counter.addAndGet(numExamples);
    INDArray features = zFeatures;
    for (int i = 0; i < numExamples; i++) {
        fillNDArray(features.tensorAlongDimension(i, 1, 2, 3), counter.getAndIncrement());
    }
    INDArray labels = Nd4j.create(numExamples, numClasses);
    for (int i = 0; i < numExamples; i++) {
        labels.getRow(i).assign(labelsCounter.getAndIncrement());
    }
    List<Writable> ret = RecordConverter.toRecord(features);
    ret.add(new NDArrayWritable(labels));
    return Collections.singletonList(ret);
}
Example 9
/**
 * Convert a raw record via the {@link TransformProcess}
 * to a base64-encoded ndarray.
 * @param record the record to convert
 * @return the base64-encoded ndarray
 * @throws IOException if serializing the ndarray fails
 */
public Base64NDArrayBody toArray(SingleCSVRecord record) throws IOException {
    List<Writable> record2 = toArrowWritablesSingle(
            toArrowColumnsStringSingle(bufferAllocator,
                    transformProcess.getInitialSchema(), record.getValues()),
            transformProcess.getInitialSchema());
    List<Writable> finalRecord = execute(Arrays.asList(record2), transformProcess).get(0);
    INDArray convert = RecordConverter.toArray(finalRecord);
    return new Base64NDArrayBody(Nd4jBase64.base64String(convert));
}
Example 10
@Test
public void toRecords_PassInClassificationDataSet_ExpectNDArrayAndIntWritables() {
    INDArray feature1 = Nd4j.create(new double[]{4, -5.7, 10, -0.1});
    INDArray feature2 = Nd4j.create(new double[]{11, .7, -1.3, 4});
    INDArray label1 = Nd4j.create(new double[]{0, 0, 1, 0});
    INDArray label2 = Nd4j.create(new double[]{0, 1, 0, 0});
    DataSet dataSet = new DataSet(Nd4j.vstack(Lists.newArrayList(feature1, feature2)),
            Nd4j.vstack(Lists.newArrayList(label1, label2)));
    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);
    assertEquals(2, writableList.size());
    testClassificationWritables(feature1, 2, writableList.get(0));
    testClassificationWritables(feature2, 1, writableList.get(1));
}
Example 11
@Test
public void testNDArrayWritableConcat() {
    List<Writable> l = Arrays.<Writable>asList(new DoubleWritable(1),
            new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4})), new DoubleWritable(5),
            new NDArrayWritable(Nd4j.create(new double[]{6, 7, 8})), new IntWritable(9),
            new IntWritable(1));
    INDArray exp = Nd4j.create(new double[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 1});
    INDArray act = RecordConverter.toArray(l);
    assertEquals(exp, act);
}
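For orientation, here is a minimal, self-contained round trip built only from the toRecord and toArray calls demonstrated above; the class name RecordConverterRoundTrip is invented for illustration:
import java.util.List;

import org.datavec.api.util.ndarray.RecordConverter;
import org.datavec.api.writable.Writable;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class RecordConverterRoundTrip {
    public static void main(String[] args) {
        // A single row vector of features
        INDArray row = Nd4j.create(new double[]{1, 2, 3}, new long[]{1, 3}, DataType.DOUBLE);
        // INDArray -> record: a list holding one NDArrayWritable that wraps the row
        List<Writable> record = RecordConverter.toRecord(row);
        // record -> INDArray: concatenates the writables back into a row vector
        INDArray restored = RecordConverter.toArray(record);
        System.out.println(restored); // expected: a 1x3 row vector [1.0, 2.0, 3.0]
    }
}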
Example 12
@Test
public void testNDArrayWritableConcatToMatrix() {
    List<Writable> l1 = Arrays.<Writable>asList(new DoubleWritable(1), new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4})), new DoubleWritable(5));
    List<Writable> l2 = Arrays.<Writable>asList(new DoubleWritable(6), new NDArrayWritable(Nd4j.create(new double[]{7, 8, 9})), new DoubleWritable(10));
    INDArray exp = Nd4j.create(new double[][]{
            {1, 2, 3, 4, 5},
            {6, 7, 8, 9, 10}});
    INDArray act = RecordConverter.toMatrix(Arrays.asList(l1, l2));
    assertEquals(exp, act);
}
Example 13
@Test
public void testMeanStdZeros() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }
    INDArray arr = RecordConverter.toMatrix(data);
    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));
    List<Row> rows = Normalization.stdDevMeanColumns(dataFrame, dataFrame.get().columns());
    INDArray assertion = DataFrames.toMatrix(rows);
    //compare standard deviation
    assertTrue(standardScaler.getStd().equalsWithEps(assertion.getRow(0), 1e-1));
    //compare mean
    assertTrue(standardScaler.getMean().equalsWithEps(assertion.getRow(1), 1e-1));
}
Example 14
@Test
public void testAnalysisBasic() throws Exception {
    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    Schema s = new Schema.Builder()
            .addColumnsDouble("0", "1", "2", "3")
            .addColumnInteger("label")
            .build();
    DataAnalysis da = AnalyzeLocal.analyze(s, rr);
    System.out.println(da);
    //Compare:
    List<List<Writable>> list = new ArrayList<>();
    rr.reset();
    while (rr.hasNext()) {
        list.add(rr.next());
    }
    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, list);
    INDArray mean = arr.mean(0);
    INDArray std = arr.std(0);
    for (int i = 0; i < 5; i++) {
        double m = ((NumericalColumnAnalysis) da.getColumnAnalysis().get(i)).getMean();
        double stddev = ((NumericalColumnAnalysis) da.getColumnAnalysis().get(i)).getSampleStdev();
        assertEquals(mean.getDouble(i), m, 1e-3);
        assertEquals(std.getDouble(i), stddev, 1e-3);
    }
}
Example 15
/**
 * Convert a raw record via the {@link TransformProcess}
 * to a base64-encoded ndarray.
 * @param record the record to convert
 * @return the base64-encoded ndarray
 * @throws IOException if serializing the ndarray fails
 */
public Base64NDArrayBody toArray(SingleCSVRecord record) throws IOException {
    List<Writable> record2 = toArrowWritablesSingle(
            toArrowColumnsStringSingle(bufferAllocator,
                    transformProcess.getInitialSchema(), record.getValues()),
            transformProcess.getInitialSchema());
    List<Writable> finalRecord = execute(Arrays.asList(record2), transformProcess).get(0);
    INDArray convert = RecordConverter.toArray(DataType.DOUBLE, finalRecord);
    return new Base64NDArrayBody(Nd4jBase64.base64String(convert));
}
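Compared with Example 9, this newer variant passes the target DataType explicitly to RecordConverter.toArray rather than relying on a default.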
Example 16
@Test
public void toRecords_PassInClassificationDataSet_ExpectNDArrayAndIntWritables() {
    INDArray feature1 = Nd4j.create(new double[]{4, -5.7, 10, -0.1}, new long[]{1, 4}, DataType.FLOAT);
    INDArray feature2 = Nd4j.create(new double[]{11, .7, -1.3, 4}, new long[]{1, 4}, DataType.FLOAT);
    INDArray label1 = Nd4j.create(new double[]{0, 0, 1, 0}, new long[]{1, 4}, DataType.FLOAT);
    INDArray label2 = Nd4j.create(new double[]{0, 1, 0, 0}, new long[]{1, 4}, DataType.FLOAT);
    DataSet dataSet = new DataSet(Nd4j.vstack(Lists.newArrayList(feature1, feature2)),
            Nd4j.vstack(Lists.newArrayList(label1, label2)));
    List<List<Writable>> writableList = RecordConverter.toRecords(dataSet);
    assertEquals(2, writableList.size());
    testClassificationWritables(feature1, 2, writableList.get(0));
    testClassificationWritables(feature2, 1, writableList.get(1));
}
Example 17
@Test
public void testNDArrayWritableConcat() {
    List<Writable> l = Arrays.<Writable>asList(new DoubleWritable(1),
            new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4}, new long[]{1, 3}, DataType.FLOAT)), new DoubleWritable(5),
            new NDArrayWritable(Nd4j.create(new double[]{6, 7, 8}, new long[]{1, 3}, DataType.FLOAT)), new IntWritable(9),
            new IntWritable(1));
    INDArray exp = Nd4j.create(new double[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 1}, new long[]{1, 10}, DataType.FLOAT);
    INDArray act = RecordConverter.toArray(DataType.FLOAT, l);
    assertEquals(exp, act);
}
Example 18
@Test
public void testNDArrayWritableConcatToMatrix() {
    List<Writable> l1 = Arrays.<Writable>asList(new DoubleWritable(1), new NDArrayWritable(Nd4j.create(new double[]{2, 3, 4}, new long[]{1, 3}, DataType.FLOAT)), new DoubleWritable(5));
    List<Writable> l2 = Arrays.<Writable>asList(new DoubleWritable(6), new NDArrayWritable(Nd4j.create(new double[]{7, 8, 9}, new long[]{1, 3}, DataType.FLOAT)), new DoubleWritable(10));
    INDArray exp = Nd4j.create(new double[][]{
            {1, 2, 3, 4, 5},
            {6, 7, 8, 9, 10}}).castTo(DataType.FLOAT);
    INDArray act = RecordConverter.toMatrix(DataType.FLOAT, Arrays.asList(l1, l2));
    assertEquals(exp, act);
}
Example 19
@Test
public void testToRecordWithListOfObject() {
    final List<Object> list = Arrays.asList((Object) 3, 7.0f, "Foo", "Bar", 1.0, 3f, 3L, 7, 0L);
    final Schema schema = new Schema.Builder()
            .addColumnInteger("a")
            .addColumnFloat("b")
            .addColumnString("c")
            .addColumnCategorical("d", "Bar", "Baz")
            .addColumnDouble("e")
            .addColumnFloat("f")
            .addColumnLong("g")
            .addColumnInteger("h")
            .addColumnTime("i", TimeZone.getDefault())
            .build();
    final List<Writable> record = RecordConverter.toRecord(schema, list);
    assertEquals(record.get(0).toInt(), 3);
    assertEquals(record.get(1).toFloat(), 7f, 1e-6);
    assertEquals(record.get(2).toString(), "Foo");
    assertEquals(record.get(3).toString(), "Bar");
    assertEquals(record.get(4).toDouble(), 1.0, 1e-6);
    assertEquals(record.get(5).toFloat(), 3f, 1e-6);
    assertEquals(record.get(6).toLong(), 3L);
    assertEquals(record.get(7).toInt(), 7);
    assertEquals(record.get(8).toLong(), 0);
}
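This overload checks each raw Java object against the Schema and wraps it in the Writable type matching its column, which is why the time column "i" comes back as a long timestamp in the final assertion.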
Example 20
@Override
public List<Writable> next() {
    INDArray nd = Nd4j.create(new float[nZ * nY * nX], new int[]{1, 1, nZ, nY, nX}, 'c').assign(n);
    final List<Writable> res = RecordConverter.toRecord(nd);
    res.add(new IntWritable(0));
    n++;
    return res;
}
Example 21
@Override
public List<Writable> next() {
    INDArray features = Nd4j.create(channels, height, width);
    fillNDArray(features, counter.getAndIncrement());
    features = features.reshape(1, channels, height, width);
    List<Writable> ret = RecordConverter.toRecord(features);
    ret.add(new IntWritable(RandomUtils.nextInt(0, numClasses)));
    return ret;
}
Example 22
public static INDArray toArray(ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch) {
    return RecordConverter.toTensor(arrowWritableRecordBatch);
}
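The same conversion can be exercised without Arrow, since a time-series batch is consumed as nested lists of writables (examples × time steps × columns). A minimal sketch, assuming RecordConverter.toTensor accepts any List<List<List<Writable>>>:
import java.util.Arrays;
import java.util.List;

import org.datavec.api.util.ndarray.RecordConverter;
import org.datavec.api.writable.DoubleWritable;
import org.datavec.api.writable.Writable;
import org.nd4j.linalg.api.ndarray.INDArray;

public class ToTensorSketch {
    public static void main(String[] args) {
        // One sequence with two time steps of two numeric columns each
        List<List<List<Writable>>> batch = Arrays.asList(
                Arrays.asList(
                        Arrays.<Writable>asList(new DoubleWritable(1), new DoubleWritable(2)),
                        Arrays.<Writable>asList(new DoubleWritable(3), new DoubleWritable(4))));
        INDArray tensor = RecordConverter.toTensor(batch);
        // Rank-3 result; the exact axis order follows the RecordConverter implementation
        System.out.println(Arrays.toString(tensor.shape()));
    }
}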
Example 23
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }
    INDArray arr = RecordConverter.toMatrix(data);
    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    Normalization.zeromeanUnitVariance(dataFrame).get().show();
    Normalization.normalize(dataFrame).get().show();
    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));
    INDArray zeroMeanUnitVarianceDataFrame =
            RecordConverter.toMatrix(Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
            RecordConverter.toMatrix(Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));
}
Example 24
@Test
public void normalizationTests() {
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }
    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, data);
    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());
    Dataset<Row> dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.show();
    Normalization.zeromeanUnitVariance(dataFrame).show();
    Normalization.normalize(dataFrame).show();
    //assert equivalent to the ndarray pre processing
    NormalizerStandardize standardScaler = new NormalizerStandardize();
    standardScaler.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray standardScalered = arr.dup();
    standardScaler.transform(new DataSet(standardScalered, standardScalered));
    DataNormalization zeroToOne = new NormalizerMinMaxScaler();
    zeroToOne.fit(new DataSet(arr.dup(), arr.dup()));
    INDArray zeroToOnes = arr.dup();
    zeroToOne.transform(new DataSet(zeroToOnes, zeroToOnes));
    INDArray zeroMeanUnitVarianceDataFrame =
            RecordConverter.toMatrix(DataType.DOUBLE, Normalization.zeromeanUnitVariance(schema, rdd).collect());
    INDArray zeroMeanUnitVarianceDataFrameZeroToOne =
            RecordConverter.toMatrix(DataType.DOUBLE, Normalization.normalize(schema, rdd).collect());
    assertEquals(standardScalered, zeroMeanUnitVarianceDataFrame);
    assertTrue(zeroToOnes.equalsWithEps(zeroMeanUnitVarianceDataFrameZeroToOne, 1e-1));
}
Example 25
/**
 * Create an ndarray from a matrix.
 * All rows in the incoming batch must have the same length, because an
 * {@link INDArray} requires uniform dimensions.
 * Note that the input columns must also be numerical. If they are not numerical already,
 * consider using an {@link org.datavec.api.transform.TransformProcess} to transform the data
 * output from {@link org.datavec.arrow.recordreader.ArrowRecordReader} into the proper format
 * for direct conversion with this method.
 *
 * @param arrowWritableRecordBatch the incoming batch. This is typically output from
 *                                 an {@link org.datavec.arrow.recordreader.ArrowRecordReader}
 * @return an {@link INDArray} representative of the input data
 */
public static INDArray toArray(ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch) {
    return RecordConverter.toTensor(arrowWritableRecordBatch);
}