Java源码示例:org.jpmml.converter.BinaryFeature
示例1
/**
 * Replaces groups of binary features that share a field name with lookup-based features.
 *
 * <p>
 * If the converter yields no value for the lookup threshold option, nothing is done.
 * Otherwise the binary features are counted per field name, and {@code createMapValues}
 * is invoked for every name whose count is at least the threshold.
 * </p>
 *
 * @param converter The converter that is asked for the lookup threshold option.
 * @param identifier Forwarded unchanged to {@code createMapValues}.
 * @param features The features; size-checked against the coefficients.
 * @param coefficients The coefficients that accompany the features.
 */
static
public <C extends ModelConverter<?> & HasRegressionTableOptions> void simplify(C converter, Object identifier, List<Feature> features, List<Double> coefficients){
    SchemaUtil.checkSize(coefficients.size(), features);

    Integer lookupThreshold = (Integer)converter.getOption(HasRegressionTableOptions.OPTION_LOOKUP_THRESHOLD, null);

    if(lookupThreshold != null){
        // The counts are taken up front, before any feature is touched
        Map<FieldName, Long> countMap = features.stream()
            .filter(feature -> (feature instanceof BinaryFeature))
            .map(feature -> ((BinaryFeature)feature).getName())
            .collect(Collectors.groupingBy(fieldName -> fieldName, Collectors.counting()));

        for(Map.Entry<FieldName, Long> entry : countMap.entrySet()){
            long count = entry.getValue();

            if(count >= lookupThreshold){
                createMapValues(entry.getKey(), identifier, features, coefficients);
            }
        }
    }
}
示例2
/**
 * Derives a schema in which every non-binary feature is a continuous feature of the given data type.
 *
 * @param dataType The data type requested when converting non-binary features.
 * @param schema The schema to transform.
 * @return The result of {@code schema.toTransformedSchema(...)}.
 */
static
private Schema toTreeModelSchema(DataType dataType, Schema schema){
    Function<Feature, Feature> function = (feature) -> {

        // Binary features pass through as-is
        if(feature instanceof BinaryFeature){
            return feature;
        }

        return feature.toContinuousFeature(dataType);
    };

    return schema.toTransformedSchema(function);
}
示例3
/**
 * Wraps every non-binary feature into a {@code PowerFeature}; binary features are kept as they are.
 *
 * @param features The incoming features.
 * @param encoder The encoder handed to each newly created {@code PowerFeature}.
 * @return A new list with one entry per incoming feature, in the same order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    Integer power = getPower();

    List<Feature> result = new ArrayList<>();

    for(Feature feature : features){
        Feature encodedFeature;

        if(!(feature instanceof BinaryFeature)){
            encodedFeature = new PowerFeature(encoder, feature.toContinuousFeature(), power);
        } else {
            encodedFeature = (BinaryFeature)feature;
        }

        result.add(encodedFeature);
    }

    return result;
}
示例4
/**
 * Encodes a booster as a mining model.
 *
 * <p>
 * The booster is serialized to bytes and re-read via {@code XGBoostUtil.loadLearner}.
 * The schema is transformed so that every non-binary feature becomes a float continuous feature,
 * and the "compact" and "ntree limit" options are copied over from the converter.
 * </p>
 *
 * @param converter The source of the option values.
 * @param booster The booster to encode.
 * @param schema The schema to transform and encode against.
 * @return The mining model produced by the loaded learner.
 * @throws RuntimeException If reading the serialized booster fails with an {@code IOException}.
 */
static
public <C extends ModelConverter<?> & HasXGBoostOptions> MiningModel encodeBooster(C converter, Booster booster, Schema schema){
    byte[] bytes = booster.toByteArray();

    Learner learner;

    try(InputStream is = new ByteArrayInputStream(bytes)){
        learner = XGBoostUtil.loadLearner(is);
    } catch(IOException ioe){
        throw new RuntimeException(ioe);
    }

    Function<Feature, Feature> function = (feature) -> {

        // Binary features pass through as-is
        if(feature instanceof BinaryFeature){
            return feature;
        }

        return feature.toContinuousFeature(DataType.FLOAT);
    };

    Object compact = converter.getOption(HasXGBoostOptions.OPTION_COMPACT, false);
    Object ntreeLimit = converter.getOption(HasXGBoostOptions.OPTION_NTREE_LIMIT, null);

    Map<String, Object> options = new LinkedHashMap<>();
    options.put(HasXGBoostOptions.OPTION_COMPACT, compact);
    options.put(HasXGBoostOptions.OPTION_NTREE_LIMIT, ntreeLimit);

    Schema xgbSchema = schema.toTransformedSchema(function);

    return learner.encodeMiningModel(options, xgbSchema);
}
示例5
/**
 * Creates one binary feature per value.
 *
 * @param encoder The encoder handed to each binary feature.
 * @param feature The feature that each binary feature is based on.
 * @param values The candidate values.
 * @param dropLast If {@code true}, the last value is left out.
 * @return The binary features, in the order of the retained values.
 */
static
public List<BinaryFeature> encodeFeature(PMMLEncoder encoder, Feature feature, List<?> values, boolean dropLast){
    // A view over all but the last value when dropping is requested
    List<?> retainedValues = (dropLast ? values.subList(0, values.size() - 1) : values);

    List<BinaryFeature> result = new ArrayList<>();

    for(Object retainedValue : retainedValues){
        BinaryFeature binaryFeature = new BinaryFeature(encoder, feature, retainedValue);

        result.add(binaryFeature);
    }

    return result;
}
示例6
/**
 * Stores the binary features of this feature.
 *
 * @param binaryFeatures A non-null, non-empty list.
 * @throws IllegalArgumentException If the list is {@code null} or has no elements.
 */
private void setBinaryFeatures(List<BinaryFeature> binaryFeatures){
    boolean valid = (binaryFeatures != null) && !binaryFeatures.isEmpty();

    if(!valid){
        throw new IllegalArgumentException();
    }

    this.binaryFeatures = binaryFeatures;
}
示例7
/**
 * Transforms a schema feature by feature.
 *
 * <p>
 * Binary features are kept. Every other feature is converted to a continuous feature:
 * integer and float ones are kept, double ones are converted to float,
 * and any other data type is rejected.
 * </p>
 *
 * @param schema The schema to transform.
 * @return The result of {@code schema.toTransformedSchema(...)}.
 * @throws IllegalArgumentException If a continuous feature has a data type other than integer, float or double.
 */
public Schema toXGBoostSchema(Schema schema){
    Function<Feature, Feature> function = (feature) -> {

        if(feature instanceof BinaryFeature){
            return feature;
        }

        ContinuousFeature continuousFeature = feature.toContinuousFeature();

        DataType dataType = continuousFeature.getDataType();
        switch(dataType){
            case INTEGER:
            case FLOAT:
                return continuousFeature;
            case DOUBLE:
                return continuousFeature.toContinuousFeature(DataType.FLOAT);
            default:
                throw new IllegalArgumentException("Expected integer, float or double data type, got " + dataType.value() + " data type");
        }
    };

    return schema.toTransformedSchema(function);
}
示例8
/**
 * Builds the simple predicate for one side of a split on the given feature.
 *
 * <p>
 * Binary features are compared against their own value: "not equal" on the left side, "equal" on the right side.
 * All other features are converted to continuous features and compared against the node threshold:
 * "less or equal" on the left side, "greater than" on the right side.
 * </p>
 *
 * @param feature The split feature.
 * @param node The node that supplies the threshold.
 * @param left {@code true} for the left side of the split, {@code false} for the right side.
 * @return The predicate.
 * @throws IllegalArgumentException If a continuous feature is neither integer nor float.
 */
static
private Predicate encodePredicate(Feature feature, Node node, boolean left){
    FieldName name = feature.getName();

    if(feature instanceof BinaryFeature){
        BinaryFeature binaryFeature = (BinaryFeature)feature;

        SimplePredicate.Operator binaryOperator = (left ? SimplePredicate.Operator.NOT_EQUAL : SimplePredicate.Operator.EQUAL);
        String binaryValue = binaryFeature.getValue();

        return new SimplePredicate(name, binaryOperator)
            .setValue(binaryValue);
    }

    ContinuousFeature continuousFeature = feature.toContinuousFeature();

    Number splitValue = node.getThreshold();

    DataType dataType = continuousFeature.getDataType();
    switch(dataType){
        case INTEGER:
            // NOTE(review): the threshold is shifted up by one and truncated towards zero;
            // confirm that this matches the split semantics together with the operators chosen below
            splitValue = (int)(splitValue.floatValue() + 1f);
            break;
        case FLOAT:
            break;
        default:
            throw new IllegalArgumentException();
    }

    SimplePredicate.Operator operator = (left ? SimplePredicate.Operator.LESS_OR_EQUAL : SimplePredicate.Operator.GREATER_THAN);
    String value = ValueUtil.formatValue(splitValue);

    return new SimplePredicate(name, operator)
        .setValue(value);
}
示例9
/**
 * Creates a feature on top of a binary feature.
 *
 * @param encoder The encoder, forwarded to the superclass constructor.
 * @param binaryFeature The binary feature, forwarded to the superclass constructor.
 */
public BinaryCategoricalFeature(PMMLEncoder encoder, BinaryFeature binaryFeature){
// The values list has two elements: null first, then the value of the binary feature
super(encoder, binaryFeature, Arrays.asList(null, binaryFeature.getValue()));
}
示例10
/**
 * Encodes every input column of the transformer as a binarized categorical feature.
 *
 * <p>
 * Each input column must resolve to a single categorical feature, whose values are turned
 * into binary features via {@code OneHotEncoderModelConverter.encodeFeature},
 * honoring the transformer's "drop last" setting.
 * </p>
 *
 * @param encoder The encoder used for resolving input columns and for creating features.
 * @return One binarized categorical feature per input column.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
    OneHotEncoderModel transformer = getTransformer();

    boolean dropLast = transformer.getDropLast();

    InOutMode inputMode = getInputMode();

    List<Feature> result = new ArrayList<>();

    String[] inputCols = inputMode.getInputCols(transformer);
    for(int i = 0; i < inputCols.length; i++){
        CategoricalFeature categoricalFeature = (CategoricalFeature)encoder.getOnlyFeature(inputCols[i]);

        List<BinaryFeature> binaryFeatures = OneHotEncoderModelConverter.encodeFeature(encoder, categoricalFeature, categoricalFeature.getValues(), dropLast);

        Feature binarizedFeature = new BinarizedCategoricalFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), binaryFeatures);

        result.add(binarizedFeature);
    }

    return result;
}
示例11
/**
 * Replaces all binary features of the given field name with a single lookup feature.
 *
 * <p>
 * Every matching binary feature is removed from the features list, and its coefficient from the coefficients list.
 * The removed (value, coefficient) pairs become the entries of a {@code MapValues} expression
 * that has a default value of zero and a double data type.
 * A derived field wrapping this expression is then added to the features list as a continuous feature,
 * together with a coefficient of one.
 * </p>
 *
 * @param name The field name to match binary features against.
 * @param identifier If non-null, appended to the name of the derived field.
 * @param features The features; modified in place.
 * @param coefficients The coefficients, walked in lockstep with the features; modified in place.
 * @return The map values expression.
 */
static
private MapValues createMapValues(FieldName name, Object identifier, List<Feature> features, List<Double> coefficients){
    PMMLEncoder encoder = null;

    List<Object> inputValues = new ArrayList<>();
    List<Double> outputValues = new ArrayList<>();

    ListIterator<Feature> featureIt = features.listIterator();
    ListIterator<Double> coefficientIt = coefficients.listIterator();

    while(featureIt.hasNext()){
        // Both iterators must advance on every step, even if the feature turns out to be irrelevant
        Feature feature = featureIt.next();
        Double coefficient = coefficientIt.next();

        boolean matches = (feature instanceof BinaryFeature) && (name).equals(((BinaryFeature)feature).getName());

        if(matches){
            BinaryFeature binaryFeature = (BinaryFeature)feature;

            featureIt.remove();
            coefficientIt.remove();

            // The encoder is taken from the first matching feature
            if(encoder == null){
                encoder = binaryFeature.getEncoder();
            }

            inputValues.add(binaryFeature.getValue());
            outputValues.add(coefficient);
        }
    }

    MapValues mapValues = PMMLUtil.createMapValues(name, inputValues, outputValues)
        .setDefaultValue(0d)
        .setDataType(DataType.DOUBLE);

    String identifierSuffix = (identifier != null ? (", " + identifier) : "");

    FieldName lookupName = FieldName.create("lookup(" + name.getValue() + identifierSuffix + ")");

    DerivedField derivedField = encoder.createDerivedField(lookupName, OpType.CONTINUOUS, DataType.DOUBLE, mapValues);

    // Both iterators are exhausted at this point, so the additions go to the end of the lists
    featureIt.add(new ContinuousFeature(encoder, derivedField));
    coefficientIt.add(1d);

    return mapValues;
}
示例12
/**
 * Creates a feature that is backed by a list of binary features.
 *
 * @param encoder The encoder, forwarded to the superclass constructor.
 * @param name The field name, forwarded to the superclass constructor.
 * @param dataType The data type, forwarded to the superclass constructor.
 * @param binaryFeatures The binary features, handed to {@code setBinaryFeatures}.
 */
public BinarizedCategoricalFeature(PMMLEncoder encoder, FieldName name, DataType dataType, List<BinaryFeature> binaryFeatures){
super(encoder, name, dataType);
setBinaryFeatures(binaryFeatures);
}
示例13
/**
 * Gets the binary features of this feature.
 *
 * @return The stored list itself, not a copy.
 */
public List<BinaryFeature> getBinaryFeatures(){
return this.binaryFeatures;
}
示例14
/**
 * Encodes every feature as a sequence of binary features, one per retained category.
 *
 * <p>
 * Categorical features contribute their own values (size-checked against the fitted categories),
 * wildcard features are first converted to categorical features using the fitted categories,
 * and object features use the fitted categories directly.
 * </p>
 *
 * <p>
 * If dropping is enabled, the category at the per-feature drop index is left out.
 * A {@code null} drop index means that nothing is dropped for that feature;
 * scikit-learn documents this for {@code drop_idx_}, for example with {@code drop='if_binary'} on a non-binary feature.
 * </p>
 *
 * @param features The incoming features; must match the fitted categories in size.
 * @param encoder The encoder handed to each binary feature.
 * @return The binary features of all incoming features, in order.
 * @throws IllegalArgumentException If a feature is neither a categorical, object nor wildcard feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    List<List<?>> categories = getCategories();

    ClassDictUtil.checkSize(categories, features);

    Object drop = getDrop();
    List<Integer> dropIdx = (drop != null ? getDropIdx() : null);

    List<Feature> result = new ArrayList<>();

    for(int i = 0; i < features.size(); i++){
        Feature feature = features.get(i);
        List<?> featureCategories = categories.get(i);

        if(feature instanceof CategoricalFeature){
            CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

            ClassDictUtil.checkSize(featureCategories, categoricalFeature.getValues());

            featureCategories = categoricalFeature.getValues();
        } else

        if(feature instanceof ObjectFeature){
            // Object features are used as they are, together with the fitted categories
        } else

        if(feature instanceof WildcardFeature){
            WildcardFeature wildcardFeature = (WildcardFeature)feature;

            feature = wildcardFeature.toCategoricalFeature(featureCategories);
        } else

        {
            throw new IllegalArgumentException();
        } // End if

        if(dropIdx != null){
            Integer index = dropIdx.get(i);

            // A missing drop index means that all categories of this feature are retained
            if(index != null){
                featureCategories = new ArrayList<>(featureCategories);

                // Unbox to primitive value in order to ensure correct List#remove(int) vs. List#remove(Object) method resolution
                featureCategories.remove(index.intValue());
            }
        }

        for(int j = 0; j < featureCategories.size(); j++){
            Object featureCategory = featureCategories.get(j);

            result.add(new BinaryFeature(encoder, feature, featureCategory));
        }
    }

    return result;
}
示例15
/**
 * Encodes a single feature as one output feature per (prepared) class.
 *
 * <p>
 * If the negative label is zero and the positive label is one, each class becomes a plain binary feature.
 * Otherwise each class becomes a derived field that evaluates to the positive label when the feature
 * equals the class, and to the negative label when it does not.
 * </p>
 *
 * <p>
 * As the last step, the feature's field is made categorical over the original (unprepared) classes.
 * </p>
 *
 * @param features The incoming features; exactly one is expected.
 * @param encoder The encoder used for creating features and derived fields.
 * @return The output features, one per prepared class.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    List<?> classes = getClasses();

    Number negLabel = getNegLabel();
    Number posLabel = getPosLabel();

    ClassDictUtil.checkSize(1, features);

    Feature feature = features.get(0);

    // Snapshot of the classes, taken before they are prepared
    List<Object> categories = new ArrayList<>(classes);

    List<Number> labelCategories = new ArrayList<>();
    labelCategories.add(negLabel);
    labelCategories.add(posLabel);

    List<Feature> result = new ArrayList<>();

    classes = prepareClasses(classes);

    for(int i = 0; i < classes.size(); i++){
        Object value = classes.get(i);

        if(ValueUtil.isZero(negLabel) && ValueUtil.isOne(posLabel)){
            result.add(new BinaryFeature(encoder, feature, value));

            continue;
        }

        // "($name == value) ? pos_label : neg_label"
        Apply apply = PMMLUtil.createApply(PMMLFunctions.IF)
            .addExpressions(PMMLUtil.createApply(PMMLFunctions.EQUAL, feature.ref(), PMMLUtil.createConstant(value, feature.getDataType())))
            .addExpressions(PMMLUtil.createConstant(posLabel), PMMLUtil.createConstant(negLabel));

        // The index is part of the name only if there is more than one class
        FieldName name;

        if(classes.size() > 1){
            name = FeatureUtil.createName("label_binarizer", feature, i);
        } else {
            name = FeatureUtil.createName("label_binarizer", feature);
        }

        DerivedField derivedField = encoder.createDerivedField(name, apply);

        result.add(new CategoricalFeature(encoder, derivedField, labelCategories));
    }

    encoder.toCategorical(feature.getName(), categories);

    return result;
}
示例16
/**
 * Checks that encoding a wildcard feature with {@code n_values_} set to three yields binary features
 * for the values 0, 1 and 2, and that the data field lists these values afterwards.
 */
@Test
public void encode(){
    SkLearnEncoder encoder = new SkLearnEncoder();

    DataField dataField = encoder.createDataField(FieldName.create("x"), OpType.CATEGORICAL, DataType.INTEGER);

    // The data field starts out without any values
    assertEquals(Arrays.asList(), PMMLUtil.getValues(dataField));

    OneHotEncoder oneHotEncoder = new OneHotEncoder("sklearn.preprocessing.data", "OneHotEncoder");
    oneHotEncoder.put("n_values_", 3);

    Feature inputFeature = new WildcardFeature(encoder, dataField);

    List<Feature> outputFeatures = oneHotEncoder.encodeFeatures(Collections.singletonList(inputFeature), encoder);

    for(int i = 0; i < 3; i++){
        BinaryFeature outputFeature = (BinaryFeature)outputFeatures.get(i);

        assertEquals(i, outputFeature.getValue());
    }

    // Encoding has filled in the values of the data field
    assertEquals(Arrays.asList(0, 1, 2), PMMLUtil.getValues(dataField));
}
示例17
/**
 * Creates one binary feature per category.
 *
 * @param savedModel Forwarded to {@code ensureCategoricalDataField}.
 * @param placeholder Forwarded to {@code ensureCategoricalDataField}.
 * @param categories The categories.
 * @return The binary features, in the order of the categories; all of them are based on the same data field.
 */
public List<BinaryFeature> createBinaryFeatures(SavedModel savedModel, NodeDef placeholder, List<String> categories){
    DataField dataField = ensureCategoricalDataField(savedModel, placeholder, categories);

    List<BinaryFeature> result = new ArrayList<>(categories.size());

    for(String category : categories){
        result.add(new BinaryFeature(this, dataField, category));
    }

    return result;
}
示例18
/**
 * Encodes a single feature as a sequence of binary features.
 *
 * <p>
 * A categorical feature must have as many values as this transformer, and contributes one binary feature per value of its own.
 * A wildcard feature contributes one binary feature per transformer value (converted to an integer),
 * and is then converted to a categorical feature over these integers.
 * </p>
 *
 * @param features The incoming features; exactly one is expected.
 * @param encoder The encoder handed to each binary feature.
 * @return The binary features.
 * @throws IllegalArgumentException If the feature is neither a categorical nor a wildcard feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    List<? extends Number> values = getValues();

    ClassDictUtil.checkSize(1, features);

    Feature feature = features.get(0);

    List<Feature> result = new ArrayList<>();

    if(feature instanceof CategoricalFeature){
        CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

        ClassDictUtil.checkSize(values, categoricalFeature.getValues());

        for(int i = 0; i < values.size(); i++){
            Object categoryValue = categoricalFeature.getValue(i);

            result.add(new BinaryFeature(encoder, categoricalFeature, categoryValue));
        }

        return result;
    } // End if

    if(feature instanceof WildcardFeature){
        WildcardFeature wildcardFeature = (WildcardFeature)feature;

        List<Integer> categories = new ArrayList<>();

        for(Number value : values){
            Integer category = ValueUtil.asInt(value);

            categories.add(category);

            result.add(new BinaryFeature(encoder, wildcardFeature, category));
        }

        // The conversion happens only after all binary features have been created
        wildcardFeature.toCategoricalFeature(categories);

        return result;
    }

    throw new IllegalArgumentException();
}