Java源码示例:org.jpmml.converter.BinaryFeature

示例1
/**
 * Hands groups of binary features that share a field name over to {@code createMapValues}.
 *
 * <p>
 * After the size check, the method returns immediately when the converter has no value for
 * {@code HasRegressionTableOptions.OPTION_LOOKUP_THRESHOLD}. Otherwise the {@link BinaryFeature}
 * elements are counted per field name, and {@code createMapValues} is invoked for every name
 * whose count is not below the threshold.
 * </p>
 *
 * @param converter The converter that supplies the lookup threshold option.
 * @param identifier Passed through to {@code createMapValues} unchanged.
 * @param features The features; checked against the number of coefficients.
 * @param coefficients The coefficients.
 */
static
public <C extends ModelConverter<?> & HasRegressionTableOptions> void simplify(C converter, Object identifier, List<Feature> features, List<Double> coefficients){
	SchemaUtil.checkSize(coefficients.size(), features);

	Integer lookupThreshold = (Integer)converter.getOption(HasRegressionTableOptions.OPTION_LOOKUP_THRESHOLD, null);
	if(lookupThreshold == null){
		return;
	}

	// The per-name tally is taken up front, before any of the calls below get to touch the lists
	Map<FieldName, Long> frequencies = features.stream()
		.filter(BinaryFeature.class::isInstance)
		.map(BinaryFeature.class::cast)
		.collect(Collectors.groupingBy(BinaryFeature::getName, Collectors.counting()));

	for(Map.Entry<FieldName, Long> frequency : frequencies.entrySet()){
		boolean eligible = (frequency.getValue() >= lookupThreshold);

		if(eligible){
			createMapValues(frequency.getKey(), identifier, features, coefficients);
		}
	}
}
 
示例2
/**
 * Derives a schema in which every non-binary feature is a continuous feature of the requested data type.
 *
 * @param dataType The data type passed to {@code Feature#toContinuousFeature(DataType)}.
 * @param schema The schema to transform.
 * @return The result of {@code Schema#toTransformedSchema(Function)}.
 */
static
private Schema toTreeModelSchema(DataType dataType, Schema schema){
	// Binary features pass through untouched; everything else is converted
	Function<Feature, Feature> converter = feature -> (feature instanceof BinaryFeature) ? feature : feature.toContinuousFeature(dataType);

	return schema.toTransformedSchema(converter);
}
 
示例3
/**
 * Wraps every non-binary feature into a {@code PowerFeature}; binary features are kept as they are.
 *
 * @param features The input features.
 * @param encoder The encoder handed to each new {@code PowerFeature}.
 * @return A new list holding one output feature per input feature, in input order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Integer power = getPower();

	List<Feature> encoded = new ArrayList<>();

	for(Feature feature : features){
		Feature outputFeature = feature;

		if(!(feature instanceof BinaryFeature)){
			outputFeature = new PowerFeature(encoder, feature.toContinuousFeature(), power);
		}

		encoded.add(outputFeature);
	}

	return encoded;
}
 
示例4
/**
 * Encodes a booster as a mining model by round-tripping it through its byte array form.
 *
 * <p>
 * The booster bytes are loaded back as a {@code Learner}, the schema is transformed so that
 * every non-binary feature becomes a continuous feature of {@code DataType.FLOAT}, and the
 * converter's {@code OPTION_COMPACT} and {@code OPTION_NTREE_LIMIT} options are forwarded to the learner.
 * </p>
 *
 * @param converter The converter that supplies the encoding options.
 * @param booster The booster to encode.
 * @param schema The schema before the feature transformation.
 * @return The mining model produced by {@code Learner#encodeMiningModel}.
 * @throws RuntimeException If reading the booster bytes fails with an {@link IOException}.
 */
static
public <C extends ModelConverter<?> & HasXGBoostOptions> MiningModel encodeBooster(C converter, Booster booster, Schema schema){
	byte[] serializedBooster = booster.toByteArray();

	Learner learner;

	try(InputStream boosterStream = new ByteArrayInputStream(serializedBooster)){
		learner = XGBoostUtil.loadLearner(boosterStream);
	} catch(IOException ioe){
		throw new RuntimeException(ioe);
	}

	// Binary features pass through untouched; everything else is converted to float
	Function<Feature, Feature> toFloatFeature = feature -> (feature instanceof BinaryFeature) ? feature : feature.toContinuousFeature(DataType.FLOAT);

	Map<String, Object> learnerOptions = new LinkedHashMap<>();
	learnerOptions.put(HasXGBoostOptions.OPTION_COMPACT, converter.getOption(HasXGBoostOptions.OPTION_COMPACT, false));
	learnerOptions.put(HasXGBoostOptions.OPTION_NTREE_LIMIT, converter.getOption(HasXGBoostOptions.OPTION_NTREE_LIMIT, null));

	return learner.encodeMiningModel(learnerOptions, schema.toTransformedSchema(toFloatFeature));
}
 
示例5
/**
 * Creates one {@link BinaryFeature} per category value.
 *
 * @param encoder The encoder handed to each binary feature.
 * @param feature The feature that the binary features are based on.
 * @param values The category values.
 * @param dropLast If {@code true}, the last element of {@code values} gets no binary feature.
 * @return A new list of binary features, in value order.
 */
static
public List<BinaryFeature> encodeFeature(PMMLEncoder encoder, Feature feature, List<?> values, boolean dropLast){
	// A view over all but the last value when the last category is to be dropped
	List<?> retainedValues = (dropLast ? values.subList(0, values.size() - 1) : values);

	List<BinaryFeature> binaryFeatures = new ArrayList<>();

	for(Iterator<?> it = retainedValues.iterator(); it.hasNext(); ){
		binaryFeatures.add(new BinaryFeature(encoder, feature, it.next()));
	}

	return binaryFeatures;
}
 
示例6
/**
 * Stores the binary features after making sure that there is at least one.
 *
 * @param binaryFeatures The binary features; must be neither {@code null} nor empty.
 * @throws IllegalArgumentException If the list is {@code null} or empty.
 */
private void setBinaryFeatures(List<BinaryFeature> binaryFeatures){

	// Report the two failure modes separately, so that the caller can tell them apart
	if(binaryFeatures == null){
		throw new IllegalArgumentException("The list of binary features must not be null");
	} // End if

	if(binaryFeatures.isEmpty()){
		throw new IllegalArgumentException("The list of binary features must not be empty");
	}

	this.binaryFeatures = binaryFeatures;
}
 
示例7
/**
 * Derives a schema whose non-binary features are continuous features of integer or float data type.
 *
 * <p>
 * Binary features are kept as they are. Continuous features of double data type are converted
 * to float data type; integer and float ones are kept.
 * </p>
 *
 * @param schema The schema to transform.
 * @return The result of {@code Schema#toTransformedSchema(Function)}.
 * @throws IllegalArgumentException If a continuous feature has any other data type.
 */
public Schema toXGBoostSchema(Schema schema){
	Function<Feature, Feature> converter = feature -> {

		if(feature instanceof BinaryFeature){
			return feature;
		}

		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		DataType dataType = continuousFeature.getDataType();
		switch(dataType){
			case INTEGER:
			case FLOAT:
				return continuousFeature;
			case DOUBLE:
				return continuousFeature.toContinuousFeature(DataType.FLOAT);
			default:
				throw new IllegalArgumentException("Expected integer, float or double data type, got " + dataType.value() + " data type");
		}
	};

	return schema.toTransformedSchema(converter);
}
 
示例8
/**
 * Builds the simple predicate for one side of a split on the given feature.
 *
 * <p>
 * For a {@link BinaryFeature}, the left side tests for inequality with the feature value and
 * the right side for equality. For any other feature, the left side tests "less or equal" and
 * the right side "greater than" against the formatted node threshold.
 * </p>
 *
 * @param feature The split feature.
 * @param node The node that supplies the threshold for non-binary features.
 * @param left {@code true} for the left side of the split, {@code false} for the right side.
 * @return The predicate.
 * @throws IllegalArgumentException If a non-binary feature is neither of integer nor float data type.
 */
static
private Predicate encodePredicate(Feature feature, Node node, boolean left){
    FieldName name = feature.getName();

    if(feature instanceof BinaryFeature){
        BinaryFeature binaryFeature = (BinaryFeature)feature;

        SimplePredicate.Operator equalityOperator = (left ? SimplePredicate.Operator.NOT_EQUAL : SimplePredicate.Operator.EQUAL);
        String categoryValue = binaryFeature.getValue();

        return new SimplePredicate(name, equalityOperator)
                .setValue(categoryValue);
    }

    ContinuousFeature continuousFeature = feature.toContinuousFeature();

    Number threshold = node.getThreshold();

    DataType dataType = continuousFeature.getDataType();
    switch(dataType){
        case INTEGER:
            // Integer features use the threshold plus one, truncated to int
            threshold = (int)(threshold.floatValue() + 1f);
            break;
        case FLOAT:
            break;
        default:
            throw new IllegalArgumentException();
    }

    SimplePredicate.Operator comparisonOperator = (left ? SimplePredicate.Operator.LESS_OR_EQUAL : SimplePredicate.Operator.GREATER_THAN);
    String thresholdValue = ValueUtil.formatValue(threshold);

    return new SimplePredicate(name, comparisonOperator)
            .setValue(thresholdValue);
}
 
示例9
/**
 * Creates a feature on top of a binary feature.
 *
 * <p>
 * The superclass constructor receives a fixed-size, two-element value list: {@code null}
 * followed by the value of the binary feature.
 * NOTE(review): the {@code null} element presumably stands for the "value not matched" case - confirm against the superclass.
 * </p>
 *
 * @param encoder The encoder, passed on to the superclass.
 * @param binaryFeature The binary feature, passed on to the superclass; also supplies the second list element.
 */
public BinaryCategoricalFeature(PMMLEncoder encoder, BinaryFeature binaryFeature){
	super(encoder, binaryFeature, Arrays.asList(null, binaryFeature.getValue()));
}
 
示例10
/**
 * Encodes every input column of the one-hot encoder model as a {@code BinarizedCategoricalFeature}.
 *
 * <p>
 * Each input column must resolve to a {@code CategoricalFeature}. Its values are turned into
 * binary features by {@code OneHotEncoderModelConverter.encodeFeature}, honoring the
 * transformer's "drop last" setting.
 * </p>
 *
 * @param encoder The encoder that resolves input columns to features.
 * @return A new list holding one feature per input column, in column order.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	OneHotEncoderModel transformer = getTransformer();

	boolean dropLast = transformer.getDropLast();

	String[] inputCols = getInputMode().getInputCols(transformer);

	List<Feature> encoded = new ArrayList<>();

	for(int i = 0; i < inputCols.length; i++){
		CategoricalFeature source = (CategoricalFeature)encoder.getOnlyFeature(inputCols[i]);

		List<BinaryFeature> indicators = OneHotEncoderModelConverter.encodeFeature(encoder, source, source.getValues(), dropLast);

		encoded.add(new BinarizedCategoricalFeature(encoder, source.getName(), source.getDataType(), indicators));
	}

	return encoded;
}
 
示例11
/**
 * Replaces all binary features of the given field name with a single lookup-based continuous feature.
 *
 * <p>
 * The feature and coefficient lists are walked in lockstep. Every {@link BinaryFeature} whose name
 * equals {@code name} is removed from the feature list together with its coefficient, and its
 * (value, coefficient) pair is recorded as a lookup entry. The lookup is wrapped into a derived field
 * named {@code lookup(<name>)} or {@code lookup(<name>, <identifier>)}, and a continuous feature
 * on that derived field is added to the feature list with a coefficient of {@code 1}.
 * </p>
 *
 * <p>
 * Both lists are modified in place, so they must support {@code ListIterator#remove()} and {@code ListIterator#add(Object)}.
 * If no feature matches, then the encoder is never assigned and the derived field creation fails
 * with a {@link NullPointerException}.
 * </p>
 *
 * @param name The field name to match binary features against.
 * @param identifier An optional suffix for the derived field name; omitted when {@code null}.
 * @param features The features; modified in place.
 * @param coefficients The coefficients, one per feature; modified in place.
 * @return The lookup expression that was used for the derived field.
 */
static
private MapValues createMapValues(FieldName name, Object identifier, List<Feature> features, List<Double> coefficients){
	ListIterator<Feature> featureIt = features.listIterator();
	ListIterator<Double> coefficientIt = coefficients.listIterator();

	// Taken from the first matching binary feature
	PMMLEncoder encoder = null;

	List<Object> inputValues = new ArrayList<>();
	List<Double> outputValues = new ArrayList<>();

	while(featureIt.hasNext()){
		Feature feature = featureIt.next();
		Double coefficient = coefficientIt.next();

		if(!(feature instanceof BinaryFeature)){
			continue;
		}

		BinaryFeature binaryFeature = (BinaryFeature)feature;
		if(!(name).equals(binaryFeature.getName())){
			continue;
		}

		// Drop the matched feature and its coefficient; they are represented by a lookup entry from now on
		featureIt.remove();
		coefficientIt.remove();

		if(encoder == null){
			encoder = binaryFeature.getEncoder();
		}

		inputValues.add(binaryFeature.getValue());
		outputValues.add(coefficient);
	}

	// Values without a lookup entry map to zero
	MapValues mapValues = PMMLUtil.createMapValues(name, inputValues, outputValues)
		.setDefaultValue(0d)
		.setDataType(DataType.DOUBLE);

	DerivedField derivedField = encoder.createDerivedField(FieldName.create("lookup(" + name.getValue() + (identifier != null ? (", " + identifier) : "") + ")"), OpType.CONTINUOUS, DataType.DOUBLE, mapValues);

	// Both iterators are exhausted at this point, so the additions land at the end of the lists.
	// The lookup already yields the coefficient, hence the unit coefficient.
	featureIt.add(new ContinuousFeature(encoder, derivedField));
	coefficientIt.add(1d);

	return mapValues;
}
 
示例12
/**
 * Creates a feature that is backed by a list of binary features.
 *
 * @param encoder The encoder, passed on to the superclass.
 * @param name The field name, passed on to the superclass.
 * @param dataType The data type, passed on to the superclass.
 * @param binaryFeatures The binary features, handed to {@code setBinaryFeatures}.
 */
public BinarizedCategoricalFeature(PMMLEncoder encoder, FieldName name, DataType dataType, List<BinaryFeature> binaryFeatures){
	super(encoder, name, dataType);

	setBinaryFeatures(binaryFeatures);
}
 
示例13
/**
 * Gets the binary features of this feature.
 *
 * @return The stored list itself, not a copy; changes made to it are visible through this object.
 */
public List<BinaryFeature> getBinaryFeatures(){
	return this.binaryFeatures;
}
 
示例14
/**
 * Encodes every input feature as a sequence of binary features, one per retained category.
 *
 * <p>
 * The categories of a {@code CategoricalFeature} are replaced with the feature's own values,
 * after checking that the two lists agree in size. A {@code WildcardFeature} is converted to
 * a categorical feature over the configured categories. An {@code ObjectFeature} is used as it is.
 * </p>
 *
 * <p>
 * When a drop strategy is configured, the category at the feature's drop index is left out.
 * A feature whose drop index is {@code null} keeps all of its categories.
 * </p>
 *
 * @param features The input features; checked against the number of category lists.
 * @param encoder The encoder handed to each binary feature.
 * @return A new list of binary features, grouped by input feature in input order.
 * @throws IllegalArgumentException If an input feature is of an unsupported type.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<List<?>> categories = getCategories();

	ClassDictUtil.checkSize(categories, features);

	Object drop = getDrop();
	List<Integer> dropIdx = (drop != null ? getDropIdx() : null);

	List<Feature> result = new ArrayList<>();

	for(int i = 0; i < features.size(); i++){
		Feature feature = features.get(i);
		List<?> featureCategories = categories.get(i);

		if(feature instanceof CategoricalFeature){
			CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

			ClassDictUtil.checkSize(featureCategories, categoricalFeature.getValues());

			featureCategories = categoricalFeature.getValues();
		} else

		if(feature instanceof ObjectFeature){
			// Accepted as it is
		} else

		if(feature instanceof WildcardFeature){
			WildcardFeature wildcardFeature = (WildcardFeature)feature;

			feature = wildcardFeature.toCategoricalFeature(featureCategories);
		} else

		{
			throw new IllegalArgumentException();
		} // End if

		if(dropIdx != null){
			// Scikit-Learn documents that a drop index is None for a feature that has no category dropped (eg. drop = "if_binary" on a non-binary feature).
			// NOTE(review): assumes that such entries surface as null list elements here - confirm against getDropIdx()
			Integer index = dropIdx.get(i);

			if(index != null){
				featureCategories = new ArrayList<>(featureCategories);
				// Unbox to primitive value in order to ensure correct List#remove(int) vs. List#remove(Object) method resolution
				featureCategories.remove(index.intValue());
			}
		}

		for(int j = 0; j < featureCategories.size(); j++){
			result.add(new BinaryFeature(encoder, feature, featureCategories.get(j)));
		}
	}

	return result;
}
 
示例15
/**
 * Encodes the single input feature as one output feature per prepared class.
 *
 * <p>
 * When the negative label is zero and the positive label is one, the output is a plain
 * {@link BinaryFeature}. Otherwise it is a categorical feature over the two labels, backed by
 * a derived field that evaluates to the positive label if the input equals the class, and
 * to the negative label if not.
 * </p>
 *
 * <p>
 * Finally, the input field is made categorical over the classes as they were before preparation.
 * </p>
 *
 * @param features The input features; exactly one is expected.
 * @param encoder The encoder that creates the derived fields.
 * @return A new list holding one feature per prepared class.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<?> classes = getClasses();

	Number negLabel = getNegLabel();
	Number posLabel = getPosLabel();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	// Snapshot of the classes, taken before they are prepared
	List<Object> categories = new ArrayList<>(classes);

	List<Number> labelCategories = new ArrayList<>();
	labelCategories.add(negLabel);
	labelCategories.add(posLabel);

	List<Feature> result = new ArrayList<>();

	List<?> preparedClasses = prepareClasses(classes);

	for(int i = 0; i < preparedClasses.size(); i++){
		Object value = preparedClasses.get(i);

		if(ValueUtil.isZero(negLabel) && ValueUtil.isOne(posLabel)){
			result.add(new BinaryFeature(encoder, feature, value));

			continue;
		}

		// "($name == value) ? pos_label : neg_label"
		Apply equalsValue = PMMLUtil.createApply(PMMLFunctions.EQUAL, feature.ref(), PMMLUtil.createConstant(value, feature.getDataType()));

		Apply apply = PMMLUtil.createApply(PMMLFunctions.IF)
			.addExpressions(equalsValue)
			.addExpressions(PMMLUtil.createConstant(posLabel), PMMLUtil.createConstant(negLabel));

		FieldName name;

		// The class index is part of the name only if there is more than one class
		if(preparedClasses.size() > 1){
			name = FeatureUtil.createName("label_binarizer", feature, i);
		} else

		{
			name = FeatureUtil.createName("label_binarizer", feature);
		}

		DerivedField derivedField = encoder.createDerivedField(name, apply);

		result.add(new CategoricalFeature(encoder, derivedField, labelCategories));
	}

	encoder.toCategorical(feature.getName(), categories);

	return result;
}
 
示例16
/**
 * Checks that one-hot encoding a wildcard feature yields the binary features 0, 1 and 2,
 * and that the underlying data field ends up with exactly those values.
 */
@Test
public void encode(){
	SkLearnEncoder encoder = new SkLearnEncoder();

	DataField xField = encoder.createDataField(FieldName.create("x"), OpType.CATEGORICAL, DataType.INTEGER);

	Feature wildcardFeature = new WildcardFeature(encoder, xField);

	// The data field has no values to begin with
	assertEquals(Arrays.asList(), PMMLUtil.getValues(xField));

	OneHotEncoder transformer = new OneHotEncoder("sklearn.preprocessing.data", "OneHotEncoder");
	transformer.put("n_values_", 3);

	List<Feature> encodedFeatures = transformer.encodeFeatures(Collections.singletonList(wildcardFeature), encoder);

	for(int category = 0; category < 3; category++){
		BinaryFeature binaryFeature = (BinaryFeature)encodedFeatures.get(category);

		assertEquals(category, binaryFeature.getValue());
	}

	// The encoding has populated the data field
	assertEquals(Arrays.asList(0, 1, 2), PMMLUtil.getValues(xField));
}
 
示例17
/**
 * Creates one {@link BinaryFeature} per category of a placeholder.
 *
 * @param savedModel The saved model, passed to {@code ensureCategoricalDataField}.
 * @param placeholder The placeholder node, passed to {@code ensureCategoricalDataField}.
 * @param categories The categories.
 * @return A new list of binary features on the categorical data field, in category order.
 */
public List<BinaryFeature> createBinaryFeatures(SavedModel savedModel, NodeDef placeholder, List<String> categories){
	DataField categoricalField = ensureCategoricalDataField(savedModel, placeholder, categories);

	List<BinaryFeature> binaryFeatures = new ArrayList<>();

	for(int i = 0; i < categories.size(); i++){
		binaryFeatures.add(new BinaryFeature(this, categoricalField, categories.get(i)));
	}

	return binaryFeatures;
}
 
示例18
/**
 * Encodes the single input feature as one {@link BinaryFeature} per value.
 *
 * <p>
 * For a {@code CategoricalFeature}, the binary features are based on the feature's own values,
 * which must agree in number with the configured values. For a {@code WildcardFeature}, the
 * configured values are converted to integer categories, and the wildcard feature is converted
 * to a categorical feature over them afterwards.
 * </p>
 *
 * @param features The input features; exactly one is expected.
 * @param encoder The encoder handed to each binary feature.
 * @return A new list of binary features, in value order.
 * @throws IllegalArgumentException If the input feature is neither categorical nor wildcard.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<? extends Number> values = getValues();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	List<Feature> binaryFeatures = new ArrayList<>();

	if(feature instanceof CategoricalFeature){
		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		ClassDictUtil.checkSize(values, categoricalFeature.getValues());

		for(int i = 0; i < values.size(); i++){
			binaryFeatures.add(new BinaryFeature(encoder, categoricalFeature, categoricalFeature.getValue(i)));
		}

		return binaryFeatures;
	} // End if

	if(feature instanceof WildcardFeature){
		WildcardFeature wildcardFeature = (WildcardFeature)feature;

		List<Integer> categories = new ArrayList<>();

		for(Number value : values){
			Integer category = ValueUtil.asInt(value);

			categories.add(category);

			binaryFeatures.add(new BinaryFeature(encoder, wildcardFeature, category));
		}

		// The conversion happens only after all binary features have been created
		wildcardFeature.toCategoricalFeature(categories);

		return binaryFeatures;
	}

	throw new IllegalArgumentException();
}