Python源码示例:sklearn.utils.validation.check_random_state()
示例1
def endless_permutations(N, random_state=None):
    """
    Generate an endless sequence of random integers from permutations of the
    set [0, ..., N).

    If we call this N times, we will sweep through the entire set without
    replacement, on the (N+1)th call a new permutation will be created, etc.

    Parameters
    ----------
    N: int
        the length of the set
    random_state: int or RandomState, optional
        random seed

    Yields
    ------
    int:
        a random int from the set [0, ..., N)

    Raises
    ------
    ValueError
        When the permutation drawn for ``N`` is empty (for example
        ``N == 0``). Because this is a generator, the error surfaces on the
        first ``next()`` call rather than at creation time.
    """
    generator = check_random_state(random_state)
    while True:
        batch_inds = generator.permutation(N)
        if len(batch_inds) == 0:
            # An empty permutation never yields, so the outer loop would
            # spin forever; fail loudly instead of hanging the consumer.
            raise ValueError(
                "N must describe a non-empty set, got %r" % (N,))
        for b in batch_inds:
            yield b
示例2
def test_parallel_train():
    """Forests fitted with different ``n_jobs`` give almost equal probabilities."""
    n_samples, n_features = 80, 30
    rng = check_random_state(12321)
    X_train = rng.randn(n_samples, n_features)
    y_train = rng.randint(0, 2, n_samples)

    # Same seed for every forest; only the degree of parallelism varies.
    clfs = []
    for n_jobs in (1, 2, 3, 8, 16, 32):
        forest = RandomForestClassifier(n_estimators=20, n_jobs=n_jobs,
                                        random_state=12345)
        clfs.append(forest.fit(X_train, y_train))

    X_test = rng.randn(n_samples, n_features)
    probas = [clf.predict_proba(X_test) for clf in clfs]

    # Compare each forest's output with the next one's.
    for previous, following in zip(probas[:-1], probas[1:]):
        assert_array_almost_equal(previous, following)
示例3
def check_zero_or_all_relevant_labels(lrap_score):
    """Check ``lrap_score`` equals 1 when no label or every label is relevant.

    Parameters
    ----------
    lrap_score : callable
        Called as ``lrap_score(y_true, y_score)``.
    """
    rng = check_random_state(0)

    for n_labels in (2, 3, 4):
        scores = rng.uniform(size=(1, n_labels))
        tied_scores = np.zeros_like(scores)

        # np.zeros -> no relevant labels, np.ones -> only relevant labels
        for build_truth in (np.zeros, np.ones):
            y_true = build_truth((1, n_labels))
            assert_equal(lrap_score(y_true, scores), 1.)
            assert_equal(lrap_score(y_true, tied_scores), 1.)

    # Degenerate case: only one label
    single_label_truth = [[1], [0], [1], [0]]
    single_label_scores = [[0.5], [0.5], [0.5], [0.5]]
    assert_almost_equal(
        lrap_score(single_label_truth, single_label_scores), 1.)
示例4
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    """Compare the LRAP score against the reference ``_my_lrap`` helper.

    The comparison is done twice: once on scores that contain ties and once
    on uniformly drawn scores.

    NOTE(review): ``lrap_score`` is accepted but never used in the body; the
    comparison always calls ``label_ranking_average_precision_score``.
    Confirm with the callers whether that is intended.
    """
    _, y_true = make_multilabel_classification(
        n_features=1,
        allow_unlabeled=False,
        random_state=random_state,
        n_classes=n_classes,
        n_samples=n_samples,
    )

    # Score with ties
    tied_scores = sparse_random_matrix(
        n_components=y_true.shape[0],
        n_features=y_true.shape[1],
        random_state=random_state,
    )
    if hasattr(tied_scores, "toarray"):
        tied_scores = tied_scores.toarray()
    assert_almost_equal(
        label_ranking_average_precision_score(y_true, tied_scores),
        _my_lrap(y_true, tied_scores),
    )

    # Uniform score
    rng = check_random_state(random_state)
    uniform_scores = rng.uniform(size=(n_samples, n_classes))
    assert_almost_equal(
        label_ranking_average_precision_score(y_true, uniform_scores),
        _my_lrap(y_true, uniform_scores),
    )
示例5
def test_multilabel_sample_weight_invariance(name):
    """Run the sample-weight invariance check on multilabel indicator data."""
    # multilabel indicator
    rng = check_random_state(0)
    common = dict(n_features=1, n_classes=20, n_samples=100,
                  allow_unlabeled=False)
    _, ya = make_multilabel_classification(random_state=0, **common)
    _, yb = make_multilabel_classification(random_state=1, **common)

    y_true = np.vstack([ya, yb])
    y_pred = np.vstack([ya, ya])
    y_score = rng.randint(1, 4, size=y_true.shape)

    metric = ALL_METRICS[name]
    # Thresholded metrics are fed scores, every other metric gets labels.
    estimate = y_score if name in THRESHOLDED_METRICS else y_pred
    check_sample_weight_invariance(name, metric, y_true, estimate)
示例6
def test_multilabel_label_permutations_invariance(name):
    """The metric must not change when label columns are permuted."""
    rng = check_random_state(0)
    n_samples, n_classes = 20, 4
    shape = (n_samples, n_classes)

    y_true = rng.randint(0, 2, size=shape)
    y_score = rng.randint(0, 2, size=shape)

    metric = ALL_METRICS[name]
    reference = metric(y_true, y_score)

    # Apply the same column order to targets and scores.
    for column_order in permutations(range(n_classes), n_classes):
        permuted = metric(y_true[:, column_order], y_score[:, column_order])
        assert_almost_equal(reference, permuted)
示例7
def test_thresholded_multilabel_multioutput_permutations_invariance(name):
    """The thresholded metric must not change when columns are permuted."""
    rng = check_random_state(0)
    n_samples, n_classes = 20, 4
    y_true = rng.randint(0, 2, size=(n_samples, n_classes))
    y_score = rng.normal(size=y_true.shape)

    # Makes sure all samples have at least one label. This works around errors
    # when running metrics where average="sample"
    row_totals = y_true.sum(1)
    y_true[row_totals == n_classes, 0] = 0
    y_true[y_true.sum(1) == 0, 0] = 1

    metric = ALL_METRICS[name]
    reference = metric(y_true, y_score)

    # Apply the same column order to targets and scores.
    for column_order in permutations(range(n_classes), n_classes):
        permuted = metric(y_true[:, column_order], y_score[:, column_order])
        assert_almost_equal(reference, permuted)
示例8
def test_execute():
    """Check executing the program works"""
    params = dict(
        function_set=[add2, sub2, mul2, div2],
        arities={2: [add2, sub2, mul2, div2]},
        init_depth=(2, 6),
        init_method='half and half',
        n_features=10,
        const_range=(-1.0, 1.0),
        metric='mean absolute error',
        p_point_replace=0.05,
        parsimony_coefficient=0.1,
    )
    rng = check_random_state(415)

    # Test for a small program
    small_program = [mul2, div2, 8, 1, sub2, 9, .5]
    X = rng.uniform(size=50).reshape(5, 10)
    gp = _Program(random_state=rng, program=small_program, **params)

    expected = [-0.19656208, 0.78197782, -1.70123845, -0.60175969,
                -0.01082618]
    assert_array_almost_equal(gp.execute(X), expected)
示例9
def test_get_subtree():
    """Check that get subtree does the same thing for self and new programs"""
    params = dict(
        function_set=[add2, sub2, mul2, div2],
        arities={2: [add2, sub2, mul2, div2]},
        init_depth=(2, 6),
        init_method='half and half',
        n_features=10,
        const_range=(-1.0, 1.0),
        metric='mean absolute error',
        p_point_replace=0.05,
        parsimony_coefficient=0.1,
    )
    rng = check_random_state(415)

    # Test for a small program
    small_program = [mul2, div2, 8, 1, sub2, 9, .5]
    gp = _Program(random_state=rng, program=small_program, **params)

    # Both calls get a fresh generator seeded with 0.
    from_self = gp.get_subtree(check_random_state(0))
    from_argument = gp.get_subtree(check_random_state(0), small_program)
    assert_equal(from_self, from_argument)
示例10
def test_yj_fit_transform():
    """Fit-transform a frame, then fit on a random array with mixed signs."""
    transformer = YeoJohnsonTransformer(cols=X.columns[:2])  # just first two cols
    transformed = transformer.fit_transform(X)
    assert isinstance(transformed, pd.DataFrame)

    # Test it on a random...
    m, n = 1000, 5
    rng = check_random_state(42)
    x = rng.rand(m, n)

    # make some random
    mask = rng.rand(m, n) % 2 < 0.5
    # +1 where the mask holds, -1 elsewhere
    x *= np.where(mask, 1.0, -1.0)

    YeoJohnsonTransformer().fit(x)
示例11
def test_KNeighborsRegressor_multioutput_uniform_weight(algorithm, weights):
    """Compare multi-output predictions with the mean of the neighbors' targets."""
    # Test k-neighbors in multi-output regression with uniform weight
    rng = check_random_state(0)
    n_samples, n_features, n_output = 40, 5, 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    knn = neighbors.KNeighborsRegressor(weights=weights, algorithm=algorithm)
    knn.fit(X_train, y_train)

    # Reference prediction: plain mean over each test point's neighbors.
    neighbor_rows = knn.kneighbors(X_test, return_distance=False)
    y_pred_idx = np.array([y_train[rows].mean(axis=0)
                           for rows in neighbor_rows])

    y_pred = knn.predict(X_test)

    assert_equal(y_pred.shape, y_test.shape)
    assert_equal(y_pred_idx.shape, y_test.shape)
    assert_array_almost_equal(y_pred, y_pred_idx)
示例12
def _check_inputs(
    self,
    X: Union[ArrayLike, DataFrameType],
    accept_sparse_negative: bool = False,
    copy: bool = False,
    in_fit: bool = True,
) -> Union[ArrayLike, DataFrameType]:
    """Convert ``X`` to an array and run the parent checks on a small sample.

    DataFrames (pandas or dask) are replaced by their ``.values``; a NumPy
    array is wrapped into a dask array. The parent class'
    ``_check_inputs`` is then called on a random ``(5, n_columns)`` sample
    cast to ``X.dtype`` instead of on ``X`` itself.

    Parameters
    ----------
    X : array-like or DataFrame
        Input data.
    accept_sparse_negative, copy, in_fit : bool
        Forwarded unchanged to the parent ``_check_inputs``.

    Returns
    -------
    array-like
        ``X`` after the conversions described above. The result of the
        parent check is discarded.
    """
    if isinstance(X, (pd.DataFrame, dd.DataFrame)):
        X = X.values
    if isinstance(X, np.ndarray):
        # Clamp to at least 1: with fewer rows than the divisor the floor
        # division yields 0, which is not a usable chunk size for a
        # non-empty array.
        C = max(1, len(X) // min(multiprocessing.cpu_count(), 2))
        X = da.from_array(X, chunks=C)

    rng = check_random_state(self.random_state)
    # TODO: non-float dtypes?
    # TODO: sparse arrays?
    # TODO: mix of sparse, dense?
    sample = rng.uniform(size=(5, X.shape[1])).astype(X.dtype)
    super(QuantileTransformer, self)._check_inputs(
        sample,
        accept_sparse_negative=accept_sparse_negative,
        copy=copy,
        in_fit=in_fit,
    )
    return X
示例13
def __init__(self, X, y, criterion, min_samples_split, max_depth,
             n_val_sample, random_state):
    """Validate the inputs, store the hyper parameters and grow the tree.

    Raises
    ------
    ValueError
        If ``max_depth`` is smaller than 2.
    """
    # make sure max_depth > 1
    if max_depth < 2:
        raise ValueError("max depth must be > 1")

    # check the input arrays, and if it's classification validate the
    # target values in y
    X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
    if is_classifier(self):
        check_classification_targets(y)

    # hyper parameters so we can later inspect attributes of the model
    self.random_state = random_state
    self.n_val_sample = n_val_sample
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split

    # create the splitting class
    rng = check_random_state(random_state)
    self.splitter = RandomSplitter(rng, criterion, n_val_sample)

    # grow the tree depth first
    self.tree = self._find_next_split(X, y, 0)
示例14
def test_parallel_train():
    """Forests fitted with different ``n_jobs`` give almost equal probabilities."""
    n_samples, n_features = 80, 30
    rng = check_random_state(12321)
    X_train = rng.randn(n_samples, n_features)
    y_train = rng.randint(0, 2, n_samples)

    # Same seed for every forest; only the degree of parallelism varies.
    clfs = []
    for n_jobs in (1, 2, 3, 8, 16, 32):
        forest = RandomForestClassifier(n_estimators=20, n_jobs=n_jobs,
                                        random_state=12345)
        clfs.append(forest.fit(X_train, y_train))

    X_test = rng.randn(n_samples, n_features)
    probas = [clf.predict_proba(X_test) for clf in clfs]

    # Compare each forest's output with the next one's.
    for previous, following in zip(probas[:-1], probas[1:]):
        assert_array_almost_equal(previous, following)
示例15
def check_zero_or_all_relevant_labels(lrap_score):
    """Check ``lrap_score`` equals 1 when no label or every label is relevant.

    Parameters
    ----------
    lrap_score : callable
        Called as ``lrap_score(y_true, y_score)``.
    """
    rng = check_random_state(0)

    for n_labels in (2, 3, 4):
        scores = rng.uniform(size=(1, n_labels))
        tied_scores = np.zeros_like(scores)

        # np.zeros -> no relevant labels, np.ones -> only relevant labels
        for build_truth in (np.zeros, np.ones):
            y_true = build_truth((1, n_labels))
            assert_equal(lrap_score(y_true, scores), 1.)
            assert_equal(lrap_score(y_true, tied_scores), 1.)

    # Degenerate case: only one label
    single_label_truth = [[1], [0], [1], [0]]
    single_label_scores = [[0.5], [0.5], [0.5], [0.5]]
    assert_almost_equal(
        lrap_score(single_label_truth, single_label_scores), 1.)
示例16
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    """Compare the LRAP score against the reference ``_my_lrap`` helper.

    The comparison is done twice: once on scores that contain ties and once
    on uniformly drawn scores.

    NOTE(review): ``lrap_score`` is accepted but never used in the body; the
    comparison always calls ``label_ranking_average_precision_score``.
    Confirm with the callers whether that is intended.
    """
    _, y_true = make_multilabel_classification(
        n_features=1,
        allow_unlabeled=False,
        random_state=random_state,
        n_classes=n_classes,
        n_samples=n_samples,
    )

    # Score with ties
    tied_scores = sparse_random_matrix(
        n_components=y_true.shape[0],
        n_features=y_true.shape[1],
        random_state=random_state,
    )
    if hasattr(tied_scores, "toarray"):
        tied_scores = tied_scores.toarray()
    assert_almost_equal(
        label_ranking_average_precision_score(y_true, tied_scores),
        _my_lrap(y_true, tied_scores),
    )

    # Uniform score
    rng = check_random_state(random_state)
    uniform_scores = rng.uniform(size=(n_samples, n_classes))
    assert_almost_equal(
        label_ranking_average_precision_score(y_true, uniform_scores),
        _my_lrap(y_true, uniform_scores),
    )
示例17
def test_ch_base():
    """A ``None`` input must estimate 0; a long ``m`` must run on random data."""
    short_test = CHTest(m=2)
    assert short_test.estimate_seasonal_differencing_term(None) == 0

    # test really long m for random array
    rng = check_random_state(42)
    long_test = CHTest(m=365)
    series = rng.rand(400)
    # Result is not asserted on; the call only has to complete.
    long_test.estimate_seasonal_differencing_term(series)
示例18
def check_importances(name, criterion, dtype, tolerance):
    """Check feature importances of the forest estimator registered as ``name``.

    Covers the plain fit, a switch to ``n_jobs=2`` after fitting, and fits
    with sample weights at different scales.
    """
    # cast as dtype
    X = X_large.astype(dtype, copy=False)
    y = y_large.astype(dtype, copy=False)

    ForestEstimator = FOREST_ESTIMATORS[name]

    def new_forest():
        # Every forest in this check uses the same configuration.
        return ForestEstimator(n_estimators=10, criterion=criterion,
                               random_state=0)

    est = new_forest()
    est.fit(X, y)
    importances = est.feature_importances_

    # The forest estimator can detect that only the first 3 features of the
    # dataset are informative:
    n_important = np.sum(importances > 0.1)
    assert_equal(importances.shape[0], 10)
    assert_equal(n_important, 3)
    assert np.all(importances[:3] > 0.1)

    # Check with parallel
    importances = est.feature_importances_
    est.set_params(n_jobs=2)
    importances_parallel = est.feature_importances_
    assert_array_almost_equal(importances, importances_parallel)

    # Check with sample weights
    sample_weight = check_random_state(0).randint(1, 10, len(X))
    est = new_forest()
    est.fit(X, y, sample_weight=sample_weight)
    importances = est.feature_importances_
    assert np.all(importances >= 0.0)

    # Rescaled weights must give importances within ``tolerance`` on average.
    for scale in (0.5, 100):
        est = new_forest()
        est.fit(X, y, sample_weight=scale * sample_weight)
        importances_bis = est.feature_importances_
        mean_gap = np.abs(importances - importances_bis).mean()
        assert_less(mean_gap, tolerance)
示例19
def test_only_constant_features():
    """Trees fitted on an all-zero feature matrix must have depth 0."""
    rng = check_random_state(0)
    X = np.zeros((10, 20))
    y = rng.randint(0, 2, (10, ))

    # Only the estimator classes are needed, not their registry names.
    for TreeEstimator in ALL_TREES.values():
        est = TreeEstimator(random_state=0)
        est.fit(X, y)
        assert_equal(est.tree_.max_depth, 0)
示例20
def test_auc_score_non_binary_class():
    """``roc_auc_score`` must raise for single-class and three-class targets.

    The same assertions run twice: once directly and once inside a
    ``warnings.catch_warnings(record=True)`` context.
    """
    # Test that roc_auc_score function returns an error when trying
    # to compute AUC for non-binary class values.

    def assert_non_binary_rejected():
        rng = check_random_state(404)
        y_pred = rng.rand(10)

        # y_true contains only one class value
        single_class_targets = (
            np.zeros(10, dtype="int"),
            np.ones(10, dtype="int"),
            np.full(10, -1, dtype="int"),
        )
        for y_true in single_class_targets:
            assert_raise_message(ValueError, "ROC AUC score is not defined",
                                 roc_auc_score, y_true, y_pred)

        # y_true contains three different class values
        y_true = rng.randint(0, 3, size=10)
        assert_raise_message(ValueError, "multiclass format is not supported",
                             roc_auc_score, y_true, y_pred)

    assert_non_binary_rejected()

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        assert_non_binary_rejected()
示例21
def test_binary_clf_curve():
    """``precision_recall_curve`` must reject targets drawn from three classes."""
    rng = check_random_state(404)
    y_true = rng.randint(0, 3, size=10)
    y_pred = rng.rand(10)

    assert_raise_message(ValueError, "multiclass format is not supported",
                         precision_recall_curve, y_true, y_pred)
示例22
def test_sample_order_invariance(name):
    """The metric must give the same value after shuffling the samples."""
    rng = check_random_state(0)
    y_true = rng.randint(0, 2, size=(20, ))
    y_pred = rng.randint(0, 2, size=(20, ))
    y_true_shuffle, y_pred_shuffle = shuffle(y_true, y_pred, random_state=0)

    with ignore_warnings():
        metric = ALL_METRICS[name]
        in_order = metric(y_true, y_pred)
        shuffled = metric(y_true_shuffle, y_pred_shuffle)
        assert_allclose(in_order, shuffled,
                        err_msg="%s is not sample order invariant" % name)
示例23
def test_sample_order_invariance_multilabel_and_multioutput():
    """Multilabel and multioutput metrics must ignore the sample order."""
    rng = check_random_state(0)

    # Generate some data
    y_true = rng.randint(0, 2, size=(20, 25))
    y_pred = rng.randint(0, 2, size=(20, 25))
    y_score = rng.normal(size=y_true.shape)

    y_true_shuffle, y_pred_shuffle, y_score_shuffle = shuffle(
        y_true, y_pred, y_score, random_state=0)

    def assert_order_invariant(name, estimate, estimate_shuffle):
        # Same metric on the original and on the shuffled samples.
        metric = ALL_METRICS[name]
        assert_allclose(metric(y_true, estimate),
                        metric(y_true_shuffle, estimate_shuffle),
                        err_msg="%s is not sample order invariant" % name)

    for name in MULTILABELS_METRICS:
        assert_order_invariant(name, y_pred, y_pred_shuffle)

    for name in THRESHOLDED_MULTILABEL_METRICS:
        assert_order_invariant(name, y_score, y_score_shuffle)

    # Multioutput metrics are checked on both the scores and the labels.
    for name in MULTIOUTPUT_METRICS:
        assert_order_invariant(name, y_score, y_score_shuffle)
        assert_order_invariant(name, y_pred, y_pred_shuffle)
示例24
def test_thresholded_invariance_string_vs_numbers_labels(name):
    """Thresholded metrics must agree for string and numeric labels."""
    # Ensure that thresholded metrics with string labels are invariant
    rng = check_random_state(0)
    y1 = rng.randint(0, 2, size=(20, ))
    y2 = rng.randint(0, 2, size=(20, ))

    y1_str = np.array(["eggs", "spam"])[y1]
    pos_label_str = "spam"

    with ignore_warnings():
        metric = THRESHOLDED_METRICS[name]
        if name in METRIC_UNDEFINED_BINARY:
            # TODO those metrics doesn't support string label yet
            assert_raises(ValueError, metric, y1_str, y2)
            assert_raises(ValueError, metric, y1_str.astype('O'), y2)
        else:
            # Ugly, but handle case with a pos_label and label
            if name in METRICS_WITH_POS_LABEL:
                metric_str = partial(metric, pos_label=pos_label_str)
            else:
                metric_str = metric

            measure_with_number = metric(y1, y2)

            measure_with_str = metric_str(y1_str, y2)
            assert_array_equal(measure_with_number, measure_with_str,
                               err_msg="{0} failed string vs number "
                                       "invariance test".format(name))

            # Same labels again, stored with object dtype.
            measure_with_strobj = metric_str(y1_str.astype('O'), y2)
            assert_array_equal(measure_with_number, measure_with_strobj,
                               err_msg="{0} failed string object vs number "
                                       "invariance test".format(name))
示例25
def test_multioutput_regression_invariance_to_dimension_shuffling(name):
    """The metric must not change when output columns are shuffled."""
    # test invariance to dimension shuffling
    rng = check_random_state(0)
    y_true = rng.uniform(0, 2, size=(20, 5))
    y_pred = rng.uniform(0, 2, size=(20, 5))

    metric = ALL_METRICS[name]
    error = metric(y_true, y_pred)

    n_outputs = y_true.shape[1]
    for _ in range(3):
        # Apply the same random column order to targets and predictions.
        perm = rng.permutation(n_outputs)
        shuffled_error = metric(y_true[:, perm], y_pred[:, perm])
        assert_allclose(
            shuffled_error, error,
            err_msg="%s is not dimension shuffling invariant" % (name))
示例26
def test_normalize_option_multiclass_classification(name):
    """The unnormalized value divided by ``n_samples`` must match the normalized one."""
    # Test in the multiclass case
    rng = check_random_state(0)
    y_true = rng.randint(0, 4, size=(20, ))
    y_pred = rng.randint(0, 4, size=(20, ))
    n_samples = y_true.shape[0]

    metrics = ALL_METRICS[name]

    # The normalized measure has to be strictly positive for the comparison
    # below to be meaningful.
    measure = metrics(y_true, y_pred, normalize=True)
    assert_array_less(-1.0 * measure, 0,
                      err_msg="We failed to test correctly the normalize "
                              "option")

    unnormalized = metrics(y_true, y_pred, normalize=False)
    assert_allclose(unnormalized / n_samples, measure)
示例27
def test_averaging_multiclass(name):
    """Run ``check_averaging`` on random multiclass labels and scores."""
    n_samples, n_classes = 50, 3
    rng = check_random_state(0)
    y_true = rng.randint(0, n_classes, size=(n_samples, ))
    y_pred = rng.randint(0, n_classes, size=(n_samples, ))
    y_score = rng.uniform(size=(n_samples, n_classes))

    # The binarizer is fitted on the true labels only.
    binarizer = LabelBinarizer().fit(y_true)
    y_true_binarize = binarizer.transform(y_true)
    y_pred_binarize = binarizer.transform(y_pred)

    check_averaging(name, y_true, y_true_binarize,
                    y_pred, y_pred_binarize, y_score)
示例28
def test_averaging_multilabel(name):
    """Run ``check_averaging`` on generated multilabel data."""
    n_samples, n_classes = 40, 5
    _, y = make_multilabel_classification(
        n_features=1,
        n_classes=n_classes,
        random_state=5,
        n_samples=n_samples,
        allow_unlabeled=False,
    )

    # First 20 rows act as the truth, the remaining rows as predictions.
    y_true, y_pred = y[:20], y[20:]
    y_score = check_random_state(0).normal(size=(20, n_classes))

    # The same arrays are passed again for the "binarize" arguments.
    check_averaging(name, y_true, y_true, y_pred, y_pred, y_score)
示例29
def test_regression_sample_weight_invariance(name):
    """Run the sample-weight invariance check on random regression targets."""
    n_samples = 50
    rng = check_random_state(0)

    # regression
    y_true = rng.random_sample(size=(n_samples,))
    y_pred = rng.random_sample(size=(n_samples,))

    check_sample_weight_invariance(name, ALL_METRICS[name], y_true, y_pred)
示例30
def test_binary_sample_weight_invariance(name):
    """Run the sample-weight invariance check on random binary targets."""
    # binary
    n_samples = 50
    rng = check_random_state(0)
    y_true = rng.randint(0, 2, size=(n_samples, ))
    y_pred = rng.randint(0, 2, size=(n_samples, ))
    y_score = rng.random_sample(size=(n_samples,))

    metric = ALL_METRICS[name]
    # Thresholded metrics are fed scores, every other metric gets labels.
    estimate = y_score if name in THRESHOLDED_METRICS else y_pred
    check_sample_weight_invariance(name, metric, y_true, estimate)