Python源码示例:sklearn.datasets.fetch_california_housing()

示例1
def fetch(*args, **kwargs):
    """Call ``fetch_california_housing`` with ``download_if_missing=False``.

    All positional and keyword arguments are forwarded unchanged. Because
    ``download_if_missing`` is always supplied here, passing it as a keyword
    argument as well raises ``TypeError`` (duplicate keyword).
    """
    housing = fetch_california_housing(
        *args, download_if_missing=False, **kwargs
    )
    return housing
示例2
def load_data_target(name):
    """
    Loads data and target given the name of the dataset.

    Parameters
    ----------
    name : str
        One of "Boston", "Housing", "digits" or "Climate Model Crashes".

    Returns
    -------
    tuple
        ``(data["data"], data["target"])`` of the selected dataset. For
        "Housing" only the first 1000 samples are returned.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported dataset names.
    """
    if name == "Boston":
        data = load_boston()
    elif name == "Housing":
        data = fetch_california_housing()
        # Truncate the dataset so that SVR does not slow down too much.
        dataset_size = 1000
        data["data"] = data["data"][:dataset_size]
        data["target"] = data["target"][:dataset_size]
    elif name == "digits":
        data = load_digits()
    elif name == "Climate Model Crashes":
        try:
            data = fetch_mldata("climate-model-simulation-crashes")
        except HTTPError:
            # Fall back to downloading the raw whitespace-separated file.
            url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00252/pop_failures.dat"
            response = urlopen(url)
            try:
                raw = response.read()
            finally:
                # Always release the connection, even if reading fails.
                response.close()
            # On Python 3 the payload is bytes; it must be decoded before it
            # can be split on a str separator.
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8")
            # Skip the first line (presumably a header row) and ignore blank
            # lines, which would otherwise produce ragged rows.
            rows = [row for row in raw.split('\n')[1:] if row.strip()]
            samples = np.array([[float(v) for v in row.split()] for row in rows])
            data = dict()
            data["data"] = samples[:, :-1]
            # ``np.int`` was removed from NumPy; the builtin ``int`` is the
            # equivalent dtype.
            data["target"] = np.array(samples[:, -1], dtype=int)
    else:
        raise ValueError("dataset not supported.")
    return data["data"], data["target"]
示例3
def load_housing():
    """Build a shuffled 80/20 train/test split of the California housing data.

    Steps, in order:
      1. Standardise every feature column (zero mean, unit standard
         deviation) using statistics of the full dataset.
      2. Drop samples whose target is not below 5.
      3. Replace the remaining targets by their natural logarithm.
      4. Shuffle with a fixed seed (this re-seeds NumPy's global RNG).
      5. Use the first 80% of the samples for training, the rest for testing.

    Returns
    -------
    dict
        Keys ``'err'`` (always ``'mse'``), ``'trn_X'``, ``'trn_Y'``,
        ``'tst_X'`` and ``'tst_Y'``; the ``*_Y`` entries are column vectors.
    """
    from sklearn.datasets import fetch_california_housing

    housing = fetch_california_housing()
    features = housing['data']
    prices = housing['target']

    # Centre, then scale by the standard deviation of the centred columns.
    features -= features.mean(axis=0)
    features /= features.std(axis=0)

    # Prices above 5 are all collapsed to 5, which distorts the target
    # distribution, so those samples are discarded.
    keep = prices < 5
    features = features[keep]
    prices = np.log(prices[keep])

    # Deterministic shuffle.
    np.random.seed(12345)
    order = np.random.permutation(len(features))
    features, prices = features[order], prices[order]

    n_train = int(len(features) * 0.8)

    def as_column(values):
        # Turn a 1-D target vector into an (n, 1) column.
        return np.atleast_2d(values).T

    return {
        'err': 'mse',
        'trn_X': features[:n_train],
        'trn_Y': as_column(prices[:n_train]),
        'tst_X': features[n_train:],
        'tst_Y': as_column(prices[n_train:]),
    }