Python源码示例:sklearn.preprocessing.robust_scale()

示例1
def test_robust_standardize_to_sklearn(args):
    """Compare ``stats.robust_standardize`` with a rescaled ``robust_scale``.

    ``args`` unpacks to ``(X, q_level)``. The reference value is obtained by
    running ``robust_scale`` on ``X`` viewed as a single column, using the
    central quantile range of width ``q_level`` (expressed in percent), and
    then multiplying by the difference of the ``sst.norm.ppf`` values at the
    same two quantile levels.
    """
    X, q_level = args

    # Lower / upper quantile levels bounding a central interval of width q_level.
    lower_q = 0.5 * (1.0 - q_level)
    upper_q = 0.5 * (1.0 + q_level)
    assert close_enough(upper_q - lower_q, q_level)

    X_bo = stats.robust_standardize(X, q_level=q_level)

    # robust_scale is fed a 2-D input here, so add a trailing axis to X.
    column = X[:, None]
    percent_range = [100.0 * lower_q, 100.0 * upper_q]
    scaled = robust_scale(
        column,
        axis=0,
        with_centering=True,
        with_scaling=True,
        quantile_range=percent_range,
    )
    ppf_spread = sst.norm.ppf(upper_q) - sst.norm.ppf(lower_q)
    X_skl = scaled[:, 0] * ppf_spread

    assert close_enough(X_bo, X_skl, equal_nan=True)
示例2
def sk_robust(X):
    """Return ``robust_scale(X)``, leaving every other argument at its default."""
    scaled = robust_scale(X)
    return scaled
示例3
def loopCalculate(xyzArray, eps, fn):
    """Cluster the points once per entry of ``eps`` and gather per-run results.

    For every value in ``eps`` the points are clustered with
    ``affinityPropagationForPoints``, the result is written out through
    ``point2Shp``, the cluster-label frequencies are robust-scaled, and the
    contingency table / partial correlations are computed.

    Relies on ``dataBunch`` and ``class_mapping`` being available from the
    enclosing scope.

    Returns a 4-tuple:
        1. list of robust-scaled cluster-label frequency arrays,
        2. list of ``len(predLable)`` values (number of predicted labels),
        3. dict mapping the 0-based run index to its contingency table,
        4. list of partial-correlation results.
    """
    scaledCounts = []
    labelTotals = []
    tablesByRun = {}
    partialCorrs = []

    # Run the clustering for each distance in turn.
    for runIdx, distance in enumerate(eps):
        # Clustering step: returns the predictions and the cluster labels.
        pred, predLable = affinityPropagationForPoints(xyzArray, distance)

        # Output layer name built with %-formatting.
        layerName = r'%s_POI' % distance
        point2Shp(dataBunch, pred, fn, layerName)
        print("%s has been written to disk" % distance)

        # Frequency of each cluster label, as floats.
        clusterSizes = np.array(list(Counter(pred).values())).astype(float)
        # Mean/variance scaling behaves poorly when the data contain outliers,
        # so preprocessing.robust_scale() is used instead.
        scaledColumn = preprocessing.robust_scale(clusterSizes.reshape(-1, 1))
        # Flatten back to 1-D (note that numpy's ravel() and flatten() differ).
        scaledCounts.append(scaledColumn.ravel())
        labelTotals.append(len(predLable))

        # Contingency table and partial-correlation analysis for this run.
        CTable, partial_correlations = contingencyTableChi2andPOISpaceStructure(
            dataBunch, pred, class_mapping, predLable, layerName
        )
        tablesByRun[runIdx] = CTable
        partialCorrs.append(partial_correlations)

    return scaledCounts, labelTotals, tablesByRun, partialCorrs
示例4
def loopCalculate(df_osm, epsDegree, fn, eps):
    """Cluster the ``lon``/``lat`` points once per entry of ``epsDegree``.

    For each position ``i`` the points are clustered with
    ``affinityPropagationForPoints`` using ``epsDegree[i]``, the result is
    written out through ``point2Shp`` under a layer name derived from
    ``eps[i]``, and the cluster-label frequencies are robust-scaled.

    Parameters:
        df_osm: table-like object providing ``'lon'`` and ``'lat'`` columns;
            also passed unchanged to ``point2Shp``.
        epsDegree: sequence of values handed to the clustering routine.
        fn: forwarded to ``point2Shp``.
        eps: sequence indexed in step with ``epsDegree``; only used to build
            the output layer name. An ``eps`` shorter than ``epsDegree``
            raises on the first missing index, as before.

    Returns a 2-tuple:
        1. list of robust-scaled cluster-label frequency arrays,
        2. list of ``len(predLable)`` values (number of predicted labels).
    """
    xyzArray = pd.DataFrame({"lon": df_osm['lon'], "lat": df_osm['lat']}).to_numpy()
    robustScaleList = []
    totalNumber = []

    # Run the clustering for each distance in turn.
    for i, degree in enumerate(epsDegree):
        # Clustering step: returns the predictions and the cluster labels.
        pred, predLable = affinityPropagationForPoints(xyzArray, degree)

        # Output layer name built with %-formatting.
        pt_lyrName_w = r'%s_POI' % eps[i]
        point2Shp(df_osm, pred, fn, pt_lyrName_w)
        # NOTE(review): this reports the loop index rather than eps[i] or the
        # layer name — confirm that is intended before changing the output.
        print("\n%s has been written to disk" % i)

        # Frequency of each cluster label, as floats.
        counterData = Counter(pred)
        cvFloat = np.array(list(counterData.values())).astype(float)
        # Mean/variance scaling behaves poorly when the data contain outliers,
        # so preprocessing.robust_scale() is used instead.
        robustScale = preprocessing.robust_scale(cvFloat.reshape(-1, 1))
        # Flatten back to 1-D (note that numpy's ravel() and flatten() differ).
        robustScaleList.append(robustScale.ravel())
        totalNumber.append(len(predLable))

    return robustScaleList, totalNumber