Python源码示例:sklearn.preprocessing.robust_scale()
示例1
def test_robust_standardize_to_sklearn(args):
    """Check ``stats.robust_standardize`` against scikit-learn's ``robust_scale``.

    ``args`` is unpacked into ``(X, q_level)``. ``X`` is indexed as ``X[:, None]``
    below, so it is treated as a 1-D array here; ``q_level`` is the width of the
    central quantile interval used for scaling.
    """
    X, q_level = args

    # Symmetric pair of quantiles around 0.5 whose difference equals q_level.
    q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)
    assert close_enough(q1 - q0, q_level)

    X_bo = stats.robust_standardize(X, q_level=q_level)

    # Add a trailing axis so the data is passed as a single column.
    X = X[:, None]
    X_skl = robust_scale(
        X,
        axis=0,
        with_centering=True,
        with_scaling=True,
        # scikit-learn documents quantile_range as a tuple (in percent); recent
        # releases validate the type, so a tuple is passed rather than a list.
        quantile_range=(100.0 * q0, 100.0 * q1),
    )

    # Multiply the sklearn result by the width of the same quantile interval
    # under ``sst.norm`` (presumably scipy.stats.norm - confirm against the
    # file's imports) before comparing with robust_standardize's output.
    X_skl = X_skl[:, 0] * (sst.norm.ppf(q1) - sst.norm.ppf(q0))
    assert close_enough(X_bo, X_skl, equal_nan=True)
示例2
def sk_robust(X):
    """Return ``X`` scaled by ``robust_scale`` called with its default arguments."""
    scaled = robust_scale(X)
    return scaled
示例3
def loopCalculate(xyzArray,eps,fn):
    """Run the clustering once per value in ``eps`` and collect per-run results.

    For every distance in ``eps`` the points in ``xyzArray`` are clustered with
    ``affinityPropagationForPoints``, the prediction is written out through
    ``point2Shp`` (``fn`` is forwarded to it unchanged), and the cluster-size
    frequencies are robust-scaled.

    NOTE: ``dataBunch`` and ``class_mapping`` are not parameters; they are read
    from the enclosing (module) scope.

    Returns a 4-tuple, one entry per processed distance:
        1. list of robust-scaled cluster-frequency arrays (flattened),
        2. list with the number of predicted cluster labels,
        3. dict mapping the run index (0, 1, ...) to the contingency table,
        4. list of partial-correlation results.
    """
    scaled_counts = []
    label_totals = []
    tables_by_run = {}
    partial_corrs = []

    # Cluster once for every requested distance.
    for run_idx, dist in enumerate(eps):
        # Clustering step; per the helper's contract it yields the predictions
        # and the cluster labels.
        pred, predLable = affinityPropagationForPoints(xyzArray, dist)

        # Output layer name derived from the distance.
        layer_name = r'%s_POI' % dist
        point2Shp(dataBunch, pred, fn, layer_name)
        print("%s has been written to disk" % dist)

        # Frequency of each cluster label in the prediction.
        label_freq = Counter(pred)
        sizes = np.array(list(label_freq.values())).astype(float)

        # Mean/variance scaling behaves poorly when the data contains outliers,
        # so the counts are scaled with robust_scale instead.
        scaled = preprocessing.robust_scale(sizes.reshape(-1, 1))
        # Flatten back to 1-D (ravel(), as opposed to flatten()).
        scaled_counts.append(scaled.ravel())
        label_totals.append(len(predLable))

        # Contingency table and partial-correlation analysis for this run.
        CTable, partial_correlations = contingencyTableChi2andPOISpaceStructure(
            dataBunch, pred, class_mapping, predLable, layer_name
        )
        tables_by_run[run_idx] = CTable
        partial_corrs.append(partial_correlations)

    return scaled_counts, label_totals, tables_by_run, partial_corrs
示例4
def loopCalculate(df_osm,epsDegree,fn,eps):
    """Run the clustering once per distance and collect cluster-size statistics.

    The ``lon``/``lat`` columns of ``df_osm`` are stacked into a two-column
    array. For each entry of ``epsDegree`` that array is clustered with
    ``affinityPropagationForPoints`` and the prediction is written out through
    ``point2Shp`` (``fn`` is forwarded to it unchanged). ``eps`` is indexed in
    parallel with ``epsDegree`` and is only used to name the output layer and
    in the progress message.

    Returns a 2-tuple, one entry per processed distance:
        1. list of robust-scaled cluster-frequency arrays (flattened),
        2. list with the number of predicted cluster labels.
    """
    xyzArray = pd.DataFrame({"lon": df_osm['lon'], "lat": df_osm['lat']}).to_numpy()
    robustScaleList = []
    totalNumber = []

    # Cluster once for every requested distance.
    for idx, eps_deg in enumerate(epsDegree):
        # Clustering step; per the helper's contract it yields the predictions
        # and the cluster labels.
        pred, predLable = affinityPropagationForPoints(xyzArray, eps_deg)

        # Output layer name derived from the matching entry of ``eps``.
        pt_lyrName_w = r'%s_POI' % eps[idx]
        point2Shp(df_osm, pred, fn, pt_lyrName_w)
        # Report the distance that names the layer rather than the bare loop
        # index, so the message identifies what was actually written.
        print("\n%s has been written to disk" % eps[idx])

        # Frequency of each cluster label in the prediction.
        counterData = Counter(pred)
        counterValue = np.array(list(counterData.values()))
        cvFloat = counterValue.astype(float)

        # Mean/variance scaling behaves poorly when the data contains outliers,
        # so the counts are scaled with robust_scale instead.
        robustScale = preprocessing.robust_scale(cvFloat.reshape(-1, 1))
        # Flatten back to 1-D (ravel(), as opposed to flatten()).
        robustScaleList.append(robustScale.ravel())
        totalNumber.append(len(predLable))

    return robustScaleList, totalNumber