import pandas as pd
import numpy as np
from sklearn import preprocessing, svm
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import math
import numpy.linalg as la
df = pd.read_csv("DataWithoutHeader162.csv")
df.columns = ['Temperature','Humidity','Windspeed','Traffic','PM 2.5']
#print(df.head())
forecast_col = 'PM 2.5'
df['label'] = df[forecast_col].shift(1)
df.fillna(value=-99999, inplace=True)
X = np.array(df.drop(['label','PM 2.5'] , 1))
X = preprocessing.scale(X)
df.dropna(inplace = True)
y = np.array(df['label'])
df.dropna(inplace = True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)
#kernel definition
def radial_basis(gamma=10):
return lambda x, y: np.exp(-gamma*la.norm(np.subtract(x, y)))
#SupportVectorMachine with radial_basis Kernel
clf_SVM_radial_basis = SVC(kernel = radial_basis())
clf_SVM_radial_basis.fit(X_train,y_train)
confidence3 = clf_SVM_radial_basis.score(X_test,y_test)
print("Confidence of SVM with radial_basis Kernel = ",(confidence3*100),"%")
此代码显示错误:
Traceback (most recent call last):
File "F:\MachineLearningPyCodes\SvmOnDelhiAqiDataPrbf.py", line 68, in
module
clf_SVM_radial_basis.fit(X_train,y_train)
File "C:\Python35\lib\site-packages\sklearn\svm\base.py", line 189, in fit
fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
File "C:\Python35\lib\site-packages\sklearn\svm\base.py", line 230, in
_dense_fit
if X.shape[0] != X.shape[1]:
IndexError: tuple index out of range
我尝试了不同的方法,但我猜我无法按照要求格式化我的数据集,请告诉我一种方法。
最佳答案
我也对 SVC
的 kernel
参数如何工作感到困惑。它不仅仅是您所拥有的内核函数radial_basis
;它也是您所拥有的。它实际上必须返回Gram 矩阵。您可以在 sklearn 文档中看到这一点:
http://scikit-learn.org/stable/modules/svm.html#custom-kernels
总结该部分:您有两个选择。
(1) 将 Gram 矩阵直接插入到 fit()
方法中(不仅仅是普通的 X_train
),并使用 kernel='precompulated'
;或
(2) 编写一个返回 Gram 矩阵的函数,然后您可以将该新函数传递给内核
。
这个SO有很好的例子。改编他们写的内容,你可以这样做。我将使用第二种方法,并保留原始的 radial_basis
以供说明。
def radial_basis(x, y, gamma=10):
return np.exp(-gamma * la.norm(np.subtract(x, y)))
def proxy_kernel(X, Y, K=radial_basis):
"""Another function to return the gram_matrix,
which is needed in SVC's kernel or fit
"""
gram_matrix = np.zeros((X.shape[0], Y.shape[0]))
for i, x in enumerate(X):
for j, y in enumerate(Y):
gram_matrix[i, j] = K(x, y)
return gram_matrix
clf_SVM_radial_basis = SVC(kernel=proxy_kernel) # Note that it's proxy_kernel here now
clf_SVM_radial_basis.fit(X_train, y_train)
关于python - 我正在尝试使用自定义 RBF 内核在 scikit-learn 中实现 SVM,但它显示错误,该怎么办?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43994505/