以下是我在进行超参数调整之前的代码。我使用抑制分割将数据溢出到测试和训练中:
class_label=repair['PART_NO']
x=repair.drop('PART_NO',1)
X_train, X_test, y_train, y_test=cross_validation.train_test_split(x,class_label, train_size = 0.80)
def modelfit(alg, X_train, y_train ,useTrainCV=True, cv_folds=5,early_stopping_rounds=50):
if useTrainCV:
xgb_param = alg.get_xgb_params()
xgtrain = xgb.DMatrix(X_train, label=y_train)
extra = {'num_class': 2182}
xgb_param.update(extra)
cvresult = xgb.cv(xgb_param,
xgtrain,
num_boost_round=alg.get_params()['n_estimators'],
nfold=cv_folds,
stratified=True,
metrics={'merror'},
early_stopping_rounds=early_stopping_rounds,
seed=0,
callbacks=[xgb.callback.print_evaluation(show_stdv=False)]),
print cvresult
alg.set_params(n_estimators=cvresult.shape[0])
#Fit the algorithm on the data
alg.fit(X_train, y_train,eval_metric='merror')
#Predict training set:
dtrain_predictions = alg.predict(X_test)
dtrain_predprob = alg.predict_proba(X_test)
#Print model report:
print "\nModel Report"
print "Accuracy : %.4g" % metrics.accuracy_score( dtrain_predictions,y_test)
print "Merror Score (Train): %f" % metrics.merror_score( dtrain_predprob,y_test)
feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
feat_imp.plot(kind='bar', title='Feature Importances')
plt.ylabel('Feature Importance Score')
此后,我尝试选择除目标之外的所有预测变量,并获取估计变量的数量,如下所示:
xgb1 = XGBClassifier(
learning_rate =0.1,
n_estimators=280,
max_depth=5,
min_child_weight=1,
gamma=0,
subsample=0.8,
colsample_bytree=0.8,
objective= 'multi:softprob',
nthread=4,
scale_pos_weight=1,
seed=27)
modelfit(xgb1, X_train, y_train)
但是,在执行此操作时,即使我在 xgb_params 中提到了 num_class 并且我的 y_train 类型为 int,我也会收到以下错误。 请建议该怎么办?具体错误如下:
--------------------------------------------------------------------------- XGBoostError Traceback (most recent call last) in () 12 scale_pos_weight=1, 13 seed=27) ---> 14 modelfit(xgb1, X_train, y_train)
in modelfit(alg, X_train, y_train, useTrainCV, cv_folds, early_stopping_rounds) 14 early_stopping_rounds=early_stopping_rounds, 15 seed=0, ---> 16 callbacks=[xgb.callback.print_evaluation(show_stdv=False)]), 17 print cvresult 18 alg.set_params(n_estimators=cvresult.shape[0])
/Users/sayontimondal/anaconda2/lib/python2.7/site-packages/xgboost/training.pyc in cv(params, dtrain, num_boost_round, nfold, stratified, folds, metrics, obj, feval, maximize, early_stopping_rounds, fpreproc, as_pandas, verbose_eval, show_stdv, seed, callbacks, shuffle) 404 evaluation_result_list=None)) 405 for fold in cvfolds: --> 406 fold.update(i, obj) 407 res = aggcv([f.eval(i, feval) for f in cvfolds]) 408
/Users/sayontimondal/anaconda2/lib/python2.7/site-packages/xgboost/training.pyc in update(self, iteration, fobj) 216 def update(self, iteration, fobj): 217 """"Update the boosters for one iteration""" --> 218 self.bst.update(self.dtrain, iteration, fobj) 219 220 def eval(self, iteration, feval):
/Users/sayontimondal/anaconda2/lib/python2.7/site-packages/xgboost/core.pyc in update(self, dtrain, iteration, fobj) 892 if fobj is None: 893 _check_call(_LIB.XGBoosterUpdateOneIter(self.handle, ctypes.c_int(iteration), --> 894 dtrain.handle)) 895 else: 896 pred = self.predict(dtrain)
/Users/sayontimondal/anaconda2/lib/python2.7/site-packages/xgboost/core.pyc in _check_call(ret) 128 """ 129 if ret != 0: --> 130 raise XGBoostError(_LIB.XGBGetLastError()) 131 132
XGBoostError: [13:34:08] src/objective/multiclass_obj.cc:78: Check failed: label_error >= 0 && label_error < nclass SoftmaxMultiClassObj: label must be in [0, num_class), num_class=2182 but found 2182 in label.
Stack trace returned 7 entries: [bt] (0) 0 libxgboost.dylib
0x000000010d0684a0 dmlc::StackTrace() + 288 [bt] (1) 1
libxgboost.dylib 0x000000010d06823f dmlc::LogMessageFatal::~LogMessageFatal() + 47 [bt] (2) 2
libxgboost.dylib 0x000000010d0dcf9a xgboost::obj::SoftmaxMultiClassObj::GetGradient(xgboost::HostDeviceVector*, xgboost::MetaInfo const&, int, xgboost::HostDeviceVectorblock 引用>) + 2218 [bt] (3) 3 libxgboost.dylib 0x000000010d0645f9 xgboost::LearnerImpl::UpdateOneIter(int, xgboost::DMatrix) + 1017 [bt] (4) 4 libxgboost.dylib
0x000000010d07ef07 XGBoosterUpdateOneIter + 87 [bt] (5) 5 _ctypes.so 0x0000000103528677 ffi_call_unix64 + 79 [bt] (6) 6 ???
0x00007ffeefbfa980 0x0 + 140732920736128在谷歌上搜索它没有显示任何结果。
最佳答案
您的标签应该从 0 开始到类总数 - 1。 例如 - 如果您的类标签为 (1,2,3,4,5)。为了将其提供给 multi:softprob 目标,您需要将其转换为 (0,1,2,3,4) 类。 可以通过使用 y.replace({1:0, 2:1, 3:2, 4:3, 5:4}, inplace = True)
来完成关于python - 如何使用 XGBoost softprob 多类分类,这样我就不会收到 num_class 的错误?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52618302/