我正在训练 C5.0 决策树,一切都运行良好,直到我尝试对测试数据集进行预测。我不明白下面这个错误的含义:
# Attach pacman, then use p_load() to bring in the remaining packages.
library(pacman)
p_load(tidyverse, NHANES, C50)

# Randomly pick 75% of the row indices for the training split.
rows <- sample(nrow(NHANES), as.integer(0.75 * nrow(NHANES)))

# Both splits drop ID and move the outcome SleepTrouble to the first column.
nhanes_train <- select(NHANES[rows, ], SleepTrouble, everything(), -ID)
nhanes_test <- select(NHANES[-rows, ], SleepTrouble, everything(), -ID)

# Fit through the x/y interface: predictors are every column but the first.
nhanes_tree <- C5.0(x = nhanes_train[-1], y = nhanes_train$SleepTrouble)

# Predict on the held-out rows; this is the step reported to fail.
nhanes_tree_pred <- predict(nhanes_tree, newdata = nhanes_test)
输出:
Error: *** line 1 of undefined.cases': bad value of c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,' for attribute `SurveyYr' Error limit exceeded
最佳答案
看起来,当数据中含有因子等非数值型变量时,必须使用该函数的公式接口。下面的写法可以正常运行:
# Fit through the formula interface: SleepTrouble is the outcome and every
# other column of nhanes_train is a predictor.
nhanes_tree <- C5.0(SleepTrouble ~ ., data = nhanes_train)

# Predict the outcome for the held-out rows.
nhanes_tree_pred <- predict(nhanes_tree, newdata = nhanes_test)
来自文档:
When using the formula method, factors and other classes are preserved (i.e. dummy variables are not automatically created). This particular model handles non-numeric data of some types (such as character, factor and ordered data).
关于 R 中的错误“*** line 1 of `undefined.cases': bad value of ... for attribute”,我们在 Stack Overflow 上找到了一个类似的问题: https://stackoverflow.com/questions/60445014/