R.do.call函数返回多

标签 r classification do.call

具有以下内容

> library(gbm)
> mdl<-gbm::gbm(data=iris,formula=Species ~ .,distribution="gaussian")

然后:

> mdl
gbm::gbm(formula = Species ~ ., distribution = "gaussian", data = iris)
A gradient boosted model with gaussian loss function.
100 iterations were performed.
There were 4 predictors of which 4 had non-zero influence.

我想要什么。

但是,如果我使用do.call:

> mdl<-do.call(gbm::gbm,list(data=iris,formula=Species ~ .,distribution="gaussian"))

然后

> mdl
(function (formula = formula(data), distribution = "bernoulli", 
    data = list(), weights, var.monotone = NULL, n.trees = 100, 
    interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.1, 
    bag.fraction = 0.5, train.fraction = 1, cv.folds = 0, keep.data = TRUE,
...
...
...
A gradient boosted model with gaussian loss function.
100 iterations were performed.
There were 4 predictors of which 4 had non-zero influence.

它首先打印 gbm 函数的定义,然后打印 gbm 调用(包括整个数据集 iris),最后打印我正在查找的文本。

我需要使用 do.call 因为我想参数化任何分类算法的所有参数,然后将输出放入 shiny< 中的 verbatimTextOutput.

有没有办法阻止do.call返回gbm的定义以及整个数据集?或者也许有其他方法来执行 gbm 在列表中传递参数?

谢谢。

最佳答案

我们必须做两件事来防止这种行为:

首先,我们可以在iris上使用substitute()。这将阻止调用列出iris数据集的所有列。

其次,我们应该避免在 do.call 中使用 gbm::gbm ,而是加载库并使用字符串 "gbm"而不是调用函数:

library(gbm)

mdl <- do.call("gbm",
               list(data = substitute(iris),
                    formula= Species ~ .,
                    distribution = "gaussian")
               )
mdl

#> gbm(formula = Species ~ ., distribution = "gaussian", data = iris)
#> A gradient boosted model with gaussian loss function.
#> 100 iterations were performed.
#> There were 4 predictors of which 4 had non-zero influence.

如果我们在 do 调用中使用 gbm::gbm ,它将在捕获的调用中包含整个函数定义:

mdl <- do.call(gbm::gbm,
               list(data = substitute(iris),
                    formula= Species ~ .,
                    distribution = "gaussian")
               )
mdl

#> (function (formula = formula(data), distribution = "bernoulli", 
#>     data = list(), weights, var.monotone = NULL, n.trees = 100, 
#>     interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.1, 
#>     bag.fraction = 0.5, train.fraction = 1, cv.folds = 0, keep.data = TRUE, 
#>     verbose = FALSE, class.stratify.cv = NULL, n.cores = NULL) 
#> {
#>     mcall <- match.call()
#>     lVerbose <- if (!is.logical(verbose)) {
#>         FALSE
#>     }
#>     else {
#>         verbose
#>     }
#>     mf <- match.call(expand.dots = FALSE)
#>     m <- match(c("formula", "data", "weights", "offset"), names(mf), 
#>         0)
#>     mf <- mf[c(1, m)]
#>     mf$drop.unused.levels <- TRUE
#>     mf$na.action <- na.pass
#>     mf[[1]] <- as.name("model.frame")
#>     m <- mf
#>     mf <- eval(mf, parent.frame())
#>     Terms <- attr(mf, "terms")
#>     w <- model.weights(mf)
#>     offset <- model.offset(mf)
#>     y <- model.response(mf)
#>     if (missing(distribution)) {
#>         distribution <- guessDist(y)
#>     }
#>     if (is.character(distribution)) {
#>         distribution <- list(name = distribution)
#>     }
#>     if (!is.element(distribution$name, getAvailableDistributions())) {
#>         stop("Distribution ", distribution$name, " is not supported.")
#>     }
#>     if (distribution$name == "multinomial") {
#>         warning("Setting `distribution = \"multinomial\"` is ill-advised as it is ", 
#>             "currently broken. It exists only for backwards compatibility. ", 
#>             "Use at your own risk.", call. = FALSE)
#>     }
#>     var.names <- attributes(Terms)$term.labels
#>     x <- model.frame(terms(reformulate(var.names)), data = data, 
#>         na.action = na.pass)
#>     response.name <- as.character(formula[[2L]])
#>     class.stratify.cv <- getStratify(class.stratify.cv, d = distribution)
#>     group <- NULL
#>     num.groups <- 0
#>     if (distribution$name != "pairwise") {
#>         nTrain <- floor(train.fraction * nrow(x))
#>     }
#>     else {
#>         distribution.group <- distribution[["group"]]
#>         if (is.null(distribution.group)) {
#>             stop(paste("For pairwise regression, `distribution` must be a list of", 
#>                 "the form `list(name = \"pairwise\", group = c(\"date\",", 
#>                 "\"session\", \"category\", \"keywords\"))`."))
#>         }
#>         i <- match(distribution.group, colnames(data))
#>         if (any(is.na(i))) {
#>             stop("Group column does not occur in data: ", distribution.group[is.na(i)], 
#>                 ".")
#>         }
#>         group <- factor(do.call(paste, c(data[, distribution.group, 
#>             drop = FALSE], sep = ":")))
#>         if ((!missing(weights)) && (!is.null(weights))) {
#>             w.min <- tapply(w, INDEX = group, FUN = min)
#>             w.max <- tapply(w, INDEX = group, FUN = max)
#>             if (any(w.min != w.max)) {
#>                 stop("For `distribution = \"pairwise\"`, all instances for the same ", 
#>                   "group must have the same weight.")
#>             }
#>             w <- w * length(w.min)/sum(w.min)
#>         }
#>         perm.levels <- levels(group)[sample(1:nlevels(group))]
#>         group <- factor(group, levels = perm.levels)
#>         ord.group <- order(group, -y)
#>         group <- group[ord.group]
#>         y <- y[ord.group]
#>         x <- x[ord.group, , drop = FALSE]
#>         w <- w[ord.group]
#>         num.groups.train <- max(1, round(train.fraction * nlevels(group)))
#>         nTrain <- max(which(group == levels(group)[num.groups.train]))
#>         Misc <- group
#>     }
#>     cv.error <- NULL
#>     if (cv.folds == 1) {
#>         cv.folds <- 0
#>     }
#>     if (cv.folds > 1) {
#>         cv.results <- gbmCrossVal(cv.folds = cv.folds, nTrain = nTrain, 
#>             n.cores = n.cores, class.stratify.cv = class.stratify.cv, 
#>             data = data, x = x, y = y, offset = offset, distribution = distribution, 
#>             w = w, var.monotone = var.monotone, n.trees = n.trees, 
#>             interaction.depth = interaction.depth, n.minobsinnode = n.minobsinnode, 
#>             shrinkage = shrinkage, bag.fraction = bag.fraction, 
#>             var.names = var.names, response.name = response.name, 
#>             group = group)
#>         cv.error <- cv.results$error
#>         p <- cv.results$predictions
#>     }
#>     gbm.obj <- gbm.fit(x = x, y = y, offset = offset, distribution = distribution, 
#>         w = w, var.monotone = var.monotone, n.trees = n.trees, 
#>         interaction.depth = interaction.depth, n.minobsinnode = n.minobsinnode, 
#>         shrinkage = shrinkage, bag.fraction = bag.fraction, nTrain = nTrain, 
#>         keep.data = keep.data, verbose = lVerbose, var.names = var.names, 
#>         response.name = response.name, group = group)
#>     gbm.obj$train.fraction <- train.fraction
#>     gbm.obj$Terms <- Terms
#>     gbm.obj$cv.error <- cv.error
#>     gbm.obj$cv.folds <- cv.folds
#>     gbm.obj$call <- mcall
#>     gbm.obj$m <- m
#>     if (cv.folds > 1) {
#>         gbm.obj$cv.fitted <- p
#>     }
#>     if (distribution$name == "pairwise") {
#>         gbm.obj$ord.group <- ord.group
#>         gbm.obj$fit <- gbm.obj$fit[order(ord.group)]
#>     }
#>     gbm.obj
#> })(formula = Species ~ ., distribution = "gaussian", data = iris)
#> A gradient boosted model with gaussian loss function.
#> 100 iterations were performed.
#> There were 4 predictors of which 4 had non-zero influence.

reprex package 于 2023 年 2 月 20 日创建(v2.0.1)

关于R.do.call函数返回多,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/75512192/

相关文章:

r - 如何将列表与环境一起放置?

python - 如何计算python中点过程的残差

r - 在 R 中的高尔夫网站上抓取排行榜表

python - 使用 num_labels 1 vs 2 的 huggingface 变形金刚分类

r - 如何使用 shinydashboardPlus 的轮播实现动态数量的幻灯片?

r - Top_n 返回最大值和最小值 - R

python - 为分类器设置参数并在不拟合的情况下使用它

machine-learning - 涉及数字范围作为标签的多标签分类

R 使用 do.call、lapply 或 mapply 从向量列表中进行子选择?