r - 在 R 中封装 dglm

标签 r glm tweedie

我正在尝试使用 dglm 包在 R 中安装双 glm。这与 statmod 包结合使用以使用 tweedie 模型。问题的重现是:

library(dglm)
library(statmod)

p <- 1.5
y <- runif(10)
x <- runif(10)

dglm(y~x,~x,family=tweedie(link.power=0, var.power=p))
#doesnt work

dglm(y~x,~x,family=tweedie(link.power=0, var.power=1.5))
#works

var.power 需要在变量中定义,因为我想使用一个循环,其中 dglm 在其每个条目上运行

最佳答案

因此,您可以通过强制 dglm 评估您输入 p 的调用来解决该问题。在 dglm 函数中,大约第 73 行:

if (family$family == "Tweedie") {
    tweedie.p <- call$family$var.power
}

应该是:

if (family$family == "Tweedie") {
    tweedie.p <- eval(call$family$var.power)
}

您可以使用补丁创建自己的函数,如下所示:

dglm.nograpes <- function (formula = formula(data), dformula = ~1, family = gaussian, 
    dlink = "log", data = sys.parent(), subset = NULL, weights = NULL, 
    contrasts = NULL, method = "ml", mustart = NULL, betastart = NULL, 
    etastart = NULL, phistart = NULL, control = dglm.control(...), 
    ykeep = TRUE, xkeep = FALSE, zkeep = FALSE, ...) 
{
    call <- match.call()
    if (is.character(family)) 
        family <- get(family, mode = "function", envir = parent.frame())
    if (is.function(family)) 
        family <- family()
    if (is.null(family$family)) {
        print(family)
        stop("'family' not recognized")
    }
    mnames <- c("", "formula", "data", "weights", "subset")
    cnames <- names(call)
    cnames <- cnames[match(mnames, cnames, 0)]
    mcall <- call[cnames]
    mcall[[1]] <- as.name("model.frame")
    mframe <<- eval(mcall, sys.parent())
    mf <- match.call(expand.dots = FALSE)
    y <- model.response(mframe, "numeric")
    if (is.null(dim(y))) {
        N <- length(y)
    }
    else {
        N <- dim(y)[1]
    }
    nobs <- N
    mterms <- attr(mframe, "terms")
    X <- model.matrix(mterms, mframe, contrasts)
    weights <- model.weights(mframe)
    if (is.null(weights)) 
        weights <- rep(1, N)
    if (is.null(weights)) 
        weights <- rep(1, N)
    if (!is.null(weights) && any(weights < 0)) {
        stop("negative weights not allowed")
    }
    offset <- model.offset(mframe)
    if (is.null(offset)) 
        offset <- rep(0, N)
    if (!is.null(offset) && length(offset) != NROW(y)) {
        stop(gettextf("number of offsets is %d should equal %d (number of observations)", 
            length(offset), NROW(y)), domain = NA)
    }
    mcall$formula <- formula
    mcall$formula[3] <- switch(match(length(dformula), c(0, 2, 
        3)), 1, dformula[2], dformula[3])
    mframe <- eval(mcall, sys.parent())
    dterms <- attr(mframe, "terms")
    Z <- model.matrix(dterms, mframe, contrasts)
    doffset <- model.extract(mframe, offset)
    if (is.null(doffset)) 
        doffset <- rep(0, N)
    name.dlink <- substitute(dlink)
    if (is.name(name.dlink)) {
        if (is.character(dlink)) {
            name.dlink <- dlink
        }
        else {
            dlink <- name.dlink <- as.character(name.dlink)
        }
    }
    else {
        if (is.call(name.dlink)) 
            name.dlink <- deparse(name.dlink)
    }
    if (!is.null(name.dlink)) 
        name.dlink <- name.dlink
    if (family$family == "Tweedie") {
        tweedie.p <- eval(call$family$var.power)
    }
    Digamma <- family$family == "Gamma" || (family$family == 
        "Tweedie" && tweedie.p == 2)
    if (Digamma) {
        linkinv <- make.link(name.dlink)$linkinv
        linkfun <- make.link(name.dlink)$linkfun
        mu.eta <- make.link(name.dlink)$mu.eta
        valid.eta <- make.link(name.dlink)$valid.eta
        init <- expression({
            if (any(y <= 0)) {
                print(y)
                print(any(y <= 0))
                stop("non-positive values not allowed for the DM gamma family")
            }
            n <- rep.int(1, nobs)
            mustart <- y
        })
        dfamily <- structure(list(family = "Digamma", variance = varfun.digamma, 
            dev.resids = function(y, mu, wt) {
                wt * unitdeviance.digamma(y, mu)
            }, aic = function(y, n, mu, wt, dev) NA, link = name.dlink, 
            linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta, 
            initialize = init, validmu = function(mu) {
                all(mu > 0)
            }, valideta = valid.eta))
    }
    else {
        eval(substitute(dfamily <- Gamma(link = lk), list(lk = name.dlink)))
    }
    dlink <- as.character(dfamily$link)
    logdlink <- dlink == "log"
    if (!is.null(call$method)) {
        name.method <- substitute(method)
        if (!is.character(name.method)) 
            name.method <- deparse(name.method)
        list.methods <- c("ml", "reml", "ML", "REML", "Ml", "Reml")
        i.method <- pmatch(method, list.methods, nomatch = 0)
        if (!i.method) 
            stop("Method must be ml or reml")
        method <- switch(i.method, "ml", "reml", "ml", "reml", 
            "ml", "reml")
    }
    reml <- method == "reml"
    if (is.null(mustart)) {
        etastart <- NULL
        eval(family$initialize)
        mu <- mustart
        mustart <- NULL
    }
    if (!is.null(betastart)) {
        eta <- X %*% betastart
        mu <- family$linkinv(eta + offset)
    }
    else {
        if (!is.null(mustart)) {
            mu <- mustart
            eta <- family$linkfun(mu) - offset
        }
        else {
            eta <- lm.fit(X, family$linkfun(mu) - offset, singular.ok = TRUE)$fitted.values
            mu <- family$linkinv(eta + offset)
        }
    }
    d <- family$dev.resids(y, mu, weights)
    if (!is.null(phistart)) {
        phi <- phistart
        deta <- dfamily$linkfun(phi) - doffset
    }
    else {
        deta <- lm.fit(Z, dfamily$linkfun(d + (d == 0)/6) - doffset, 
            singular.ok = TRUE)$fitted.values
        if (logdlink) 
            deta <- deta + 1.27036
        phi <- dfamily$linkinv(deta + offset)
    }
    if (any(phi <= 0)) {
        cat("Some values for  phi  are non-positive, suggesting an inappropriate model", 
            "Try a different link function.\n")
    }
    zm <- as.vector(eta + (y - mu)/family$mu.eta(eta))
    wm <- as.vector(eval(family$variance(mu)) * weights/phi)
    mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
    eta <- mfit$fitted.values
    mu <- family$linkinv(eta + offset)
    cat("family:", family$family, "\n")
    if (family$family == "Tweedie") {
        cat("p:", tweedie.p, "\n")
        if ((tweedie.p > 0) & (any(mu < 0))) {
            cat("Some values for  mu  are negative, suggesting an inappropriate model.", 
                "Try a different link function.\n")
        }
    }
    d <- family$dev.resids(y, mu, weights)
    const <- dglm.constant(y, family, weights)
    if (Digamma) {
        h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) * 
            weights/phi)
    }
    else {
        h <- log(phi/weights)
    }
    m2loglik <- const + sum(h + d/phi)
    if (reml) 
        m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
    m2loglikold <- m2loglik + 1
    epsilon <- control$epsilon
    maxit <- control$maxit
    trace <- control$trace
    iter <- 0
    while (abs(m2loglikold - m2loglik)/(abs(m2loglikold) + 1) > 
        epsilon && iter < maxit) {
        hdot <- 1/dfamily$mu.eta(deta)
        if (Digamma) {
            delta <- 2 * weights * (log(weights/phi) - digamma(weights/phi))
            u <- 2 * weights^2 * (trigamma(weights/phi) - phi/weights)
            fdot <- phi^2/u * hdot
        }
        else {
            delta <- phi
            u <- phi^2
            fdot <- hdot
        }
        wd <- 1/(fdot^2 * u)
        if (reml) {
            h <- hat(mfit$qr)
            delta <- delta - phi * h
            wd <- wd - 2 * (h/hdot^2/phi^2) + h^2
        }
        if (any(wd < 0)) {
            cat(" Some weights are negative; temporarily fixing.  This may be a sign of an inappropriate model.\n")
            wd[wd < 0] <- 0
        }
        if (any(is.infinite(wd))) {
            cat(" Some weights are negative; temporarily fixing.  This may be a sign of an inappropriate model.\n")
            wd[is.infinite(wd)] <- 100
        }
        zd <- deta + (d - delta) * fdot
        dfit <- lm.wfit(Z, zd, wd, method = "qr", singular.ok = TRUE)
        deta <- dfit$fitted.values
        phi <- dfamily$linkinv(deta + doffset)
        if (any(is.infinite(phi))) {
            cat("*** Some values for  phi  are infinite, suggesting an inappropriate model", 
                "Try a different link function.  Making an attempt to continue...\n")
            phi[is.infinite(phi)] <- 10
        }
        zm <- eta + (y - mu)/family$mu.eta(eta)
        fam.wt <- expression(weights * family$variance(mu))
        wm <- eval(fam.wt)/phi
        mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
        eta <- mfit$fitted.values
        mu <- family$linkinv(eta + offset)
        if (family$family == "Tweedie") {
            if ((tweedie.p > 0) & (any(mu < 0))) {
                cat("*** Some values for  mu  are negative, suggesting an inappropriate model.", 
                  "Try a different link function.  Making an attempt to continue...\n")
                mu[mu <= 0] <- 1
            }
        }
        d <- family$dev.resids(y, mu, weights)
        m2loglikold <- m2loglik
        if (Digamma) {
            h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) * 
                weights/phi)
        }
        else {
            h <- log(phi/weights)
        }
        m2loglik <- const + sum(h + d/phi)
        if (reml) {
            m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
        }
        iter <- iter + 1
        if (trace) 
            cat("DGLM iteration ", iter, ": -2*log-likelihood = ", 
                format(round(m2loglik, 4)), " \n", sep = "")
    }
    mfit$formula <- call$formula
    mfit$call <- call
    mfit$family <- family
    mfit$linear.predictors <- mfit$fitted.values + offset
    mfit$fitted.values <- mu
    mfit$prior.weights <- weights
    mfit$terms <- mterms
    mfit$contrasts <- attr(X, "contrasts")
    intercept <- attr(mterms, "intercept")
    mfit$df.null <- N - sum(weights == 0) - as.integer(intercept)
    mfit$call <- call
    mfit$deviance <- sum(d/phi)
    mfit$aic <- NA
    mfit$null.deviance <- glm.fit(x = X, y = y, weights = weights/phi, 
        offset = offset, family = family)
    if (length(mfit$null.deviance) > 1) 
        mfit$null.deviance <- mfit$null.deviance$null.deviance
    if (ykeep) 
        mfit$y <- y
    if (xkeep) 
        mfit$x <- X
    class(mfit) <- c("glm", "lm")
    dfit$family <- dfamily
    dfit$prior.weights <- rep(1, N)
    dfit$linear.predictors <- dfit$fitted.values + doffset
    dfit$fitted.values <- phi
    dfit$terms <- dterms
    dfit$aic <- NA
    call$formula <- call$dformula
    call$dformula <- NULL
    call$family <- call(dfamily$family, link = name.dlink)
    dfit$call <- call
    dfit$residuals <- dfamily$dev.resid(d, phi, wt = rep(1/2, 
        N))
    dfit$deviance <- sum(dfit$residuals)
    dfit$null.deviance <- glm.fit(x = Z, y = d, weights = rep(1/2, 
        N), offset = doffset, family = dfamily)
    if (length(dfit$null.deviance) > 1) 
        dfit$null.deviance <- dfit$null.deviance$null.deviance
    if (ykeep) 
        dfit$y <- d
    if (zkeep) 
        dfit$z <- Z
    dfit$formula <- as.vector(attr(dterms, "formula"))
    dfit$iter <- iter
    class(dfit) <- c("glm", "lm")
    out <- c(mfit, list(dispersion.fit = dfit, iter = iter, method = method, 
        m2loglik = m2loglik))
    class(out) <- c("dglm", "glm", "lm")
    out
}

然后像这样运行它:

dglm.nograpes(y~x,~x,family=tweedie(link.power=0, var.power=p))

关于r - 在 R 中封装 dglm,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28810479/

相关文章:

r - 为什么我的泊松回归的似然/AIC 是无限的?

r - 从另一个包中调用时,无法从 mgcv 访问 "ldTweedie"函数

r - 并行计算时函数参数中的变量不会传递给集群

r - 提取r中分号之间的字符

r - 将许多列除以另一列

r - 水中集成 - 缺失模型

r - 查看lm中是否使用了因变量的对数

在 Excel 中运行 R 脚本

r - mgcv:如何使用 Tweedie 响应模型进行逐步回归?