让我们再试一次......
使用 mtcars 数据集,我想使用同一模型将非线性回归模型拟合到多个因变量和自变量。假设我想使用变量 disp、hp 和 wt 来解释 mpg 和 drat。拟合模型后,我想计算总平方和和残差平方和并将它们存储在矩阵中。这可以通过以下方式完成:
dt <- data.frame(mtcars)
m1 <- nls(mpg ~ B0*(disp^B1)*exp(B2*disp), data=dt, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m2 <- nls(mpg ~ B0*(hp^B1)*exp(B2*hp), data=dt, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m3 <- nls(mpg ~ B0*(wt^B1)*exp(B2*wt), data=dt, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m4 <- nls(drat ~ B0*(disp^B1)*exp(B2*disp), data=dt, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m5 <- nls(drat ~ B0*(hp^B1)*exp(B2*hp), data=dt, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m6 <- nls(drat ~ B0*(wt^B1)*exp(B2*wt), data=dt, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
TSS.mpg <- sum((dt$mpg - mean(dt$mpg))^2)
TSS.drat <- sum((dt$drat - mean(dt$drat))^2)
RSS.m1 <- sum(residuals(m1)^2)
RSS.m2 <- sum(residuals(m2)^2)
RSS.m3 <- sum(residuals(m3)^2)
RSS.m4 <- sum(residuals(m4)^2)
RSS.m5 <- sum(residuals(m5)^2)
RSS.m6 <- sum(residuals(m6)^2)
sumsqu <- matrix(0,6,2)
sumsqu[1:3,1] <- TSS.mpg
sumsqu[4:6,1] <- TSS.drat
sumsqu[1,2] <- RSS.m1
sumsqu[2,2] <- RSS.m2
sumsqu[3,2] <- RSS.m3
sumsqu[4,2] <- RSS.m4
sumsqu[5,2] <- RSS.m5
sumsqu[6,2] <- RSS.m6
因此,最终结果是一个矩阵,其中第一列为总平方和,第二列为残差平方和。现在,让我们通过包含分组因素来使事情变得更加复杂。我想做相同的模型拟合和 SS 提取,但对于基于变量“am”的两个组,其中 am=0 或 1。最终结果将是一个类似于第 1 部分中的矩阵,但有四列,前 2 列am=0 的列和 am=1 的后 2 列。同样,这可以通过...来完成很长的路要走。
#subset the data (am = 0) and refit models
dt0 <- subset(dt, am == 0)
m1.0 <- nls(mpg ~ B0*(disp^B1)*exp(B2*disp), data=dt0, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m2.0 <- nls(mpg ~ B0*(hp^B1)*exp(B2*hp), data=dt0, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m3.0 <- nls(mpg ~ B0*(wt^B1)*exp(B2*wt), data=dt0, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m4.0 <- nls(drat ~ B0*(disp^B1)*exp(B2*disp), data=dt0, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m5.0 <- nls(drat ~ B0*(hp^B1)*exp(B2*hp), data=dt0, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m6.0 <- nls(drat ~ B0*(wt^B1)*exp(B2*wt), data=dt0, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
TSS.mpg.0 <- sum((dt0$mpg - mean(dt0$mpg))^2)
TSS.drat.0 <- sum((dt0$drat - mean(dt0$drat))^2)
RSS.m1.0 <- sum(residuals(m1.0)^2)
RSS.m2.0 <- sum(residuals(m2.0)^2)
RSS.m3.0 <- sum(residuals(m3.0)^2)
RSS.m4.0 <- sum(residuals(m4.0)^2)
RSS.m5.0 <- sum(residuals(m5.0)^2)
RSS.m6.0 <- sum(residuals(m6.0)^2)
sumsqu.0 <- matrix(0,6,2)
sumsqu.0[1:3,1] <- TSS.mpg.0
sumsqu.0[4:6,1] <- TSS.drat.0
sumsqu.0[1,2] <- RSS.m1.0
sumsqu.0[2,2] <- RSS.m2.0
sumsqu.0[3,2] <- RSS.m3.0
sumsqu.0[4,2] <- RSS.m4.0
sumsqu.0[5,2] <- RSS.m5.0
sumsqu.0[6,2] <- RSS.m6.0
#subset the data (am=1) and refit models
dt1 <- subset(dt, am == 1)
m1.1 <- nls(mpg ~ B0*(disp^B1)*exp(B2*disp), data=dt1, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m2.1 <- nls(mpg ~ B0*(hp^B1)*exp(B2*hp), data=dt1, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m3.1 <- nls(mpg ~ B0*(wt^B1)*exp(B2*wt), data=dt1, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m4.1 <- nls(drat ~ B0*(disp^B1)*exp(B2*disp), data=dt1, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m5.1 <- nls(drat ~ B0*(hp^B1)*exp(B2*hp), data=dt1, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
m6.1 <- nls(drat ~ B0*(wt^B1)*exp(B2*wt), data=dt1, start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
TSS.mpg.1 <- sum((dt1$mpg - mean(dt1$mpg))^2)
TSS.drat.1 <- sum((dt1$drat - mean(dt1$drat))^2)
RSS.m1.1 <- sum(residuals(m1.1)^2)
RSS.m2.1 <- sum(residuals(m2.1)^2)
RSS.m3.1 <- sum(residuals(m3.1)^2)
RSS.m4.1 <- sum(residuals(m4.1)^2)
RSS.m5.1 <- sum(residuals(m5.1)^2)
RSS.m6.1 <- sum(residuals(m6.1)^2)
sumsqu.1 <- matrix(0,6,2)
sumsqu.1[1:3,1] <- TSS.mpg.1
sumsqu.1[4:6,1] <- TSS.drat.1
sumsqu.1[1,2] <- RSS.m1.1
sumsqu.1[2,2] <- RSS.m2.1
sumsqu.1[3,2] <- RSS.m3.1
sumsqu.1[4,2] <- RSS.m4.1
sumsqu.1[5,2] <- RSS.m5.1
sumsqu.1[6,2] <- RSS.m6.1
#combine sumsqu.1 and sumsqu.0
allSS <- cbind(sumsqu.0,sumsqu.1)
allSS
正如你所看到的,按照我知道的方式,这个过程变得相当漫长。现在想象一下我的实际问题有 6 个因变量、7 个自变量、5 个组,并从每次拟合中提取 10 个左右的数字。从我的代码中你可以看到我不是程序员,因为我的方法效率非常低。我认为我可以包含某种函数,然后使用一些应用函数,例如......
nls1 <- function(x,y){
m1 <- nls( y ~ B0*(x^B1)*exp(B2*x), data=dt0, start=c(B0 = 3.5, B1 = 0.2, B2 = 0.0007))
RSS <- sum(residuals(m1)^2)
TSS <- sum((y - mean(y))^2)
RSS
TSS
}
如果您能帮助提高此过程的效率,我们将不胜感激。
最佳答案
这里我使用 2 个因变量(drat、mpg)、3 个自变量(disp、hp、wt)和 1 个具有 2 个级别/类别的分组变量(am 为 1/0)。
library(dplyr)
library(tidyr)
# example dataset (picking useful columns)
dt <- data.frame(mtcars) %>% select(drat, mpg, disp, hp, wt, am)
# specify which columns we want as y (dependent) and x (independent)
# grouping variable is specified within the dependent variables
ynames <- c("drat","mpg","am")
xnames <- c("disp","hp","wt")
# create and reshape datasets
dt1 <- dt[,ynames]
dt1 <- gather(dt1, y, yvalue, -am)
dt2 <- dt[,xnames]
dt2 <- gather(dt2, x, xvalue)
dt1 %>%
group_by(y) %>%
do(data.frame(.,dt2)) %>%
group_by(y,x,am) %>%
do({ m1 <- nls( yvalue ~ B0*(xvalue^B1)*exp(B2*xvalue), data=., start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
RSS <- sum(residuals(m1)^2)
TSS <- sum((.$yvalue - mean(.$yvalue))^2)
data.frame(RSS,TSS) })
# y x am RSS TSS
# 1 drat disp 0 1.3090406 2.770242
# 2 drat disp 1 1.1155372 1.590400
# 3 drat hp 0 2.1707337 2.770242
# 4 drat hp 1 0.8342527 1.590400
# 5 drat wt 0 2.2100162 2.770242
# 6 drat wt 1 1.1885811 1.590400
# 7 mpg disp 0 98.4815286 264.587368
# 8 mpg disp 1 46.8674036 456.309231
# 9 mpg hp 0 74.9295161 264.587368
# 10 mpg hp 1 112.5548955 456.309231
# 11 mpg wt 0 104.2894519 264.587368
# 12 mpg wt 1 71.1402536 456.309231
正如您所看到的,上面的方法 reshape 了数据并创建了一个更大的数据集,其中包含所需的所有 y 和 x 变量组合。如果您最终拥有庞大的数据集,您可能会遇到问题。或者也许其他遇到类似问题的人需要处理大长度的变量,并且创建大数据集会产生问题。
最好创建每个模型拟合所需的公式,而不是创建变量组合。这种方法类似于@BondedDust 下面建议的方法。
library(dplyr)
# example dataset (picking useful columns)
dt <- data.frame(mtcars) %>% select(drat, mpg, disp, hp, wt, am)
# specify which columns we want as y (dependent) and x (independent)
ynames <- c("drat","mpg")
xnames <- c("disp","hp","wt")
# get unique values of the grouping variable am
groupvalues = unique(dt$am)
expand.grid(ynames,xnames,groupvalues) %>%
data.frame() %>%
select(y=Var1, x=Var2, group=Var3) %>%
mutate(formula = paste0(y," ~ B0*(",x,"^B1)*exp(B2*",x,")")) %>%
group_by(y,x,group,formula) %>%
do({ m1 <- nls( .$formula, data=dt[dt$am==.$group,],
start=c(B0 = 45, B1 = 0.2, B2 = 0.0007))
RSS <- sum(residuals(m1)^2)
TSS <- sum((dt[dt$am==.$group,][,.$y]- mean(dt[dt$am==.$group,][,.$y]))^2)
data.frame(RSS,TSS) })
# y x group formula RSS TSS
# 1 drat disp 0 drat ~ B0*(disp^B1)*exp(B2*disp) 1.3090406 2.770242
# 2 drat disp 1 drat ~ B0*(disp^B1)*exp(B2*disp) 1.1155372 1.590400
# 3 drat hp 0 drat ~ B0*(hp^B1)*exp(B2*hp) 2.1707337 2.770242
# 4 drat hp 1 drat ~ B0*(hp^B1)*exp(B2*hp) 0.8342527 1.590400
# 5 drat wt 0 drat ~ B0*(wt^B1)*exp(B2*wt) 2.2100162 2.770242
# 6 drat wt 1 drat ~ B0*(wt^B1)*exp(B2*wt) 1.1885811 1.590400
# 7 mpg disp 0 mpg ~ B0*(disp^B1)*exp(B2*disp) 98.4815286 264.587368
# 8 mpg disp 1 mpg ~ B0*(disp^B1)*exp(B2*disp) 46.8674036 456.309231
# 9 mpg hp 0 mpg ~ B0*(hp^B1)*exp(B2*hp) 74.9295161 264.587368
# 10 mpg hp 1 mpg ~ B0*(hp^B1)*exp(B2*hp) 112.5548955 456.309231
# 11 mpg wt 0 mpg ~ B0*(wt^B1)*exp(B2*wt) 104.2894519 264.587368
# 12 mpg wt 1 mpg ~ B0*(wt^B1)*exp(B2*wt) 71.1402536 456.309231
关于r - 将回归模型拟合到多个自变量和因变量,并通过对变量进行分组来获得单独的拟合,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32364043/