我有这样的数据框
X1 X2 X3 X4 X5 class
1 1 7 3 9 5 n
2 2 8 4 10 6 n
3 3 9 5 1 7 n
4 4 10 6 2 8 p
5 5 1 7 3 9 p
6 6 2 8 4 10 p
我喜欢对所有列运行 t 检验,按变量类形成的组进行分隔。
我知道我可以为此使用 for 循环,但我想知道是否有更好的代码。
m1 <- data.frame(matrix(c <- (1:10), nrow = 6, ncol = 5))
m1 <- data.frame(m1,c("n","n","n","p","p","p"))
names(m1)[6] = "class"
# work for one column
t.test(X1~class, data= m1)
# What I'm looking for
# t.test(X_i~class, data= m1)
library(dplyr)
library(tidyr)
library(broom)
df <- data.frame(
x1 = 0:9,
x2 = 10:19,
x3 = 20:29,
class = rep(c("a", "b"), each = 5)
)
# Conduct tests and store in nested data.frame
nested <- df %>%
group_by(class) %>%
summarise(across(everything(), ~ list(
t.test(.) %>%
tidy() %>%
select(estimate, statistic,
p.value, conf.low, conf.high)
)))
# Unnest and turn into long structure
long <- nested %>%
unnest(cols = starts_with("x"), names_sep = "_") %>%
pivot_longer(cols = starts_with("x"),
names_to = "quantity",
values_to = "value")
# Split variables into multiple columns and widen
long %>%
separate(col = quantity, into = c("variable", "quantity"), sep = "_") %>%
pivot_wider(names_from = "quantity")
#> # A tibble: 6 x 7
#> class variable estimate statistic p.value conf.low conf.high
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 a x1 2 2.83 0.0474 0.0368 3.96
#> 2 a x2 12 17.0 0.0000707 10.0 14.0
#> 3 a x3 22 31.1 0.00000636 20.0 24.0
#> 4 b x1 7 9.90 0.000584 5.04 8.96
#> 5 b x2 17 24.0 0.0000178 15.0 19.0
#> 6 b x3 27 38.2 0.00000281 25.0 29.0