r - 如何在 dplyr 分组数据上使用 rollmean

标签 r group-by dplyr moving-average rolling-computation

我希望我的示例数据看起来不太大

df <- structure(list(date = structure(c(17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140, 17532, 17563, 17591, 17622, 17652, 17683, 17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140, 17532, 17563, 17591, 17622, 17652, 17683, 17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140), class = "Date"), Gender = c("Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male"), Age = c("Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger"), attribute = c("Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B"), measure_1 = c(0.33, 0.31, 0.31, 0.16, 0.37, 0.29, 
0.27, 0.26, 0.24, 0.38, 0.47, 0.21, 0.32, 0.24, 0.26, 0.38, 0.38, 
0.39, 0.37, 0.3, 0.29, 0.48, 0.45, 0.45, 0.35, 0.49, 0.44, 0.41, 
0.44, 0.35, 0.38, 0.39, 0.55, 0.45, 0.43, 0.38, 0.38, 0.57, 0.47, 
0.51, 0.48, 0.32, 0.27, 0.22, 0.13, 0.02, 0.12, 0.16, 0.15, 0.17, 
0.23, 0.12, 0.31, 0.12, 0.16, 0.16, 0.16, 0.24, 0.06, 0.06, 0.17, 
0.15, 0.14, 0.37, 0.35, 0.2, 0.17, 0.25, 0.2, 0.3, 0.23, 0.26, 
0.14, 0.29, 0.35, 0.14, 0.32, 0.14, 0.14, 0.24, 0.18, 0.24, 0.24, 
0.17, 0.4, 0.3, 0.36, 0.41, 0.38, 0.31, 0.33, 0.43, 0.27, 0.31, 
0.26, 0.29, 0.25, 0.23, 0.38, 0.2, 0.29, 0.26, 0.22, 0.41, 0.25, 
0.45, 0.4, 0.54, 0.51, 0.48, 0.46, 0.4, 0.48, 0.29, 0.33, 0.36, 
0.48, 0.5, 0.32, 0.42, 0.43, 0.35, 0.35, 0.49, 0.44, 0.42, 0.48, 
0.34, 0.44, 0.38, 0.49, 0.27, 0.33, 0.42, 0.31, 0.32, 0.31, 0.38, 
0.46, 0.35, 0.4, 0.36, 0.38, 0.51, 0.41, 0.44, 0.36, 0.7, 0.57, 
0.66, 0.65, 0.57, 0.62, 0.53, 0.52, 0.43, 0.52, 0.53, 0.61, 0.67, 
0.59, 0.57, 0.55, 0.54, 0.67, 0.54, 0.57, 0.57), measure_2 = c(0.5, 
0.47, 0.48, 0.31, 0.54, 0.45, 0.43, 0.42, 0.4, 0.55, 0.66, 0.37, 
0.49, 0.4, 0.42, 0.56, 0.55, 0.57, 0.54, 0.47, 0.45, 0.66, 0.63, 
0.63, 0.52, 0.67, 0.62, 0.58, 0.61, 0.52, 0.55, 0.57, 0.74, 0.63, 
0.61, 0.56, 0.56, 0.77, 0.66, 0.7, 0.67, 0.49, 0.38, 0.32, 0.23, 
0.12, 0.22, 0.26, 0.25, 0.27, 0.34, 0.22, 0.41, 0.21, 0.26, 0.26, 
0.26, 0.34, 0.16, 0.16, 0.27, 0.25, 0.24, 0.48, 0.45, 0.31, 0.27, 
0.36, 0.3, 0.4, 0.34, 0.36, 0.24, 0.39, 0.45, 0.24, 0.43, 0.24, 
0.24, 0.35, 0.28, 0.34, 0.35, 0.27, 0.51, 0.43, 0.48, 0.52, 0.49, 
0.44, 0.46, 0.54, 0.4, 0.44, 0.4, 0.42, 0.39, 0.37, 0.49, 0.34, 
0.42, 0.39, 0.36, 0.52, 0.39, 0.56, 0.51, 0.63, 0.6, 0.58, 0.56, 
0.51, 0.58, 0.42, 0.46, 0.48, 0.58, 0.59, 0.45, 0.52, 0.54, 0.47, 
0.47, 0.58, 0.54, 0.53, 0.7, 0.62, 0.68, 0.64, 0.7, 0.59, 0.62, 
0.67, 0.61, 0.61, 0.61, 0.65, 0.69, 0.63, 0.65, 0.64, 0.64, 0.71, 
0.66, 0.68, 0.63, 0.81, 0.75, 0.8, 0.79, 0.75, 0.77, 0.72, 0.72, 
0.67, 0.72, 0.72, 0.77, 0.8, 0.76, 0.75, 0.73, 0.73, 0.8, 0.73, 
0.75, 0.74)), class = "data.frame", row.names = c(NA, -168L), na.action = structure(169:176, .Names = c("169", 
"170", "171", "172", "173", "174", "175", "176"), class = "omit"))

我想找到一个简洁的、基于 %>% 管道的解决方案,对分组数据计算 12 个月的滚动平均值。也就是说,我想按多个分类变量(例如年龄、性别和测量类别)分组,并对所有相关的数值变量求 12 个月滚动平均值。

下面的代码似乎可行,但不太容易读懂:

# Trailing 12-month moving average of every numeric column, computed
# separately within each Gender x Age x attribute group.
# The first pipe after `df` had been garbled into `#`, which commented out
# the whole pipeline and left df1 as a plain copy of df; it is restored here.
# NOTE: the date column is dropped before grouping, so rows are assumed to be
# in chronological order within each group already -- TODO confirm.
df1 <- df %>%
  mutate(date = as.Date(date)) %>%
  select(-date) %>%
  group_by(Gender, Age, attribute) %>%
  mutate_if(
    is.numeric,
    # sides = 1 makes stats::filter() use only the current and past values,
    # so the first n - 1 rows of each group come out as NA.
    function(x, n = 12) {
      stats::filter(x, rep(1 / n, n), sides = 1)
    }
  )

我读过很多关于 rollmean 和 rollmeanr 的文章,但一直无法让它们作用于分组数据。如何用这类简单的函数写出一两行的解决方案?

最佳答案

1) 使用较小的示例(将来请提供最少数据)

# Minimal example: two groups of three rows each, with two numeric columns.
DF <- data.frame(
  group = rep(c(1, 2), each = 3),
  value1 = seq_len(6),
  value2 = 7:12
)

library(dplyr)
library(zoo)

# For each group, add a right-aligned rolling mean (window of 2) of every
# column whose name contains "value". The list name "roll" is appended to the
# source column name, giving value1_roll and value2_roll.
DF %>%
  group_by(group) %>%
  mutate_at(
    vars(contains("value")),
    list(roll = function(x) rollmeanr(x, k = 2, fill = NA))
  ) %>%
  ungroup()

结果为:

# A tibble: 6 x 5
  group value1 value2 value1_roll value2_roll
  <dbl>  <int>  <int>       <dbl>       <dbl>
1     1      1      7        NA          NA  
2     1      2      8         1.5         7.5
3     1      3      9         2.5         8.5
4     2      4     10        NA          NA  
5     2      5     11         4.5        10.5
6     2      6     12         5.5        11.5

2) 或者,如果您不需要保留原始的 value1 和 value2 列:

# Overwrite every "value" column in place with its per-group, right-aligned
# rolling mean (window of 2); the leading row of each group is filled with NA.
DF %>%
  group_by(group) %>%
  mutate_at(
    vars(contains("value")),
    function(x) rollmeanr(x, k = 2, fill = NA)
  ) %>%
  ungroup()

结果为:

# A tibble: 6 x 3
  group value1 value2
  <dbl>  <dbl>  <dbl>
1     1   NA     NA  
2     1    1.5    7.5
3     1    2.5    8.5
4     2   NA     NA  
5     2    4.5   10.5
6     2    5.5   11.5

3) 另一种方法是:

# Per-group variant using do(): `.` stands for the rows of the current group
# and `.[-1]` drops its first column (`group` in DF), so rollmeanr() is applied
# to the remaining value columns at once. cbind() then places the rolling
# means next to the original columns.
# NOTE(review): the new columns are presumably named roll.value1 and
# roll.value2 -- verify against actual output.
DF %>%
  group_by(group) %>%
  do(cbind(., roll = rollmeanr(.[-1], k = 2, fill = NA))) %>%
  ungroup

或者没有原始变量:

# Same do() approach, but only the rolling means are returned: `.[-1]` drops
# the first column (`group` in DF) before rollmeanr() runs, and the result is
# converted with as.data.frame so that do() receives a data frame.
# NOTE(review): the original value columns are not carried along here;
# confirm the output column names before relying on them.
DF %>%
  group_by(group) %>%
  do(rollmeanr(.[-1], k = 2, fill = NA) %>% as.data.frame) %>%
  ungroup

关于r - 如何在 dplyr 分组数据上使用 rollmean,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59437764/

相关文章:

R devtools::check LICENSE 未提及以及描述文件中的其他问题

mysql - SQL如何按值选择最近的时间戳?

使用另一个数据帧作为 R 中的键替换数据帧中的所有值

r - 如何从R中的1行表转换向量

复制每一行并将一列更改为二进制值

sql - 在连接中使用聚合函数时如何使用 Group By 子句?

r - 如何使用 R 的 {collapse} 包来实现正确的 fgroup_by() |> ftransform() 输出?

r - 使用 dplyr 和 Shiny 的非标准评估

r - 如何使用 R 从多个数据表中提取列的某些值的行?

mysql - 如何计算历史表中每个状态的数量请参阅输出