r - 按周计算每日观察次数

标签 r time-series

我有两个数据框。一个包含间隔不均匀的每日计数(名为 y),另一个包含间隔均匀的每周数据(名为 gIm;其中有两个表示日期的变量:weekStart 和 weekEnd)。我想对每一周统计日期落在 weekStart 和 weekEnd 之间的所有每日观察次数,并将这个新的计数向量附加到我的每周数据框中。

# Attach to each weekly row of gIm the total of y$count over all daily
# observations dated within [weekStart, weekEnd] (both ends inclusive).
y <- y[order(as.Date(y$date, format = "%Y/%m/%d")), ]  # sort by date

# Fix: the original nested loop wrote to gIm$count[i], where i ran over the
# rows of y, so results landed in the wrong rows and were overwritten on every
# pass. Each week needs exactly one sum, stored at that week's own row.
# The bounds are read from the same row of gIm rather than from unique()
# copies, which would fall out of step with the rows if a week were repeated.
gIm$count <- vapply(
  seq_len(nrow(gIm)),
  function(w) {
    in_week <- y$date >= gIm$weekStart[w] & y$date <= gIm$weekEnd[w]
    # Weeks without any observation sum to 0; NA counts are ignored.
    sum(y$count[in_week], na.rm = TRUE)
  },
  numeric(1)
)

这是我间隔不均匀的每日数据(对长度表示歉意):

structure(list(date = structure(c(12437, 12478, 12486, 12487, 
12493, 12494, 12495, 12500, 12502, 12506, 12900, 12955, 12962, 
12964, 12977, 12982, 12983, 12985, 12991, 12992, 12993, 13032, 
13033, 13034, 13041, 13046, 13048, 13053, 13055, 13063, 13073, 
13074, 13075, 13082, 13083, 13084, 13094, 13096, 13097, 13101, 
13103, 13104, 13105, 13123, 13124, 13125, 13130, 13133, 13209, 
13214, 13235, 13242, 13244, 13263, 13272, 13277, 13285, 13291, 
13293, 13305, 13306, 13311, 13312, 13314, 13320, 13328, 13339, 
13342, 13346, 13354, 13356, 13357, 13405, 13406, 13410, 13419, 
13420, 13489, 13517, 13518, 13522, 13523, 13525, 13530, 13531, 
13535, 13542, 13543, 13544, 13550, 13551, 13552, 13559, 13560, 
13572, 13573, 13577, 13578, 13579, 13580, 13581, 13585, 13587, 
13592, 13593, 13594, 13600, 13601, 13620, 13621, 13622, 13626, 
13641, 13643, 13647, 13650, 13654, 13657, 13686, 13692, 13704, 
13711, 13717, 13718, 13720, 13726, 14569, 14629, 14630, 14637, 
14642, 14644, 14664, 14672, 14677, 14683, 14713, 14727, 14736, 
14272, 14782, 14789, 14805, 14816, 14825, 14866, 14874, 14880, 
14881, 14930, 14943, 14287, 14314, 14329, 14336, 14250, 14357, 
14362, 14369, 14370), class = "Date"), count = c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 
1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 
1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 12L, 2L, 1L, 1L, 
1L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 3L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 1L, 2L, 2L, 
2L, 1L, 3L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 4L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L)), .Names = c("date", 
"count"), row.names = c(NA, -160L), class = "data.frame")

这是我的每周间隔数据(对长度表示歉意):

structure(list(immigration = c(62L, 53L, 47L, 47L, 46L, 46L, 
47L, 49L, 49L, 43L, 47L, 41L, 46L, 44L, 41L, 45L, 52L, 49L, 47L, 
41L, 41L, 37L, 37L, 36L, 37L, 36L, 37L, 38L, 36L, 34L, 33L, 34L, 
32L, 35L, 34L, 38L, 40L, 43L, 43L, 42L, 42L, 41L, 42L, 48L, 46L, 
47L, 40L, 48L, 44L, 42L, 30L, 32L, 41L, 37L, 37L, 39L, 39L, 43L, 
39L, 39L, 42L, 41L, 41L, 37L, 39L, 37L, 40L, 40L, 41L, 41L, 41L, 
39L, 38L, 35L, 36L, 33L, 31L, 33L, 32L, 32L, 33L, 32L, 31L, 31L, 
33L, 33L, 29L, 32L, 38L, 37L, 36L, 38L, 39L, 41L, 39L, 38L, 39L, 
38L, 31L, 42L, 39L, 37L, 30L, 27L, 33L, 36L, 33L, 35L, 36L, 36L, 
35L, 34L, 39L, 42L, 41L, 44L, 93L, 83L, 91L, 70L, 81L, 100L, 
64L, 78L, 72L, 54L, 48L, 40L, 36L, 33L, 33L, 34L, 34L, 34L, 31L, 
31L, 33L, 32L, 31L, 33L, 38L, 38L, 41L, 40L, 39L, 41L, 41L, 43L, 
43L, 45L, 35L, 43L, 41L, 39L, 29L, 26L, 32L, 38L, 34L, 39L, 39L, 
39L, 39L, 39L, 39L, 42L, 42L, 43L, 42L, 43L, 44L, 41L, 43L, 52L, 
45L, 63L, 64L, 53L, 60L, 57L, 51L, 65L, 44L, 39L, 41L, 38L, 31L, 
30L, 29L, 30L, 31L, 31L, 33L, 35L, 36L, 36L, 37L, 36L, 36L, 38L, 
38L, 39L, 31L, 40L, 39L, 36L, 29L, 21L, 27L, 35L, 33L, 32L, 34L, 
36L, 35L, 32L, 35L, 33L, 34L, 31L, 31L, 33L, 34L, 34L, 33L, 33L, 
32L, 31L, 29L, 25L, 27L, 24L, 24L, 23L, 22L, 23L, 23L, 23L, 22L, 
22L, 21L, 21L, 24L, 23L, 27L, 28L, 29L, 29L, 29L, 30L, 31L, 31L, 
30L, 30L, 30L, 23L, 29L, 27L, 23L, 16L, 17L, 24L, 26L, 26L, 27L, 
28L, 29L, 27L, 29L, 29L, 29L, 28L, 29L, 29L, 29L, 30L, 30L, 29L, 
29L, 28L, 25L, 25L, 25L, 25L, 24L, 24L, 23L, 23L, 23L, 22L, 23L, 
22L, 22L, 21L, 22L, 22L, 23L, 25L, 25L, 26L, 27L, 26L, 27L, 26L, 
27L, 26L, 28L, 21L, 26L, 25L, 24L, 18L, 17L, 24L, 26L, 25L, 25L, 
25L, 24L, 24L, 25L, 26L, 28L, 27L, 32L, 26L, 27L, 29L, 40L, 87L, 
65L, 49L, 57L, 40L, 33L, 30L, 28L, 28L, 29L, 30L, 29L, 26L, 36L, 
26L, 23L, 21L, 21L, 23L, 22L, 24L, 27L, 25L, 26L, 24L, 25L, 26L, 
27L, 24L, 27L, 19L, 24L, 25L, 21L, 15L, 14L), weekStart = structure(c(12421, 
12428, 12435, 12442, 12449, 12456, 12463, 12470, 12477, 12484, 
12491, 12498, 12505, 12512, 12519, 12526, 12533, 12540, 12547, 
12554, 12561, 12568, 12575, 12582, 12589, 12596, 12603, 12610, 
12617, 12624, 12631, 12638, 12645, 12652, 12659, 12666, 12673, 
12680, 12687, 12694, 12701, 12708, 12715, 12722, 12729, 12736, 
12743, 12750, 12757, 12764, 12771, 12778, 12785, 12792, 12799, 
12806, 12813, 12820, 12827, 12834, 12841, 12848, 12855, 12862, 
12869, 12876, 12883, 12890, 12897, 12904, 12911, 12918, 12925, 
12932, 12939, 12946, 12953, 12960, 12967, 12974, 12981, 12988, 
12995, 13002, 13009, 13016, 13023, 13030, 13037, 13044, 13051, 
13058, 13065, 13072, 13079, 13086, 13093, 13100, 13107, 13114, 
13121, 13128, 13135, 13142, 13149, 13156, 13163, 13170, 13177, 
13184, 13191, 13198, 13205, 13212, 13219, 13226, 13233, 13240, 
13247, 13254, 13261, 13268, 13275, 13282, 13289, 13296, 13303, 
13310, 13317, 13324, 13331, 13338, 13345, 13352, 13359, 13366, 
13373, 13380, 13387, 13394, 13401, 13408, 13415, 13422, 13429, 
13436, 13443, 13450, 13457, 13464, 13471, 13478, 13485, 13492, 
13499, 13506, 13513, 13520, 13527, 13534, 13541, 13548, 13555, 
13562, 13569, 13576, 13583, 13590, 13597, 13604, 13611, 13618, 
13625, 13632, 13639, 13646, 13653, 13660, 13667, 13674, 13681, 
13688, 13695, 13702, 13709, 13716, 13723, 13730, 13737, 13744, 
13751, 13758, 13765, 13772, 13779, 13786, 13793, 13800, 13807, 
13814, 13821, 13828, 13835, 13842, 13849, 13856, 13863, 13870, 
13877, 13884, 13891, 13898, 13905, 13912, 13919, 13926, 13933, 
13940, 13947, 13954, 13961, 13968, 13975, 13982, 13989, 13996, 
14003, 14010, 14017, 14024, 14031, 14038, 14045, 14052, 14059, 
14066, 14073, 14080, 14087, 14094, 14101, 14108, 14115, 14122, 
14129, 14136, 14143, 14150, 14157, 14164, 14171, 14178, 14185, 
14192, 14199, 14206, 14213, 14220, 14227, 14234, 14241, 14248, 
14255, 14262, 14269, 14276, 14283, 14290, 14297, 14304, 14311, 
14318, 14325, 14332, 14339, 14346, 14353, 14360, 14367, 14374, 
14381, 14388, 14395, 14402, 14409, 14416, 14423, 14430, 14437, 
14444, 14451, 14458, 14465, 14472, 14479, 14486, 14493, 14500, 
14507, 14514, 14521, 14528, 14535, 14542, 14549, 14556, 14563, 
14570, 14577, 14584, 14591, 14598, 14605, 14612, 14619, 14626, 
14633, 14640, 14647, 14654, 14661, 14668, 14675, 14682, 14689, 
14696, 14703, 14710, 14717, 14724, 14731, 14738, 14745, 14752, 
14759, 14766, 14773, 14780, 14787, 14794, 14801, 14808, 14815, 
14822, 14829, 14836, 14843, 14850, 14857, 14864, 14871, 14878, 
14885, 14892, 14899, 14906, 14913, 14920, 14927, 14934, 14941, 
14948, 14955, 14962, 14969), class = "Date"), weekEnd = structure(c(12427, 
12434, 12441, 12448, 12455, 12462, 12469, 12476, 12483, 12490, 
12497, 12504, 12511, 12518, 12525, 12532, 12539, 12546, 12553, 
12560, 12567, 12574, 12581, 12588, 12595, 12602, 12609, 12616, 
12623, 12630, 12637, 12644, 12651, 12658, 12665, 12672, 12679, 
12686, 12693, 12700, 12707, 12714, 12721, 12728, 12735, 12742, 
12749, 12756, 12763, 12770, 12777, 12784, 12791, 12798, 12805, 
12812, 12819, 12826, 12833, 12840, 12847, 12854, 12861, 12868, 
12875, 12882, 12889, 12896, 12903, 12910, 12917, 12924, 12931, 
12938, 12945, 12952, 12959, 12966, 12973, 12980, 12987, 12994, 
13001, 13008, 13015, 13022, 13029, 13036, 13043, 13050, 13057, 
13064, 13071, 13078, 13085, 13092, 13099, 13106, 13113, 13120, 
13127, 13134, 13141, 13148, 13155, 13162, 13169, 13176, 13183, 
13190, 13197, 13204, 13211, 13218, 13225, 13232, 13239, 13246, 
13253, 13260, 13267, 13274, 13281, 13288, 13295, 13302, 13309, 
13316, 13323, 13330, 13337, 13344, 13351, 13358, 13365, 13372, 
13379, 13386, 13393, 13400, 13407, 13414, 13421, 13428, 13435, 
13442, 13449, 13456, 13463, 13470, 13477, 13484, 13491, 13498, 
13505, 13512, 13519, 13526, 13533, 13540, 13547, 13554, 13561, 
13568, 13575, 13582, 13589, 13596, 13603, 13610, 13617, 13624, 
13631, 13638, 13645, 13652, 13659, 13666, 13673, 13680, 13687, 
13694, 13701, 13708, 13715, 13722, 13729, 13736, 13743, 13750, 
13757, 13764, 13771, 13778, 13785, 13792, 13799, 13806, 13813, 
13820, 13827, 13834, 13841, 13848, 13855, 13862, 13869, 13876, 
13883, 13890, 13897, 13904, 13911, 13918, 13925, 13932, 13939, 
13946, 13953, 13960, 13967, 13974, 13981, 13988, 13995, 14002, 
14009, 14016, 14023, 14030, 14037, 14044, 14051, 14058, 14065, 
14072, 14079, 14086, 14093, 14100, 14107, 14114, 14121, 14128, 
14135, 14142, 14149, 14156, 14163, 14170, 14177, 14184, 14191, 
14198, 14205, 14212, 14219, 14226, 14233, 14240, 14247, 14254, 
14261, 14268, 14275, 14282, 14289, 14296, 14303, 14310, 14317, 
14324, 14331, 14338, 14345, 14352, 14359, 14366, 14373, 14380, 
14387, 14394, 14401, 14408, 14415, 14422, 14429, 14436, 14443, 
14450, 14457, 14464, 14471, 14478, 14485, 14492, 14499, 14506, 
14513, 14520, 14527, 14534, 14541, 14548, 14555, 14562, 14569, 
14576, 14583, 14590, 14597, 14604, 14611, 14618, 14625, 14632, 
14639, 14646, 14653, 14660, 14667, 14674, 14681, 14688, 14695, 
14702, 14709, 14716, 14723, 14730, 14737, 14744, 14751, 14758, 
14765, 14772, 14779, 14786, 14793, 14800, 14807, 14814, 14821, 
14828, 14835, 14842, 14849, 14856, 14863, 14870, 14877, 14884, 
14891, 14898, 14905, 14912, 14919, 14926, 14933, 14940, 14947, 
14954, 14961, 14968, 14975), class = "Date")), .Names = c("immigration", 
"weekStart", "weekEnd"), class = "data.frame", row.names = c(NA, 
-365L))

感谢您的帮助!

最佳答案

先计算 y 中每个日期所在周的起始日(周日):

# Label every observation with the Sunday that opens its week: "%w" gives the
# weekday as a number (0 = Sunday, ..., 6 = Saturday), so subtracting that
# many days from the date steps back to the preceding (or same) Sunday.
y$weekStart <- y$date - as.integer(format(y$date, "%w"))

然后按周汇总计数(结果中不再包含已不需要的 date 列):

# Collapse the daily rows into one row per week: sum `count` within each
# distinct `weekStart`. Only the two columns named in the formula are kept.
yy <- aggregate(
  count ~ weekStart,
  data = y,
  FUN = sum
)

最后,与gIm合并:

# Join the weekly totals onto the weekly data frame.
# The key is named explicitly: without `by`, merge() joins on every column
# name the two frames share, so a pre-existing `count` column in gIm (e.g. an
# NA placeholder) would silently become part of the key and no rows would
# match. `all = TRUE` keeps weeks that have no observations (count is NA)
# as well as any week that appears only in yy.
m <- merge(gIm, yy, by = "weekStart", all = TRUE)


> head(m, 10)
    weekStart immigration    weekEnd count
1  2004-01-04          62 2004-01-10    NA
2  2004-01-11          53 2004-01-17    NA
3  2004-01-18          47 2004-01-24     1
4  2004-01-25          47 2004-01-31    NA
5  2004-02-01          46 2004-02-07    NA
6  2004-02-08          46 2004-02-14    NA
7  2004-02-15          47 2004-02-21    NA
8  2004-02-22          49 2004-02-28    NA
9  2004-02-29          49 2004-03-06     1
10 2004-03-07          43 2004-03-13     2

关于r - 按周计算每日观察次数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15468511/

相关文章:

r - R的ASCII绘图功能

r - 在嵌套 for 循环中删除值,并在 R 中使用多个替换长度

r - R中的时间序列-对齐具有不同时间戳的数据

python-3.x - Python fbprophet - 每年从 plot_components() 导出值

随机森林错误 : Error in `[.data.frame` (data,,all.vars(Terms),drop = FALSE):选择了未定义的列

r - 识别一行中不同元素数量的有效方法

r - 无法从 github 安装包 - 无法解释的错误消息

r 拨浪鼓fancyrpartplot 错误

machine-learning - 多个观测变量的隐马尔可夫模型

cassandra - 时空序列的复合分区键(Cassandra)与交错索引(Accumulo、BigTable)