r - 在 R 中按组有条件地计算两个日期时间列之间的行

标签 r datetime count tidyverse purrr

我正在处理用户生成的数据,我想计算特定时间段内每个 ID 变量的两个不同日期时间列之间的行数。这是一个模拟数据框,类似于我正在使用的数据框(抱歉它很长):

library(Pareto)
library(ggplot2)
library(lubridate)
library(stringi)

set.seed(1)
n_users<-150
n_rows <- 1000
relative_probs <- rPareto(n = n_users, t = 1, alpha = 0.3, truncation = 500) 
UserId<-stri_rand_strings(n_users,6)
UserId <- sample(UserId, size = n_rows, prob = relative_probs, replace = TRUE)

starttimes<-structure(c(1612224005, 1612224010, 1612224022, 1612224027, 1612224038, 
                        1612224040, 1612224050, 1612224067, 1612224018, 1612224092, 1612224082, 
                        1612224076, 1612224065, 1612224107, 1612224144, 1612224085, 1612224037, 
                        1612224212, 1612224249, 1612224281, 1612224271, 1612224293, 1612224281, 
                        1612224348, 1612224358, 1612224286, 1612224398, 1612224098, 1612224498, 
                        1612224474, 1612224511, 1612224511, 1612224522, 1612224443, 1612224085, 
                        1612224480, 1612224309, 1612224583, 1612224726, 1612224533, 1612224732, 
                        1612224752, 1612224741, 1612224322, 1612224856, 1612224887, 1612224879, 
                        1612224673, 1612224968, 1612224711, 1612224230, 1612225058, 1612224985, 
                        1612225166, 1612224159, 1612224522, 1612225124, 1612224676, 1612224695, 
                        1612225361, 1612224854, 1612225316, 1612225161, 1612224662, 1612225263, 
                        1612225511, 1612224232, 1612225636, 1612225642, 1612225648, 1612225662, 
                        1612225667, 1612225673, 1612225681, 1612225668, 1612225689, 1612225699, 
                        1612225685, 1612224031, 1612225797, 1612224427, 1612225705, 1612225814, 
                        1612225743, 1612225869, 1612225919, 1612225926, 1612225829, 1612225154, 
                        1612226208, 1612226216, 1612226234, 1612226103, 1612226249, 1612226255, 
                        1612226267, 1612226261, 1612226278, 1612226283, 1612226253, 1612226339, 
                        1612226394, 1612226409, 1612224236, 1612226472, 1612226477, 1612226547, 
                        1612226560, 1612226602, 1612226608, 1612225074, 1612226633, 1612226200, 
                        1612226614, 1612226667, 1612226678, 1612226729, 1612226749, 1612226746, 
                        1612226803, 1612226607, 1612226912, 1612226922, 1612226930, 1612226661, 
                        1612226938, 1612226966, 1612227017, 1612227026, 1612227032, 1612227043, 
                        1612227065, 1612226991, 1612227174, 1612227060, 1612227179, 1612227184, 
                        1612227191, 1612227127, 1612226655, 1612227079, 1612225878, 1612227235, 
                        1612227181, 1612227249, 1612227309, 1612227317, 1612227337, 1612226341, 
                        1612227290, 1612227352, 1612227390, 1612226677, 1612227397, 1612227404, 
                        1612227426, 1612227447, 1612227196, 1612227245, 1612224115, 1612227444, 
                        1612227576, 1612227583, 1612225830, 1612227639, 1612227692, 1612227741, 
                        1612227767, 1612225934, 1612226260, 1612225495, 1612227572, 1612227886, 
                        1612227547, 1612227857, 1612228109, 1612228049, 1612228106, 1612228148, 
                        1612228337, 1612228351, 1612228358, 1612228148, 1612228412, 1612228505, 
                        1612228523, 1612228444, 1612228574, 1612228587, 1612228597, 1612228607, 
                        1612228937, 1612228955, 1612228962, 1612228969, 1612229187, 1612228571, 
                        1612229235, 1612229364, 1612229374, 1612229519, 1612226920, 1612229434, 
                        1612229282, 1612229762, 1612229025, 1612230063, 1612230201, 1612230372, 
                        1612230380, 1612230469, 1612229782, 1612230493, 1612230395, 1612230617, 
                        1612230540, 1612225845, 1612230535, 1612230671, 1612229443, 1612230646, 
                        1612230830, 1612228533, 1612230889, 1612230907, 1612230939, 1612230954, 
                        1612230975, 1612227207, 1612230981, 1612230991, 1612230677, 1612231009, 
                        1612231017, 1612231023, 1612231016, 1612231036, 1612231064, 1612231085, 
                        1612231105, 1612231122, 1612231137, 1612231151, 1612231156, 1612231173, 
                        1612231030, 1612230998, 1612230838, 1612231627, 1612231651, 1612231657, 
                        1612231672, 1612231203, 1612231927, 1612231226, 1612231939, 1612232151, 
                        1612232159, 1612232375, 1612232385, 1612232526, 1612232705, 1612232761, 
                        1612232721, 1612231191, 1612232905, 1612232915, 1612232923, 1612232972, 
                        1612225176, 1612233131, 1612224567, 1612233071, 1612234287, 1612234426, 
                        1612234443, 1612234449, 1612234650, 1612234658, 1612225211, 1612227597, 
                        1612232868, 1612232580, 1612234987, 1612235008, 1612235133, 1612235182, 
                        1612234722, 1612234967, 1612235140, 1612235398, 1612235409, 1612235417, 
                        1612235432, 1612235450, 1612235459, 1612235710, 1612236491, 1612236575, 
                        1612236582, 1612226403, 1612236587, 1612238122, 1612238128, 1612238028, 
                        1612238207, 1612234517, 1612238299, 1612238475, 1612239138, 1612239150, 
                        1612239156, 1612239428, 1612231495, 1612239577, 1612239586, 1612228731, 
                        1612238892, 1612239592, 1612239651, 1612239692, 1612239692, 1612239699, 
                        1612239786, 1612239940, 1612240802, 1612240808, 1612240903, 1612240908, 
                        1612240913, 1612240919, 1612240935, 1612240941, 1612240947, 1612241020, 
                        1612239507, 1612224122, 1612240873, 1612230662, 1612241642, 1612241630, 
                        1612241680, 1612241689, 1612241727, 1612241649, 1612241740, 1612241749, 
                        1612241800, 1612241760, 1612241738, 1612241890, 1612241964, 1612241971, 
                        1612241976, 1612241997, 1612242009, 1612242078, 1612227671, 1612242173, 
                        1612241819, 1612242425, 1612242509, 1612242055, 1612242595, 1612242746, 
                        1612230074, 1612242983, 1612242993, 1612243012, 1612243020, 1612243029, 
                        1612243036, 1612242957, 1612243145, 1612243169, 1612243042, 1612243136, 
                        1612243227, 1612242992, 1612243173, 1612243179, 1612232803, 1612243829, 
                        1612243930, 1612243951, 1612243977, 1612243431, 1612243991, 1612243986, 
                        1612243896, 1612244036, 1612244089, 1612244114, 1612244124, 1612244130, 
                        1612244140, 1612244150, 1612244543, 1612242359, 1612244555, 1612244268, 
                        1612244768, 1612243995, 1612238398, 1612245001, 1612245038, 1612245146, 
                        1612245157, 1612245164, 1612239697, 1612244996, 1612245319, 1612245430, 
                        1612242602, 1612245198, 1612245489, 1612245565, 1612245572, 1612241056, 
                        1612245593, 1612245564, 1612243723, 1612245749, 1612245586, 1612245811, 
                        1612245677, 1612245854, 1612245871, 1612245893, 1612245899, 1612245905, 
                        1612245910, 1612245915, 1612245830, 1612235046, 1612246117, 1612246124, 
                        1612246185, 1612245434, 1612246573, 1612245932, 1612246762, 1612246770, 
                        1612246776, 1612246786, 1612246806, 1612246888, 1612246900, 1612246918, 
                        1612246923, 1612246929, 1612246937, 1612246949, 1612246981, 1612245786, 
                        1612246943, 1612247016, 1612247021, 1612246988, 1612247034, 1612246974, 
                        1612247044, 1612247047, 1612247055, 1612247065, 1612247071, 1612247085, 
                        1612247094, 1612247110, 1612247116, 1612247123, 1612247129, 1612247135, 
                        1612247150, 1612247163, 1612247166, 1612246605, 1612247172, 1612247181, 
                        1612247188, 1612247202, 1612247209, 1612247031, 1612247217, 1612247226, 
                        1612247206, 1612247236, 1612247245, 1612247258, 1612247272, 1612247301, 
                        1612247315, 1612247323, 1612247329, 1612247337, 1612247273, 1612246181, 
                        1612247570, 1612246720, 1612247820), class = c("POSIXct", "POSIXt"
                        ), tzone = "UTC")

endtimes<-structure(c(1612224010, 1612224018, 1612224027, 1612224038, 1612224046, 
                      1612224051, 1612224064, 1612224081, 1612224092, 1612224098, 1612224124, 
                      1612224152, 1612224156, 1612224187, 1612224191, 1612224203, 1612224230, 
                      1612224231, 1612224286, 1612224290, 1612224291, 1612224299, 1612224322, 
                      1612224353, 1612224374, 1612224377, 1612224426, 1612224436, 1612224507, 
                      1612224510, 1612224520, 1612224533, 1612224537, 1612224582, 1612224621, 
                      1612224662, 1612224673, 1612224675, 1612224739, 1612224741, 1612224741, 
                      1612224856, 1612224878, 1612224886, 1612224892, 1612224906, 1612224922, 
                      1612224983, 1612225023, 1612225029, 1612225060, 1612225071, 1612225095, 
                      1612225185, 1612225198, 1612225215, 1612225246, 1612225262, 1612225301, 
                      1612225381, 1612225416, 1612225423, 1612225432, 1612225487, 1612225495, 
                      1612225550, 1612225636, 1612225642, 1612225648, 1612225661, 1612225667, 
                      1612225673, 1612225680, 1612225681, 1612225683, 1612225695, 1612225699, 
                      1612225727, 1612225796, 1612225813, 1612225814, 1612225835, 1612225845, 
                      1612225869, 1612225875, 1612225925, 1612225933, 1612226103, 1612226208, 
                      1612226215, 1612226225, 1612226244, 1612226252, 1612226254, 1612226266, 
                      1612226277, 1612226283, 1612226339, 1612226341, 1612226353, 1612226391, 
                      1612226406, 1612226414, 1612226471, 1612226477, 1612226543, 1612226556, 
                      1612226602, 1612226607, 1612226633, 1612226647, 1612226656, 1612226657, 
                      1612226661, 1612226683, 1612226685, 1612226749, 1612226803, 1612226845, 
                      1612226872, 1612226911, 1612226918, 1612226930, 1612226933, 1612226936, 
                      1612226965, 1612227017, 1612227024, 1612227032, 1612227043, 1612227065, 
                      1612227072, 1612227076, 1612227176, 1612227178, 1612227184, 1612227191, 
                      1612227204, 1612227210, 1612227231, 1612227235, 1612227245, 1612227245, 
                      1612227256, 1612227309, 1612227317, 1612227329, 1612227346, 1612227367, 
                      1612227374, 1612227389, 1612227397, 1612227404, 1612227404, 1612227417, 
                      1612227443, 1612227461, 1612227547, 1612227596, 1612227599, 1612227619, 
                      1612227633, 1612227639, 1612227690, 1612227691, 1612227740, 1612227741, 
                      1612227767, 1612227840, 1612227842, 1612227885, 1612227899, 1612227899, 
                      1612228048, 1612228057, 1612228123, 1612228148, 1612228157, 1612228163, 
                      1612228351, 1612228356, 1612228362, 1612228412, 1612228444, 1612228514, 
                      1612228533, 1612228571, 1612228586, 1612228597, 1612228607, 1612228620, 
                      1612228955, 1612228961, 1612228969, 1612229186, 1612229226, 1612229235, 
                      1612229282, 1612229374, 1612229490, 1612229570, 1612229599, 1612229722, 
                      1612229761, 1612229782, 1612230063, 1612230074, 1612230212, 1612230391, 
                      1612230405, 1612230469, 1612230487, 1612230539, 1612230609, 1612230631, 
                      1612230645, 1612230662, 1612230673, 1612230698, 1612230819, 1612230825, 
                      1612230836, 1612230888, 1612230895, 1612230913, 1612230954, 1612230971, 
                      1612230989, 1612230997, 1612230998, 1612231008, 1612231017, 1612231019, 
                      1612231022, 1612231029, 1612231033, 1612231061, 1612231070, 1612231105, 
                      1612231121, 1612231137, 1612231143, 1612231156, 1612231171, 1612231191, 
                      1612231203, 1612231223, 1612231489, 1612231640, 1612231657, 1612231672, 
                      1612231686, 1612231927, 1612231938, 1612232033, 1612232151, 1612232157, 
                      1612232375, 1612232381, 1612232526, 1612232579, 1612232721, 1612232803, 
                      1612232868, 1612232902, 1612232909, 1612232966, 1612232971, 1612232980, 
                      1612233026, 1612233146, 1612233387, 1612234333, 1612234426, 1612234443, 
                      1612234449, 1612234650, 1612234658, 1612234722, 1612234800, 1612234806, 
                      1612234967, 1612234987, 1612235005, 1612235039, 1612235139, 1612235187, 
                      1612235203, 1612235328, 1612235387, 1612235409, 1612235413, 1612235432, 
                      1612235450, 1612235458, 1612235492, 1612235716, 1612236571, 1612236582, 
                      1612236587, 1612237340, 1612238021, 1612238127, 1612238143, 1612238207, 
                      1612238299, 1612238334, 1612238398, 1612238526, 1612239149, 1612239156, 
                      1612239162, 1612239507, 1612239576, 1612239582, 1612239591, 1612239600, 
                      1612239651, 1612239690, 1612239691, 1612239696, 1612239699, 1612239704, 
                      1612239791, 1612239960, 1612240807, 1612240884, 1612240908, 1612240913, 
                      1612240919, 1612240935, 1612240941, 1612240947, 1612241019, 1612241027, 
                      1612241397, 1612241482, 1612241516, 1612241532, 1612241649, 1612241680, 
                      1612241689, 1612241726, 1612241733, 1612241740, 1612241749, 1612241755, 
                      1612241818, 1612241851, 1612241889, 1612241961, 1612241971, 1612241976, 
                      1612241996, 1612242008, 1612242019, 1612242118, 1612242141, 1612242420, 
                      1612242436, 1612242509, 1612242598, 1612242677, 1612242729, 1612242790, 
                      1612242914, 1612242993, 1612243008, 1612243019, 1612243026, 1612243036, 
                      1612243041, 1612243144, 1612243145, 1612243169, 1612243169, 1612243171, 
                      1612243227, 1612243264, 1612243430, 1612243528, 1612243673, 1612243866, 
                      1612243943, 1612243971, 1612243986, 1612243995, 1612244002, 1612244023, 
                      1612244074, 1612244084, 1612244114, 1612244121, 1612244129, 1612244136, 
                      1612244146, 1612244278, 1612244551, 1612244567, 1612244678, 1612244683, 
                      1612244856, 1612245001, 1612245037, 1612245128, 1612245142, 1612245157, 
                      1612245164, 1612245199, 1612245241, 1612245383, 1612245434, 1612245439, 
                      1612245517, 1612245541, 1612245544, 1612245572, 1612245586, 1612245590, 
                      1612245599, 1612245668, 1612245748, 1612245785, 1612245815, 1612245846, 
                      1612245849, 1612245870, 1612245893, 1612245899, 1612245905, 1612245910, 
                      1612245915, 1612245921, 1612245932, 1612246048, 1612246127, 1612246134, 
                      1612246215, 1612246242, 1612246748, 1612246761, 1612246767, 1612246776, 
                      1612246782, 1612246806, 1612246888, 1612246900, 1612246917, 1612246922, 
                      1612246929, 1612246933, 1612246943, 1612246959, 1612246987, 1612247008, 
                      1612247015, 1612247021, 1612247027, 1612247031, 1612247041, 1612247045, 
                      1612247053, 1612247059, 1612247063, 1612247071, 1612247076, 1612247094, 
                      1612247110, 1612247116, 1612247122, 1612247127, 1612247135, 1612247142, 
                      1612247156, 1612247170, 1612247175, 1612247178, 1612247180, 1612247186, 
                      1612247202, 1612247208, 1612247216, 1612247220, 1612247222, 1612247226, 
                      1612247243, 1612247245, 1612247250, 1612247264, 1612247272, 1612247301, 
                      1612247320, 1612247329, 1612247337, 1612247346, 1612247364, 1612247552, 
                      1612247739, 1612247815, 1612247833), class = c("POSIXct", "POSIXt"
                      ), tzone = "UTC")



df<-as.data.frame(cbind(UserId,starttimes,endtimes))
df$starttimes<-as.numeric(df$starttimes)
df$endtimes<-as.numeric(df$endtimes)
df$starttimes<-lubridate::as_datetime(df$starttimes)
df$endtimes<-lubridate::as_datetime(df$endtimes)



df<-df%>%arrange(UserId,starttimes)


基本上我想做的是计算每个UserId的时间戳之间的行数,条件如下:-

UserId分组,如果前导starttimes时间戳比前一个endtimes时间戳晚> = 1小时,则对前面的行进行计数.

这是我拥有的代码的工作示例,但仅适用于一个时间戳列。这是基于我不久前发布的类似问题:-

Count rows (conditionally) within specified time period by group in R

对于此示例,我将在 starttimes 时间戳列上使用它:-


library(purrr)
library(tidyverse)


# this is the code to conditionally count the rows between timestamps
SessionCount <- function(data,maxdelay){
  result <- list()
  row <- 0
  events <- 0
  OrigTime <- NA
  n <- nrow(data)
  
  for (i in 1:n) {
    if (is.na(OrigTime)) {
      OrigTime <- data$starttimes[[i]]
      programmes <- 0
    }
    events = events + 1
    if (difftime(data$starttimes[[i]],OrigTime,units='hours') > maxdelay) {
      row <- row + 1
      result[[row]] <- data.frame(OrigTime = OrigTime, LastTime = data$starttimes[[i-1]], events = events)
      OrigTime <- data$starttimes[[i]]
    } 
  } 
  dplyr::bind_rows(result)
}


df %>% arrange(UserId,starttimes) %>%
  split(.$UserId) %>%
  map(function(data) {SessionCount(data,1) }) %>%
  bind_rows(.id="UserId")->result

#code which helps me do a sanity check, making sure counts are correct
result$events<-ifelse(result$events>1 & result$OrigTime == result$LastTime, 
                               1,result$events)


count_events <- function(UserId, OrigTime, LastTime, df_new = df) {
  df_new %>% 
    dplyr::filter(UserId== !! UserId, 
                  dplyr::between(starttimes, OrigTime, LastTime)) %>% 
    nrow()
}

result %>% 
  mutate(events_recount = pmap_int(list(UserId, OrigTime, LastTime), count_events),
         mismatch = events != events_recount)->results_new


#####  output ####
  UserId            OrigTime            LastTime events events_recount mismatch
1 0buhGq 2021-02-02 00:24:55 2021-02-02 00:24:55      1              1    FALSE
2 0buhGq 2021-02-02 02:28:25 2021-02-02 02:28:25      1              1    FALSE
3 0buhGq 2021-02-02 04:19:52 2021-02-02 04:40:02      6              3     TRUE
4 0ulN53 2021-02-02 00:02:39 2021-02-02 00:38:59      4              3     TRUE
5 0ulN53 2021-02-02 01:04:46 2021-02-02 01:04:46      1              1    FALSE
6 0ulN53 2021-02-02 02:26:43 2021-02-02 02:31:11      7              2     TRUE

在上面的输出中,events_recount 变量是行数。同样,这适用于一个时间戳列,但不适用于两个时间戳列。 关于如何实现我所要求的目标有什么建议吗?

非常感谢!

编辑

这是我试图通过查看 DF 的前 27 行来实现的输出示例:-

    UserId          starttimes            endtimes
1   0buhGq 2021-02-02 00:24:55 2021-02-02 01:04:45
2   0buhGq 2021-02-02 02:28:25 2021-02-02 02:28:29
3   0buhGq 2021-02-02 04:19:52 2021-02-02 04:21:30
4   0buhGq 2021-02-02 04:23:06 2021-02-02 04:23:11
5   0buhGq 2021-02-02 04:40:02 2021-02-02 04:40:07
6   0buhGq 2021-02-02 05:34:49 2021-02-02 05:35:14
7   0buhGq 2021-02-02 06:04:14 2021-02-02 06:04:30
8   0buhGq 2021-02-02 06:28:43 2021-02-02 06:28:49
9   0buhGq 2021-02-02 06:28:57 2021-02-02 06:29:06
10  0ulN53 2021-02-02 00:02:39 2021-02-02 00:19:58
11  0ulN53 2021-02-02 00:21:03 2021-02-02 00:24:55
12  0ulN53 2021-02-02 00:38:59 2021-02-02 00:39:51
13  0ulN53 2021-02-02 01:04:46 2021-02-02 01:04:59
14  0ulN53 2021-02-02 02:26:43 2021-02-02 05:27:53
15  0ulN53 2021-02-02 02:31:11 2021-02-02 02:52:13
16  0ulN53 2021-02-02 04:41:43 2021-02-02 04:41:48
17  0ulN53 2021-02-02 05:59:32 2021-02-02 05:59:46
18  0wzMkn 2021-02-02 00:04:09 2021-02-02 00:04:46
19  0wzMkn 2021-02-02 01:55:07 2021-02-02 01:55:13
20  0wzMkn 2021-02-02 05:16:33 2021-02-02 05:16:48
21  2diLwk 2021-02-02 00:00:38 2021-02-02 00:00:46
22  2diLwk 2021-02-02 00:25:11 2021-02-02 00:25:50
23  2diLwk 2021-02-02 00:48:32 2021-02-02 00:48:38
24  2diLwk 2021-02-02 02:27:48 2021-02-02 03:02:47
25  2diLwk 2021-02-02 03:10:17 2021-02-02 03:10:32
26  2diLwk 2021-02-02 06:21:40 2021-02-02 06:21:57
27  2diLwk 2021-02-02 06:28:49 2021-02-02 06:28:57

如果在上面的行上完成,那么我想要的输出将如下所示:-

enter image description here

如果前导行中的 starttimes 时间戳比前一行中的 endtimes 时间戳晚 >=1 小时,则对这些时间戳之间的前几行进行计数(如图所示)在我想要的输出图像中)

希望这有帮助!

最佳答案

这是使用 shiftdata.table 方法:

DF <- structure(list(UserId = c("0buhGq", "0buhGq", "0buhGq", "0buhGq", 
"0buhGq", "0buhGq", "0buhGq", "0buhGq", "0buhGq", "0ulN53", "0ulN53", 
"0ulN53", "0ulN53", "0ulN53", "0ulN53", "0ulN53", "0ulN53", "0wzMkn", 
"0wzMkn", "0wzMkn", "2diLwk", "2diLwk", "2diLwk", "2diLwk", "2diLwk", 
"2diLwk", "2diLwk", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", 
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", 
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", 
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", 
"36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", "36NAIB", 
"3942Wm", "3OUdi4", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", 
"4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "4hhSRY", "52LXbG", 
"52LXbG", "52LXbG", "52LXbG", "52LXbG", "52LXbG", "52LXbG", "64gfuI", 
"64gfuI", "6KpHap", "6KpHap", "6KpHap", "6vHa6q", "72MKAc", "72MKAc", 
"72MKAc", "72MKAc", "8RcC8m", "8RcC8m", "8RcC8m", "8RcC8m", "98vV9L", 
"98vV9L", "98vV9L", "98vV9L", "98vV9L", "98vV9L", "98vV9L", "98vV9L", 
"98vV9L", "98vV9L", "9PF5pW", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", 
"aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", 
"aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", "aGsBU0", 
"aGsBU0", "aGsBU0", "aGsBU0", "AUQYey", "AUQYey", "AUQYey", "AUQYey", 
"AUQYey", "B0s81w", "B0s81w", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", 
"BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", "BPaJTw", 
"BWY8Fi", "c3qDX8", "c3qDX8", "c3qDX8", "c3qDX8", "c3qDX8", "c3qDX8", 
"c3qDX8", "c3qDX8", "cIS2B3", "CNTp7f", "CNTp7f", "CNTp7f", "dHBGWg", 
"dHBGWg", "dHBGWg", "dHBGWg", "dQ1kCz", "dQ1kCz", "e2aP4W", "EEnp7x", 
"EEnp7x", "EEnp7x", "eJL9eB", "eJL9eB", "eJL9eB", "eXKdph", "exxLGA", 
"FE1Gg2", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", 
"fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", "fPqjLw", 
"fPqjLw", "GfLwK1", "GfLwK1", "GfLwK1", "GQMw74", "GQMw74", "GSjzKw", 
"hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA", "hA3kcA"
), starttimes = structure(c(1612225495, 1612232905, 1612239592, 
1612239786, 1612240802, 1612244089, 1612245854, 1612247323, 1612247337, 
1612224159, 1612225263, 1612226339, 1612227886, 1612232803, 1612233071, 
1612240903, 1612245572, 1612224249, 1612230907, 1612242993, 1612224038, 
1612225511, 1612226912, 1612232868, 1612235417, 1612246900, 1612247329, 
1612224511, 1612224732, 1612224741, 1612225124, 1612225673, 1612226602, 
1612226633, 1612228148, 1612228574, 1612229782, 1612230617, 1612231030, 
1612231085, 1612231122, 1612231156, 1612231939, 1612234987, 1612236491, 
1612238398, 1612240919, 1612240941, 1612241997, 1612242078, 1612242957, 
1612243896, 1612243977, 1612244150, 1612245811, 1612246720, 1612246923, 
1612247034, 1612247123, 1612247570, 1612227309, 1612243829, 1612225263, 
1612225797, 1612225814, 1612226255, 1612227017, 1612229235, 1612239150, 
1612241630, 1612247047, 1612247085, 1612247217, 1612224887, 1612225648, 
1612227079, 1612230380, 1612245565, 1612246181, 1612246806, 1612241890, 
1612245854, 1612225689, 1612229282, 1612242957, 1612239156, 1612224511, 
1612224522, 1612225166, 1612247820, 1612224358, 1612231137, 1612235046, 
1612247226, 1612224076, 1612224232, 1612225673, 1612226912, 1612228533, 
1612229434, 1612231023, 1612232915, 1612235398, 1612243431, 1612239692, 
1612224286, 1612224309, 1612225511, 1612225845, 1612225919, 1612226267, 
1612228574, 1612228937, 1612230201, 1612230395, 1612230838, 1612231030, 
1612231657, 1612232159, 1612232761, 1612235182, 1612239699, 1612241642, 
1612243227, 1612245164, 1612247272, 1612224065, 1612235409, 1612241800, 
1612246929, 1612246937, 1612226922, 1612228937, 1612224085, 1612224662, 
1612224856, 1612225316, 1612226938, 1612227404, 1612227447, 1612228412, 
1612228597, 1612235046, 1612241997, 1612227060, 1612224348, 1612227235, 
1612234517, 1612240919, 1612245430, 1612245749, 1612247129, 1612247337, 
1612228148, 1612226677, 1612231105, 1612243991, 1612225685, 1612225878, 
1612227191, 1612229187, 1612224085, 1612230981, 1612224286, 1612224067, 
1612224427, 1612241971, 1612227060, 1612228351, 1612231023, 1612241649, 
1612243036, 1612226655, 1612224398, 1612225668, 1612226677, 1612227337, 
1612227390, 1612231017, 1612231085, 1612232375, 1612232761, 1612240941, 
1612241749, 1612245893, 1612247016, 1612247031, 1612224695, 1612230535, 
1612240808, 1612225648, 1612229235, 1612244130, 1612224443, 1612224480, 
1612224567, 1612225154, 1612227317, 1612227547, 1612229762), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), endtimes = structure(c(1612227885, 
1612232909, 1612239690, 1612239791, 1612240807, 1612244114, 1612245870, 
1612247329, 1612247346, 1612225198, 1612225495, 1612226391, 1612227899, 
1612243673, 1612234333, 1612240908, 1612245586, 1612224286, 1612230913, 
1612243008, 1612224046, 1612225550, 1612226918, 1612234967, 1612235432, 
1612246917, 1612247337, 1612224520, 1612224741, 1612224878, 1612225246, 
1612225680, 1612226607, 1612226656, 1612228412, 1612228586, 1612230487, 
1612230631, 1612231203, 1612231105, 1612231137, 1612231171, 1612232151, 
1612235005, 1612236571, 1612245037, 1612240935, 1612240947, 1612242008, 
1612242118, 1612243144, 1612244074, 1612243986, 1612244278, 1612245846, 
1612247815, 1612246929, 1612247041, 1612247127, 1612247739, 1612227317, 
1612243866, 1612225495, 1612225813, 1612225845, 1612226266, 1612227024, 
1612229282, 1612239156, 1612241680, 1612247059, 1612247094, 1612247222, 
1612224906, 1612225661, 1612227235, 1612230405, 1612245572, 1612247552, 
1612246888, 1612241961, 1612245870, 1612225695, 1612229761, 1612243144, 
1612239162, 1612224533, 1612224537, 1612225185, 1612247833, 1612224374, 
1612231143, 1612246048, 1612247226, 1612224152, 1612225636, 1612225680, 
1612226918, 1612230888, 1612229722, 1612231029, 1612232966, 1612235409, 
1612243995, 1612239696, 1612224377, 1612224673, 1612225550, 1612230662, 
1612225925, 1612226277, 1612228586, 1612228955, 1612230212, 1612230609, 
1612231489, 1612231203, 1612231672, 1612232375, 1612232803, 1612235187, 
1612239704, 1612241649, 1612243227, 1612245199, 1612247272, 1612224156, 
1612235413, 1612241818, 1612246933, 1612246943, 1612226930, 1612228955, 
1612224621, 1612225487, 1612224892, 1612225423, 1612226965, 1612227417, 
1612227461, 1612228444, 1612228607, 1612246048, 1612242008, 1612227178, 
1612224353, 1612227245, 1612238334, 1612240935, 1612245439, 1612245785, 
1612247135, 1612247346, 1612228163, 1612227404, 1612231121, 1612244002, 
1612225727, 1612227245, 1612227204, 1612229226, 1612224203, 1612230998, 
1612224377, 1612224081, 1612225814, 1612241976, 1612227178, 1612228356, 
1612231029, 1612241740, 1612243041, 1612227231, 1612224426, 1612225683, 
1612227404, 1612227346, 1612227397, 1612231022, 1612231105, 1612232381, 
1612232803, 1612240947, 1612241755, 1612245899, 1612247021, 1612247220, 
1612225301, 1612230673, 1612240884, 1612225661, 1612229282, 1612244136, 
1612224582, 1612224662, 1612233387, 1612226208, 1612227329, 1612228048, 
1612229782), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, 
200L), class = "data.frame")

library(data.table)

setDT(DF)

DF <- unique(DF[, session_id := cumsum(starttimes-shift(endtimes, n=1L, fill=first(starttimes), type="lag") >= 3600), by = UserId][
                , c("count", "starttimes", "endtimes") := .(.N, min(starttimes), max(endtimes)), by = .(UserId, session_id)][
                , session_id := NULL])

head(DF, 10)

> head(DF, 10)
    UserId          starttimes            endtimes count
 1: 0buhGq 2021-02-02 00:24:55 2021-02-02 01:04:45     1
 2: 0buhGq 2021-02-02 02:28:25 2021-02-02 02:28:29     1
 3: 0buhGq 2021-02-02 04:19:52 2021-02-02 06:29:06     7
 4: 0ulN53 2021-02-02 00:02:39 2021-02-02 01:04:59     4
 5: 0ulN53 2021-02-02 02:26:43 2021-02-02 05:27:53     2
 6: 0ulN53 2021-02-02 04:41:43 2021-02-02 04:41:48     1
 7: 0ulN53 2021-02-02 05:59:32 2021-02-02 05:59:46     1
 8: 0wzMkn 2021-02-02 00:04:09 2021-02-02 00:04:46     1
 9: 0wzMkn 2021-02-02 01:55:07 2021-02-02 01:55:13     1
10: 0wzMkn 2021-02-02 05:16:33 2021-02-02 05:16:48     1

基准:

Unit: milliseconds
          expr       min        lq      mean    median        uq       max neval
           shs 73.507502 73.507502 73.507502 73.507502 73.507502 73.507502     1
 ismirsehregal  9.535402  9.535402  9.535402  9.535402  9.535402  9.535402     1

基准代码:

library(lubridate)
library(dplyr)
library(data.table)
library(microbenchmark)

DFdata.table <- DFdplyr <- DF

microbenchmark(shs = {
  DFdplyr %>% 
    group_by(UserId) %>% 
    mutate(grp = {starttimes - lag(endtimes)} %>% 
             {. >= hours(1)} %>%
             ifelse(is.na(.), 0, .) %>% 
             cumsum()
    ) %>% 
    group_by(UserId, grp) %>% 
    summarize(starttimes = first(starttimes), 
              endtimes = last(endtimes),
              count = n())
},
ismirsehregal = {
  setDT(DF)
  DFdata.table <- unique(DFdata.table[, session_id := cumsum(starttimes-shift(endtimes, n=1L, fill=first(starttimes), type="lag") >= 3600), by = UserId][
    , c("count", "starttimes", "endtimes") := .(.N, min(starttimes), max(endtimes)), by = .(UserId, session_id)][
      , session_id := NULL])
}, times = 1L)

关于r - 在 R 中按组有条件地计算两个日期时间列之间的行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68822928/

相关文章:

r - 使用 xts 创建循环

r - 将带有计数的宽数据帧转换为R中的长格式

线性回归中的 R 分类变量

javascript - YouTube Live Streaming API 'scheduledStartTime' 向观众显示错误的时区

带有 DateTime[] 的 C# 数组

r - 如何提取最长的匹配?

Javascript:以秒为单位查找日期时间对象与当前时间之间的差异

php - 使用 PHP 从动态表中计数

mysql - 如何获取每月每天的地址数量

shell - Linux下如何计算两个文件之间的差异?