r - 在 R 中使用 summarise 分组,求汇总度量、非 NA 值和特定值

标签 r dataframe dplyr

我有这个示例数据集 (df)

structure(list(from = c("(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452"), to = c("(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542"), extension = c("9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls"), forwarded_to = c(NA, "(323) 457-2039", 
"(310) 598-1753", "(818) 900-0706", "(818) 539-7811", "(213) 344-4965", 
"(213) 458-7662", "(818) 208-3012", "(818) 293-0175", "(818) 284-4238", 
"(818) 206-9506", "(310) 299-7340", "(310) 405-0875", "(213) 260-9113", 
"(213) 805-5208", "(818) 887-3058", "(424) 271-2141", "(213) 218-6579", 
"(818) 638-9466", "(213) 784-7164", "(323) 457-2038", "(213) 805-6959", 
"(228) 285-7898", "(213) 341-1055", "(213) 568-0979", "(213) 344-4905", 
"(818) 459-3811", NA, "(323) 457-2039", "(310) 598-1753", "(818) 900-0706", 
"(818) 539-7811", "(213) 344-4965", "(213) 458-7662", "(818) 208-3012", 
"(818) 293-0175", "(818) 284-4238", "(818) 206-9506", "(310) 299-7340", 
"(310) 405-0875", "(213) 260-9113", "(213) 805-5208", "(818) 887-3058", 
"(424) 271-2141", "(213) 218-6579", "(818) 638-9466", "(213) 784-7164", 
"(323) 457-2038", "(213) 805-6959", "(228) 285-7898", "(213) 341-1055", 
"(213) 568-0979", "(213) 344-4905", "(818) 459-3811"), date = c("Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018"), time = structure(c(55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55500, 55500, 
55500, 55500, 55500, 55500, 55500, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55380, 55380, 
55380, 55380, 55380, 55380, 55380), class = c("hms", "difftime"
), units = "secs"), action = c("Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe"), action_result = c("Accepted", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "IP Phone Offline", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "Stopped", 
"Stopped", "IP Phone Offline", "Stopped", "Stopped", "Call connected", 
"Stopped", "Accepted", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"IP Phone Offline", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "Stopped", "Stopped", "IP Phone Offline", "Call connected", 
"Stopped", "Stopped", "Stopped"), result_description = c("The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
NA, NA, "The call connected to and was accepted by this number.", 
NA, "The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
"The call connected to and was accepted by this number.", NA, 
NA, NA), duration = structure(c(297, 52, 52, 51, 51, 51, 51, 
51, 51, 0, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 9, 9, 0, 9, 
9, 236, 9, 71, 52, 52, 52, 51, 51, 51, 51, 51, 0, 51, 51, 51, 
51, 51, 51, 51, 51, 51, 51, 7, 7, 0, 13, 7, 7, 7), class = c("hms", 
"difftime"), units = "secs"), ID = c(19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L), CallConnected = c(FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), who_answered = c("NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "(213) 344-4905", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "(213) 341-1055", "NA", 
"NA", "NA")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-54L))

这是一份通话数据,显示了拨打电话和接听电话的电话号码。完整数据集包含更多的号码对,但为了简洁起见,我只发布了其中一对。

我想针对每对号码求出:CallConnected 为 TRUE 的次数、who_answered 中的第一个电话号码,以及与已接通的通话对应的 duration 值。我想过使用 group_by 和 summarise,但想不出办法。

所需的输出如下所示:

from          | to           | CallConnected | WhoAnswered  | Duration
----------------------------------------------------------------------
(122) 212-3452|(700) 890-6542| 2             |(213) 344-4905| 00:03:56

最佳答案

使用 dplyr,我们可以对 CallConnected 中的 TRUE 值求和(sum)来统计接通次数,取 who_answered 列中第一个不为 "NA" 的值,并取 CallConnected 第一次为 TRUE 时对应的 duration 值。

library(dplyr)

# Summarise the call log per caller/callee pair (from, to):
#   count_call   - number of rows where CallConnected is TRUE
#   who_answered - first who_answered entry that is not the "NA" placeholder
#   Duration     - duration of the first row where CallConnected is TRUE
#
# match(TRUE, x) returns the position of the first TRUE and NA when there is
# none, so a pair without any connected call yields NA for who_answered and
# Duration. which.max() would return 1 for an all-FALSE vector and silently
# report the values of the pair's first row instead.
df %>%
  group_by(from, to) %>%
  summarise(
    count_call = sum(CallConnected, na.rm = TRUE),
    who_answered = who_answered[match(TRUE, who_answered != "NA")],
    Duration = duration[match(TRUE, CallConnected)]
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so
  # later verbs do not run per `from` by accident.
  ungroup()


#  from           to             count_call who_answered   Duration
#  <chr>          <chr>               <int> <chr>          <time>  
#1 (122) 212-3452 (700) 890-6542          2 (213) 344-4905 03'56" 

关于 r - 在 R 中使用 summarise 分组,求汇总度量、非 NA 值和特定值,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/55539147/

相关文章:

r - 使用 dplyr 管道在 ggplot2 中动态 ylim

javascript - 单击或悬停时,响应式地更改信息框的颜色

r - 在 R 的 leaflet 中添加许多折线

python - 在 pandas 中,如何在对 DataFrame 求和时获得 DataFrame 作为输出

r - 组合 dplyr 中的 "mutate"和 "across"以及 stringr 中的函数

r - 使用 dplyr 截断数字变量的顶部和底部百分位数

r - 在 R 中对以函数作为参数的函数进行积分

r - 将数据框转换为R中的附属网络

r - 在 R 中交织两个 data.frames

r - 为什么将行名更改为相同时相同的数据帧会变得不同