r - 合并列表中的数据框

标签 r list dataframe

我有一个看起来像这样的列表:

lapply(sample_list, head, 3)

$`2016-04-24 00:00:00.tcp`
   ports freq
8    443  296
12    80  170
5     23   92

$`2016-04-24 00:00:00.udp`
  ports freq
4   161  138
7    53   45
1   123   28

$`2016-04-24 01:00:00.tcp`
   ports freq
13   443  342
20    80  215
10    25   60

$`2016-04-24 01:00:00.udp`
   ports freq
4    161   85
8     53   42
12   902   27

我想合并属于同一协议(protocol)的数据框(即 tcp 的合并在一起,udp 的合并在一起),这样最终结果将是一个包含 2 个数据框的新列表:一个用于 tcp,一个用于 udp,如下所示:

lapply(final_list, head, 3)

$tcp
  ports freq.00:00:00 freq.01:00:00
1   443           296           342
2    80           170           215
3    23            92            51

$udp
  ports freq.00:00:00 freq.01:00:00
1   161           138            85
2    53            45            42
3   123            28            19

数据

dput(sample_list)
structure(list(`2016-04-24 00:00:00.tcp` = structure(list(ports = c("443", 
"80", "23", "21", "22", "25", "445", "110", "389", "135", "465", 
"514", "91", "995", "84", "902"), freq = structure(c(296L, 170L, 
92L, 18L, 16L, 15L, 14L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Dim = 16L)), .Names = c("ports", 
"freq"), row.names = c(8L, 12L, 5L, 3L, 4L, 6L, 9L, 1L, 7L, 2L, 
10L, 11L, 15L, 16L, 13L, 14L), class = "data.frame"), `2016-04-24 00:00:00.udp` = structure(list(
    ports = c("161", "53", "123", "902", "137", "514", "138", 
    "623", "69", "88", "500"), freq = structure(c(138L, 45L, 
    28L, 26L, 24L, 24L, 6L, 6L, 5L, 4L, 1L), .Dim = 11L)), .Names = c("ports", 
"freq"), row.names = c(4L, 7L, 1L, 11L, 2L, 6L, 3L, 8L, 9L, 10L, 
5L), class = "data.frame"), `2016-04-24 01:00:00.tcp` = structure(list(
    ports = c("443", "80", "25", "23", "88", "21", "161", "22", 
    "445", "135", "389", "993", "548", "110", "143", "502", "514", 
    "81", "995", "102", "111", "311", "444", "789", "902", "91"
    ), freq = structure(c(342L, 215L, 60L, 51L, 42L, 32L, 31L, 
    18L, 18L, 6L, 5L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Dim = 26L)), .Names = c("ports", "freq"
), row.names = c(13L, 20L, 10L, 9L, 22L, 7L, 6L, 8L, 15L, 4L, 
12L, 25L, 18L, 2L, 5L, 16L, 17L, 21L, 26L, 1L, 3L, 11L, 14L, 
19L, 23L, 24L), class = "data.frame"), `2016-04-24 01:00:00.udp` = structure(list(
    ports = c("161", "53", "902", "514", "123", "137", "69", 
    "138", "389", "443", "88", "623"), freq = structure(c(85L, 
    42L, 27L, 24L, 19L, 15L, 15L, 4L, 2L, 2L, 2L, 1L), .Dim = 12L)), .Names = c("ports", 
"freq"), row.names = c(4L, 8L, 12L, 7L, 1L, 2L, 10L, 3L, 5L, 
6L, 11L, 9L), class = "data.frame")), .Names = c("2016-04-24 00:00:00.tcp", 
"2016-04-24 00:00:00.udp", "2016-04-24 01:00:00.tcp", "2016-04-24 01:00:00.udp"
))

额外问题:freq 的结构是什么?我以前从未见过 int [1:16(1d)] 这种形式。

str(sample_list$`2016-04-24 00:00:00.tcp`)
'data.frame':   16 obs. of  2 variables:
 $ ports: chr  "443" "80" "23" "21" ...
 $ freq : int [1:16(1d)] 296 170 92 18 16 15 14 4 3 2 ...

我用来创建列表的代码(在本例中称为 try1)

protocol_list <- lapply(per_hour1, function(i) split(i, i$protocol))
Analytic_Protocol_List <- lapply(protocol_list, function(i) lapply(i, dest.ports))
try1 <- lapply(unlist(Analytic_Protocol_List, recursive = FALSE), `[[`, 1)

请注意,来自类似问题(similar questions)的解决方案不适用于这种情况。也许是因为数据的结构?

最佳答案

另一种选择:

library(dplyr)
library(tidyr)

data.table::melt(sample_list) %>%
  separate(L1, into = c("time", "protocol"), sep = "\\.") %>%
  unite(f, variable, time) %>%
  spread(f, value) %>%
  split(.$protocol)

使用您的数据,可以得到:

$tcp
   ports protocol freq_2016-04-24 00:00:00 freq_2016-04-24 01:00:00
1    102      tcp                       NA                        1
2    110      tcp                        4                        2
3    111      tcp                       NA                        1
5    135      tcp                        2                        6
8    143      tcp                       NA                        2
9    161      tcp                       NA                       31
11    21      tcp                       18                       32
12    22      tcp                       16                       18
13    23      tcp                       92                       51
14    25      tcp                       15                       60
15   311      tcp                       NA                        1
16   389      tcp                        3                        5
18   443      tcp                      296                      342
20   444      tcp                       NA                        1
21   445      tcp                       14                       18
22   465      tcp                        2                       NA
24   502      tcp                       NA                        2
25   514      tcp                        2                        2
28   548      tcp                       NA                        3
31   789      tcp                       NA                        1
32    80      tcp                      170                      215
33    81      tcp                       NA                        2
34    84      tcp                        1                       NA
35    88      tcp                       NA                       42
37   902      tcp                        1                        1
39    91      tcp                        2                        1
40   993      tcp                       NA                        4
41   995      tcp                        2                        2

$udp
   ports protocol freq_2016-04-24 00:00:00 freq_2016-04-24 01:00:00
4    123      udp                       28                       19
6    137      udp                       24                       15
7    138      udp                        6                        4
10   161      udp                      138                       85
17   389      udp                       NA                        2
19   443      udp                       NA                        2
23   500      udp                        1                       NA
26   514      udp                       24                       24
27    53      udp                       45                       42
29   623      udp                        6                        1
30    69      udp                        5                       15
36    88      udp                        4                        2
38   902      udp                       26                       27

更新:

如果你想按 freq 降序排序(以 `freq_2016-04-24 00:00:00` 列为准),可以这样做:

data.table::melt(sample_list) %>%
  separate(L1, into = c("time", "protocol"), sep = "\\.") %>%
  unite(f, variable, time) %>%
  spread(f, value) %>%
  arrange(protocol, desc(`freq_2016-04-24 00:00:00`)) %>%
  split(.$protocol) 

关于r - 合并列表中的数据框,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37703707/

相关文章:

Java:将具有重复项的复杂对象中的两个列表合并为一个有序列表

list - LISP:如何从用户那里读取数字并将其存储为列表

python - 需要将整个列从字符串格式转换为 Dataframe 的日期格式

r - 组合ggplot中的图例位置

r - R 包中的 ggplot2 : Notes during CRAN tests

r - dygraphs 不显示超过 10,000 个数据点的线条

r - 处理文件名中的点

.net - 不同类型的用户控件的 WPF 列表

基于多列重新编码数据框列

python - 如何将几个类似的 .csv 文件合并到一个具有给定结构的数据框中