r - 按组消除不存在某些因子值的行

标签 r data.table

我有一个如下所示的表格(数据发布在下面):

我想排除所有 var 没有至少三个值(即 0、1、2、3)的 iso3c(三字母代码)。例如,AGO 唯一的取值是 0,所以需要把它删除;下一个 ALB 可以保留,因为它的取值一直到 3。我尝试了类似这样的写法:setDT(DT)[max(as.numeric(as.character(var))) <3, iso3c:=NA, by="iso3c"],但这对数据没有任何影响。

有什么想法吗?

table(DT)

var AGO ALB ARG ARM AZE BDI BEN BFA BGR BiH BLR BOL BRA BWA CHL CHN CMR COD COL CPV CRI Czech DEU DOM ECU ESP EST GEO GIN GMB GRC GTM GUY HND HRV HUN IDN IND IRL JAM JOR
  0  10   4  10   8   7   9   3   9   2   5   7   9   3   8   9   1  10  10  10   9   5     3   7   3   8   6   6   2  10  10   5   9   6   8   6   8   6   7   6   1  10
  1   0   3   0   2   3   1   2   1   2   3   3   1   1   2   0   3   0   0   0   1   1     3   2   0   0   0   2   2   0   0   5   0   0   1   3   1   1   2   2   0   0
  2   0   1   0   0   0   0   2   0   3   2   0   0   2   0   1   4   0   0   0   0   2     3   0   1   0   3   2   2   0   0   0   0   2   0   0   0   0   0   0   4   0
  3   0   2   0   0   0   0   2   0   2   0   0   0   2   0   0   2   0   0   0   0   0     1   1   5   0   1   0   4   0   0   0   0   2   0   1   1   2   0   2   2   0
  4   0   0   0   0   0   0   1   0   1   0   0   0   2   0   0   0   0   0   0   0   2     0   0   1   2   0   0   0   0   0   0   1   0   1   0   0   1   1   0   3   0
   iso3c
var KAZ KEN KGZ KHM KOR LAO LBN LKA LSO LTU LVA MAR MDA MDG MEX MKD MLI MNE MNG MRT MUS MWI NAM NIC PAN PER PHL POL PRT PRY PSE ROU RUS SEN Serbia&Montenegro SLV SRB SVK
  0  10   1   2   2   8   2   4   5   0   3   4   7   4   1   9   3   4   2   4   9   5   0   5  10  10   7   7   6   4   9  10   4   2   4                 3   8   3   7
  1   0   0   4   2   2   4   2   3   1   4   1   0   3   2   1   3   1   1   2   1   1   0   5   0   0   2   0   3   2   1   0   3   3   5                 4   2   3   2
  2   0   1   1   2   0   3   1   1   3   3   4   1   3   5   0   2   2   4   1   0   1   1   0   0   0   0   1   0   2   0   0   2   5   1                 1   0   2   0
  3   0   4   3   3   0   1   2   1   3   0   1   0   0   2   0   2   1   2   2   0   3   2   0   0   0   1   2   1   2   0   0   1   0   0                 2   0   1   1
  4   0   4   0   1   0   0   1   0   3   0   0   2   0   0   0   0   2   1   1   0   0   7   0   0   0   0   0   0   0   0   0   0   0   0                 0   0   1   0
   iso3c
var SVN SWZ TJK TUR TZA UGA UKR URY UZB VNM ZAF ZMB
  0   7  10   6   2   7   9   5  10   7   6   2   0
  1   0   0   3   3   2   1   1   0   3   3   2   4
  2   2   0   1   3   0   0   3   0   0   1   2   1
  3   1   0   0   0   1   0   1   0   0   0   1   3
  4   0   0   0   2   0   0   0   0   0   0   3   2

数据

DT <- structure(list(var= structure(c(2, 0, 0, 3, 4, 
4, 1, 0, 2, 3, 1, 2, 3, 4, 2, 3, 3, 0, 0, 1, 3, 2, 3, 2, 2, 0, 
2, 1, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 
0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 3, 4, 3, 
0, 0, 3, 4, 3, 4, 0, 4, 4, 3, 3, 2, 0, 2, 0, 2, 3, 2, 3, 1, 2, 
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 3, 0, 
0, 0, 3, 0, 0, 0, 2, 3, 0, 0, 1, 2, 0, 4, 2, 1, 0, 1, 3, 0, 0, 
1, 1, 0, 2, 0, 1, 0, 3, 0, 0, 0, 3, 0, 1, 0, 0, 1, 0, 0, 3, 1, 
1, 4, 1, 2, 3, 1, 4, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 4, 2, 
1, 1, 4, 3, 2, 0, 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 
0, 0, 2, 0, 0, 3, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 0, 4, 0, 3, 0, 
1, 2, 4, 2, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 1, 3, 0, 2, 3, 0, 0, 
3, 1, 0, 0, 2, 4, 0, 4, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 2, 3, 3, 
1, 1, 2, 3, 3, 3, 1, 1, 1, 1, 0, 0, 2, 4, 4, 3, 4, 3, 2, 4, 4, 
4, 4, 3, 3, 1, 4, 2, 0, 2, 1, 0, 0, 1, 0, 2, 0, 1, 0, 0, 2, 3, 
3, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 3, 0, 0, 1, 3, 0, 1, 4, 2, 0, 
0, 0, 1, 4, 0, 0, 0, 2, 2, 4, 4, 0, 3, 1, 1, 0, 3, 0, 0, 2, 1, 
3, 4, 3, 2, 2, 4, 4, 3, 2, 3, 4, 3, 4, 0, 4, 2, 2, 2, 2, 0, 1, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 
0, 0, 3, 3, 2, 3, 4, 3, 1, 1, 0, 2, 0, 3, 1, 0, 2, 0, 0, 3, 1, 
2, 1, 1, 3, 0, 0, 0, 1, 0, 0, 1, 1, 3, 0, 0, 0, 2, 1, 0, 1, 2, 
0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 3, 0, 1, 0, 1, 3, 1, 3, 
1, 0, 2, 0, 1, 0, 2, 1, 4, 4, 1, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 
1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 3, 0, 3, 0, 0, 1, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 3, 0, 3, 0, 0, 2, 2, 1, 0, 1, 1, 
2, 1, 0, 0, 2, 1, 2, 1, 2, 0, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 0, 
2, 1, 3, 1, 0, 2, 2, 0, 4, 3, 3, 0, 2, 2, 0, 1, 2, 0, 0, 2, 0, 
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 
2, 0, 0, 0, 2, 1, 0, 0, 3, 3, 0, 3, 2, 0, 1, 1, 2, 3, 2, 1, 1, 
0, 2, 1, 0, 1, 0, 2, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 1, 0, 1, 2, 
1, 0, 2, 2, 2, 0, 0, 2, 2, 1, 0, 2, 0, 0, 3, 2, 2, 0, 0, 0, 0, 
2, 3, 0, 0, 1, 0, 0, 1, 0, 3, 3, 0, 0, 0, 0, 0, 3, 0, 0, 1, 0, 
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 1, 2, 1, 3, 
0, 2), label = "label", format.stata = "%20.0f", class = c("haven_labelled", 
"vctrs_vctr", "double"), labels = c(`No obstacle` = 0, `Minor obstacle` = 1, 
`Moderate obstacle` = 2, `Major obstacle` = 3, `Very Severe Obstacle` = 4
)), iso3c = structure(c(13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 32L, 32L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 
37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 43L, 43L, 43L, 
43L, 43L, 43L, 43L, 43L, 43L, 43L, 59L, 59L, 59L, 59L, 59L, 59L, 
59L, 59L, 59L, 59L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 
65L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 68L, 68L, 
68L, 68L, 68L, 68L, 68L, 68L, 68L, 68L, 78L, 78L, 78L, 78L, 78L, 
78L, 78L, 78L, 78L, 78L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 
49L, 49L, 84L, 84L, 84L, 84L, 84L, 84L, 84L, 84L, 84L, 84L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 77L, 77L, 77L, 77L, 
77L, 77L, 77L, 77L, 77L, 77L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 
90L, 90L, 90L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 75L, 75L, 75L, 
75L, 75L, 75L, 75L, 75L, 75L, 75L, 58L, 58L, 58L, 58L, 58L, 58L, 
58L, 58L, 58L, 58L, 89L, 89L, 89L, 89L, 89L, 89L, 89L, 89L, 89L, 
89L, 62L, 62L, 62L, 62L, 62L, 62L, 62L, 62L, 62L, 62L, 53L, 53L, 
53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 55L, 55L, 55L, 55L, 55L, 
55L, 55L, 55L, 55L, 55L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 
44L, 44L, 63L, 63L, 63L, 63L, 63L, 63L, 63L, 63L, 63L, 63L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 70L, 70L, 70L, 70L, 70L, 
70L, 70L, 70L, 70L, 70L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 
31L, 31L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 60L, 60L, 60L, 60L, 
60L, 60L, 60L, 60L, 60L, 60L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 
50L, 50L, 50L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 17L, 17L, 17L, 17L, 17L, 
17L, 17L, 17L, 17L, 17L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
18L, 18L, 18L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
61L, 61L, 61L, 61L, 61L, 61L, 61L, 61L, 61L, 61L, 85L, 85L, 85L, 
85L, 85L, 85L, 85L, 85L, 85L, 85L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 
64L, 81L, 81L, 81L, 81L, 81L, 81L, 81L, 81L, 81L, 81L, 29L, 29L, 
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 56L, 56L, 
56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 66L, 66L, 66L, 66L, 66L, 
66L, 66L, 66L, 66L, 66L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 87L, 
87L, 87L, 71L, 71L, 71L, 71L, 71L, 71L, 71L, 71L, 71L, 71L, 38L, 
38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 72L, 72L, 72L, 72L, 
72L, 72L, 72L, 72L, 72L, 72L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 
57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 35L, 35L, 35L, 35L, 35L, 35L, 35L, 
35L, 35L, 35L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 76L, 76L, 76L, 
76L, 76L, 76L, 76L, 76L, 76L, 76L, 83L, 83L, 83L, 83L, 83L, 83L, 
83L, 83L, 83L, 83L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 69L, 69L, 69L, 69L, 69L, 69L, 69L, 69L, 69L, 69L, 36L, 36L, 
36L, 36L, 36L, 36L, 36L, 36L, 36L, 36L, 79L, 79L, 79L, 79L, 79L, 
79L, 79L, 79L, 79L, 79L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 
22L, 22L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 73L, 
73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
52L, 52L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 82L, 82L, 82L, 
82L, 82L, 82L, 82L, 82L, 82L, 82L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 88L, 88L, 88L, 88L, 88L, 88L, 88L, 88L, 88L, 
88L, 74L, 74L, 74L, 74L, 74L, 74L, 74L, 74L, 74L, 74L, 86L, 86L, 
86L, 86L, 86L, 86L, 86L, 86L, 86L, 86L, 26L, 26L, 26L, 26L, 26L, 
26L, 26L, 26L, 26L, 26L, 39L, 39L, 39L, 39L, 39L, 39L, 39L, 39L, 
39L, 39L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 46L, 
46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 47L, 47L, 47L, 47L, 
47L, 47L, 47L, 47L, 47L, 47L), .Label = c("AGO", "ALB", "ARG", 
"ARM", "AZE", "BDI", "BEN", "BFA", "BGR", "BiH", "BLR", "BOL", 
"BRA", "BWA", "CHL", "CHN", "CMR", "COD", "COL", "CPV", "CRI", 
"Czech", "DEU", "DOM", "ECU", "ESP", "EST", "GEO", "GIN", "GMB", 
"GRC", "GTM", "GUY", "HND", "HRV", "HUN", "IDN", "IND", "IRL", 
"JAM", "JOR", "KAZ", "KEN", "KGZ", "KHM", "KOR", "LAO", "LBN", 
"LKA", "LSO", "LTU", "LVA", "MAR", "MDA", "MDG", "MEX", "MKD", 
"MLI", "MNE", "MNG", "MRT", "MUS", "MWI", "NAM", "NIC", "PAN", 
"PER", "PHL", "POL", "PRT", "PRY", "PSE", "ROU", "RUS", "SEN", 
"Serbia&Montenegro", "SLV", "SRB", "SVK", "SVN", "SWZ", "TJK", 
"TUR", "TZA", "UGA", "UKR", "URY", "UZB", "VNM", "ZAF", "ZMB"
), class = "factor")), row.names = c(NA, -910L), class = c("data.table", 
"data.frame"))

最佳答案

按“iso3c”分组,用 uniqueN 计算每组“var”中不同取值的个数,并据此得到一个逻辑条件(个数 >= 3);对满足条件的组取出其行索引 (.I),再用这些行索引对 DT 取子集,最后用 droplevels 去掉不再出现的因子水平:

# Keep only the iso3c groups whose var takes at least three distinct values.
# Within each group, uniqueN(var) >= 3 is a single TRUE/FALSE, so .I[...]
# yields either all of that group's row numbers or none of them.
# The inner query returns those row numbers in column V1, which is then used
# to subset DT; droplevels() discards factor levels that no longer occur.
DT1 <- droplevels(DT[DT[, .I[uniqueN(var) >=3],iso3c]$V1])

关于r - 按组消除不存在某些因子值的行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/66785543/

相关文章:

r - 使用 MICE 进行多重插补后,如何在 R 中的子集上使用 lm() 运行线性回归

r - 如果某些行满足条件,则使用 R 中的数据表标记组中的所有行

r - 根据特定列上的 rep 函数在 R 中的行中查找序列

r - 使用 MLR 包调整 randomForest 截止值

r - "object 'ansvals' not found"错误 - 这是什么意思?

r - 根据分组从列中减去值

r - 使用 fread 导入数据后所有列均作为字符

r - 有条件地将一个列列表替换为其他列列表

r - 带有 arules/apriori 的 rhs 过滤器不起作用

r - 如何添加前导零?