几个小时以来,我一直在努力解决这个问题,但我似乎无法找到我正在寻找的解决方案。如何在 R 中过滤数据框以获取特定的唯一值,然后使用满足该条件的列名创建新列?
我有这个数据框:
# Reproducible sample of the data: dput() prints R code that recreates the
# first 10 rows of df1.
dput(head(df1,10))
# The expression below is that printed output: a tibble (class "tbl_df") with
# 10 rows and 16 columns, every column stored as character.
# WMA, Waterbody and Name identify the row; the remaining columns hold status
# strings such as "Attaining", "Non-attaining", "Insufficient Data" or "N/A".
# To reproduce the example, assign this expression to df1
# (i.e. df1 <- structure(...)).
structure(list(WMA = c("20", "19", "19", "19", "18", "19", "20",
"20", "20", "19"), Waterbody = c("02040201070010-01", "02040202060040-01",
"02040202060050-01", "02040202060060-01", "02040202150070-01",
"02040202030080-01", "02040201080010-01", "02040201080020-01",
"02040201080030-01", "02040202070010-01"), Name = c("Back Creek (above Yardville-H Sq Road)",
"Barton Run (above Kettle Run Road)", "Barton Run (below Kettle Run Road)",
"Bear Swamp River", "Birch Creek", "Bisphams Mill Creek (below McDonalds Br)",
"Blacks Creek (above 40d06m10s)", "Blacks Creek (Bacons Run to 40d06m10s)",
"Blacks Creek (below Bacons Run)", "Bobbys Run"), DO = c("Insufficient Data",
"Non-attaining", "Non-attaining", "Insufficient Data", "Attaining",
"Attaining", "Attaining", "Attaining", "Attaining", "Insufficient Data"
), `DO Trout` = c("N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A",
"N/A", "N/A", "N/A"), pH = c("Insufficient Data", "Non-attaining",
"Non-attaining", "Insufficient Data", "Attaining", "Attaining",
"Attaining", "Attaining", "Attaining", "Insufficient Data"),
`Total Phosphorus` = c("Non-attaining", "Attaining", "Non-attaining",
"Insufficient Data", "Insufficient Data", "Non-attaining",
"Non-attaining", "Non-attaining", "Non-attaining", "Insufficient Data"
), Nitrate = c("Attaining", "Attaining", "Attaining", "Insufficient Data",
"Insufficient Data", "Attaining", "Attaining", "Attaining",
"Attaining", "Insufficient Data"), `Total Suspended Solids` = c("Attaining",
"Attaining", "Attaining", "Insufficient Data", "Insufficient Data",
"Insufficient Data", "Attaining", "Attaining", "Non-attaining",
"Insufficient Data"), `Total Dissolved Solids` = c("Insufficient Data",
"Attaining", "Attaining", "Insufficient Data", "Insufficient Data",
"Insufficient Data", "Attaining", "Attaining", "Attaining",
"Insufficient Data"), Turbidity = c("Insufficient Data",
"Attaining", "Attaining", "Insufficient Data", "Attaining",
"Insufficient Data", "Attaining", "Attaining", "Attaining",
"Insufficient Data"), `Unionized Ammonia` = c("Attaining",
"Attaining", "Attaining", "Insufficient Data", "Attaining",
"Insufficient Data", "Attaining", "Attaining", "Attaining",
"Insufficient Data"), `Unionized Ammonia Trout` = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A"
), E.coli = c("Insufficient Data", "Attaining", "Attaining",
"Attaining", "Attaining", "Attaining", "Attaining", "Non-attaining",
"Non-attaining", "Attaining"), Enterococcus = c("N/A", "N/A",
"N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A"),
`Total Coliform` = c("N/A", "N/A", "N/A", "N/A", "N/A", "N/A",
"N/A", "N/A", "N/A", "N/A")), .Names = c("WMA", "Waterbody",
"Name", "DO", "DO Trout", "pH", "Total Phosphorus", "Nitrate",
"Total Suspended Solids", "Total Dissolved Solids", "Turbidity",
"Unionized Ammonia", "Unionized Ammonia Trout", "E.coli", "Enterococcus",
"Total Coliform"), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
我想对每一列进行筛选,找出每一行中哪些列的值为 "Non-attaining"(未达标)。然后我想新建一列,列出该行中满足此条件的所有列名。
这是我想要的电子表格:
最佳答案
如果你想要一个 tidyverse 解决方案,你可以:
library(dplyr)
library(tidyr)
# Build a summary column `Imp` that lists, for each row of `df1`, the names of
# all parameter columns whose value contains "Non-attaining".
#
# Input:  `df1` with identifier columns WMA, Waterbody and Name; every other
#         column is treated as a parameter column.
#         NOTE(review): pivot_longer() needs the parameter columns to share a
#         common type; they are all character in the sample data - confirm for
#         the full data set.
# Output: a tibble with WMA, Waterbody, Name and Imp, one row per row of `df1`
#         (assuming the three identifier columns uniquely identify a row).
#         Imp is a comma-separated list of column names in their original
#         column order, or NA when no column matches.
df1 %>%
  select(WMA, Waterbody, Name) %>%
  # Left join so rows without any "Non-attaining" value are kept (Imp = NA).
  left_join(
    df1 %>%
      # Long format: one row per (row of df1, parameter column) pair.
      # pivot_longer() replaces the superseded gather().
      pivot_longer(
        cols = -c(WMA, Waterbody, Name),
        names_to = "ColName",
        values_to = "Value"
      ) %>%
      # Keep only the parameters flagged as non-attaining (case-insensitive).
      filter(grepl("Non-attaining", Value, ignore.case = TRUE, perl = TRUE)) %>%
      group_by(WMA, Waterbody, Name) %>%
      # .groups = "drop" returns an ungrouped tibble and silences the
      # regrouping message, so no separate ungroup() step is needed.
      summarise(Imp = paste(ColName, collapse = ","), .groups = "drop"),
    by = c("WMA", "Waterbody", "Name")
  )
这给出:
# A tibble: 10 x 4
WMA Waterbody Name Imp
<chr> <chr> <chr> <chr>
1 20 02040201070010-01 Back Creek (above Yardville-H Sq Road) Total Phosphorus
2 19 02040202060040-01 Barton Run (above Kettle Run Road) DO,pH
3 19 02040202060050-01 Barton Run (below Kettle Run Road) DO,pH,Total Phosphorus
4 19 02040202060060-01 Bear Swamp River NA
5 18 02040202150070-01 Birch Creek NA
6 19 02040202030080-01 Bisphams Mill Creek (below McDonalds Br) Total Phosphorus
7 20 02040201080010-01 Blacks Creek (above 40d06m10s) Total Phosphorus
8 20 02040201080020-01 Blacks Creek (Bacons Run to 40d06m10s) Total Phosphorus,E.coli
9 20 02040201080030-01 Blacks Creek (below Bacons Run) Total Phosphorus,Total Suspended Solids,E.coli
10 19 02040202070010-01 Bobbys Run NA
关于r - 根据唯一值过滤整个数据框并在 R 中创建一个新列,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51828951/