我想根据条件,从一个包含多个数据框(df)的列表中,把符合条件的 df 提取为单独的数据框;条件是该 df 的列名中包含我要查找的名称。
出于说明目的,我创建了一个类似于我所处情况的示例。
我有一个包含多个数据框的列表,该列表的输出如下:
# dput() representation of the example list: four named elements (V1..V4),
# each a data.frame.
#   V1: columns lvef, hbc, id       (10 rows)
#   V2: columns ersta, pgrsta, id   (20 rows)
#   V3: columns hormrec, event      (15 rows)
#   V4: columns asat, lab           (13 rows)
# Of the column names searched for below ("hbc", "ersta"), only V1 and V2
# contain one.
structure(list(V1 = structure(list(lvef = c(0.965686195194885,
0.0806777632648268, -0.531729196500083, -0.511913109608259, -0.413670941196816,
-0.0501899795864357, -0.337583918771946, 1.16086745780346, -0.478358865835724,
-1.95009138673888), hbc = c(-0.389950511350405, -0.904388183933348,
0.811821977223064, -0.868381700124344, -0.637307418402866, -1.04703715824204,
-0.394340445217658, -0.194653869597247, 0.00822402232044511,
-0.145032587618231), id = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "NA", class = "factor")), .Names = c("lvef",
"hbc", "id"), row.names = c(NA, -10L), class = "data.frame"),
V2 = structure(list(ersta = c(-0.254360310986174, 0.3859806928747,
-0.135741797055127, 1.03929145413636, -0.484219739337178,
0.255476285148917, 1.0479422937128, 0.146613094683722, -0.914377222535014,
1.75052418161618, -0.275059500684816, 2.34861397588234, 0.00183723766664941,
0.97612891408903, 0.278868537504227, 0.456979477254684, 1.46323739326792,
0.664511602217853, 0.870420202897545, 1.38228375734407),
pgrsta = c(-1.49129812271989, 0.820330747101906, -0.0469488167129374,
0.471549380446308, -1.71312120132398, 0.0578140025416816,
1.67016363826724, 0.226180835709491, -2.00294530465909,
-0.0464857361954717, 0.306942902768782, -0.785096914460742,
0.283822632249141, -0.260774679911329, -1.2865970194309,
0.307972619170242, 0.223715024597144, -1.01642533651475,
-0.12229427204957, 0.223326519096996), id = structure(c(7L,
7L, 7L, 7L, 4L, 1L, 3L, 5L, 6L, 2L, 7L, 7L, 7L, 7L, 4L,
1L, 3L, 5L, 6L, 2L), class = "factor", .Label = c("-0.10863576856322",
"-0.317324527228699", "-0.422764348315332", "0.285132258310185",
"1.23305496219042", "1.39326602279981", "NA"))), .Names = c("ersta",
"pgrsta", "id"), row.names = c(NA, -20L), class = "data.frame"),
V3 = structure(list(hormrec = 1:15, event = structure(c(10L,
10L, 10L, 10L, 10L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"NA"), class = "factor")), .Names = c("hormrec", "event"), row.names = c(NA,
-15L), class = "data.frame"), V4 = structure(list(asat = c(-0.321423784000631,
0.181345361079582, 0.389158724418319, -1.15251833725336,
-0.351981383678293, -0.506888212379408, 0.870705917350059,
-0.626883041051641, -0.321843006223371, -0.674564527029912,
-0.609383943267379, -0.181661119817784, -1.63676077872658
), lab = structure(c(1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 2L), .Label = c("btest", "NA", "rtest"), class = "factor")), .Names = c("asat",
"lab"), row.names = c(NA, -13L), class = "data.frame")), .Names = c("V1",
"V2", "V3", "V4"))
我试图从列表中提取数据帧,条件是如果列表中的数据帧包含所需的列名称,则列表中的该数据帧应进入单独的数据帧。到目前为止,我已经能够使用以下代码将数据帧提取到列表中:
# Scan a list of data frames and hand back, position by position, either the
# data frame (when it has at least one of the wanted columns) or NULL.
#
# Args:
#   x: a list whose elements can be coerced with data.frame().
# Returns:
#   An unnamed list of the same length as `x`; element i is data.frame(x[[i]])
#   if any of its column names is "hbc" or "ersta", otherwise NULL.
trial <- function(x)
{
  wanted <- c("hbc", "ersta")   # column names that qualify a data frame
  frames <- x
  lapply(seq(frames), function(idx) {
    candidate <- data.frame(frames[[idx]])
    # An `if` with no `else` evaluates to NULL when the condition is FALSE,
    # so non-matching positions come back as NULL.
    if (any(wanted %in% names(candidate))) candidate
  })
}
上面的函数返回一个列表:匹配的 df 原样返回,不匹配的位置则为 NULL。我希望修改它,使选中的数据框各自单独输出;如果有两个 df 满足条件,输出就应该是两个独立的数据框。
我将感谢所有帮助寻找解决方案的人。
最佳答案
您可以使用 lapply() 配合自定义函数来筛选出所需的数据框。
例如,如果 k 是您的列表,可以定义如下函数并调用 trial(k):
# Keep only the data frames in a list that contain at least one of the
# required column names.
#
# Args:
#   x: a list of data frames.
# Returns:
#   The sub-list of `x` (element names and order preserved) whose data frames
#   have a column named "hbc" or "ersta". An empty list if none match.
#   To bind each result to its own variable, a named result can be passed to
#   list2env(), e.g. list2env(trial(k), envir = globalenv()).
trial <- function(x)
{
  reqnames <- c("hbc", "ersta")
  # Inspect the argument `x` itself; the previous version iterated over a
  # free variable `k`, so it only worked when a global `k` happened to exist.
  # vapply() guarantees a plain logical vector suitable for indexing.
  keep <- vapply(x, function(df) any(names(df) %in% reqnames), logical(1))
  x[keep]
}
这将输出一个列表,其中仅包含至少包含 reqnames 中的一个名称的数据帧。
关于r - 根据r中的列名从列表中提取数据框,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32251679/