所以我有一堆中间数据框对象,它们在我的全局环境中根据它们的顺序进行编号。即 IRIS1_St、IRIS2_Db、IRIS1_Start、IRIS2_FIXAR、IRIS4_Change、IRIS10_bananas
我已经弄清楚如何提取这些并返回行数和列数而不保留列表列(见下文),并且从技术上讲我已经提取了列名称。但我一生都无法弄清楚如何将此 colname 列转换为不是列表的内容,这样我就可以比较滞后值并返回一个更简单的列来显示新内容。我尝试过 data.table()、data.frame()、as.character(),然后使用 str_replace_all() 将其转换为向量。但似乎没有任何效果,这似乎是因为我不擅长使用列表!
library(dplyr)
library(purrr)
library(stringr)
IRIS1_St <- iris
IRIS2_Db <- IRIS1_St %>%
mutate(Petal.Length2 = Petal.Length*2)
IRIS3_Sum <- IRIS2_Db %>%
mutate(Sepal.sum = sum(Sepal.Length, Sepal.Width))
IRIS4_Change <- IRIS3_Sum %>%
mutate(SL.Change = Sepal.Length - lag(Sepal.Length)) %>% filter(Petal.Length >=4)
IRIS10_bananas <- IRIS4_Change %>% mutate( bananas = case_when(Sepal.Length >6 ~ "BANANAS!!"))
Obj_Size <- grep("^IRIS",names(.GlobalEnv),value=TRUE) %>%
na.omit() %>%
mget(envir = globalenv()) %>%
{OS <<-.} %>%
map_df(nrow) %>%
pivot_longer(1:max(ncol(.)), names_to = "Obj_name", values_to = "nrow") %>%
left_join(OS %>%
map_df(ncol) %>%
pivot_longer(1:max(ncol(.)), names_to = "Obj_name", values_to = "ncol")
) %>%
data.frame(OS %>%
lapply(colnames) %>%
data.table()) %>%
mutate(number = as.numeric(replace_na(str_extract(Obj_name, "(?i)(?<=IRIS\\D{0,1})\\d+"), 0))) %>%
arrange(number, Obj_name) %>%
select(-number) %>% data.frame() %>%
rename(colnames = '.')
#just to seperate out the colname extraction I've done so far
OST <- OS %>% lapply(colnames) %>% data.table()
为了提取新内容,我尝试了以下操作,但由于我列出了一个列表,所以结果搞砸了。
Obj_Size_New <- Obj_Size %>%
mutate(lag_col = as.character(lag(colnames)),
new_col = setdiff(as.character(colnames), lag_col))
预期输出如下;
Obj_name nrow ncol new_col
1 IRIS1_St 150 5 Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Species
2 IRIS2_Db 150 6 Petal.Length2
3 IRIS3_Sum 150 7 Sepal.sum
4 IRIS4_Change 89 8 SL.Change
5 IRIS10_bananas 89 9 bananas
我使用下面 akrun 的建议重写了上面的内容;
library(dplyr)
library(purrr)
library(stringr)
library(tibble)
Obj_Size <- grep("^IRIS",names(.GlobalEnv),value=TRUE) %>% #ID all objects in GE starting with "IRIS"
na.omit() %>%
mget(envir = globalenv()) %>% #Use base R to get them
{OS <<-.} %>% #create intermediate object in GE to join to later
map_df(nrow) %>% #Map nrow using purrr
pivot_longer(1:max(ncol(.)), names_to = "Obj_name", values_to = "nrow") %>% #pivot so it's readable
left_join(OS %>% #repeat with ncol and join back to dataset
map_df(ncol) %>%
pivot_longer(1:max(ncol(.)), names_to = "Obj_name", values_to = "ncol")
) %>%
left_join(OS %>% #repeat with colnames
map(colnames) %>%
enframe() %>% #create 2 col dataframe
rename(Obj_name = name,
colnames = value)) %>%
mutate(number = as.numeric(replace_na(str_extract(Obj_name, "(?i)(?<=IRIS\\D{0,1})\\d+"), 0))) %>% #extract number after IRIS in object name so we can order correctly even when we get to 10 as when ordered by name with 10 it puts it after 1.
arrange(number, Obj_name) %>%
select(-number) %>%
data.frame() %>%
mutate (new_col = map2_chr(colnames, lag(colnames), ~toString(setdiff(.x, .y)))) #Id changes between colnames and only return anything new.
最佳答案
我们可以使用map2
在这里执行setdiff
,通过比较list
列来返回一个list
列与 list
列的 lag
library(dplyr)
library(purrr)
Obj_Size %>%
mutate(new_col = map2(colnames, lag(colnames), setdiff), colnames = NULL)
-输出
# Obj_name nrow ncol new_col
#1 IRIS1_St 150 5 Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Species
#2 IRIS2_Db 150 6 Petal.Length2
#3 IRIS3_Sum 150 7 Sepal.sum
#4 IRIS4_Change 89 8 SL.Change
#5 IRIS10_bananas 89 9 bananas
如果需要是字符
列,请使用
library(stringr)
Obj_Size %>%
mutate(new_col = map2_chr(colnames, lag(colnames),
~ str_c(setdiff(.x, .y), collapse=", ")), colnames = NULL)
-输出
# Obj_name nrow ncol new_col
#1 IRIS1_St 150 5 Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Species
#2 IRIS2_Db 150 6 Petal.Length2
#3 IRIS3_Sum 150 7 Sepal.sum
#4 IRIS4_Change 89 8 SL.Change
#5 IRIS10_bananas 89 9 bananas
或者将base R
与Map
结合使用
Obj_Size$new_col <- Map(setdiff, Obj_Size$colnames, c(NA, head(Obj_Size$colnames,-1)))
关于r - 从全局环境中的对象获取 colname(具有特定模式),然后返回新内容,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/64272738/