我有一个与此类似的数据框:(事实上,for 循环中有 16 个)
head(data)
# A tibble: 1 x 4
AAA AAC AB AC
1 18 25 39 9
2 20 25 30 7
我想根据列的原始名称动态更改所有列名称,如下所示(我已尝试使用 str_glue,但出现错误):
### I have a for-loop: (NOT WORKING) (this is a part of the loop)
# Excerpt only: the parentheses opened by assign() are not closed in this
# fragment.
# The variable name is "df_" followed by the characters of data[i] from
# position 23 up to the fifth character from the end.
assign(str_glue("df_{str_sub(data[i], 23, - 5)}"),
# Read the i-th file as comma-separated text with a header row.
read.delim(data[i], sep = ",", header = T) %>%
mutate(ID = Participants,
# NOTE(review): `str_glue(...) = AAA` puts a function call where an argument
# name is expected; R cannot parse that, which is presumably the error the
# question describes. dplyr's `:=` with a glue-style string name is the usual
# way to build column names dynamically -- confirm.
str_glue("New_{str_sub(data[i], 23, - 5)}_AAA") = AAA,
# NOTE(review): the new name ends in "_AAB" but the source column is AAC; the
# desired output shown in the question uses "_AAC" -- confirm which is meant.
str_glue("New_{str_sub(data[i], 23, - 5)}_AAB") = AAC,
str_glue("New_{str_sub(data[i], 23, - 5)}_AB") = AB,
str_glue("New_{str_sub(data[i], 23, - 5)}_AC") = AC)
期望的输出:
### Note:
### depending on the index-i,
### str_glue("New_{str_sub(data[i], 23, - 5)}_AAA") will get me either 50,100 or 150
### desired output for i = 1
New_50_AAA New_50_AAC New_50_AB New_50_AC
1 18 25 39 9
2 20 25 30 7
我确信有一种优雅的方法可以做到这一点。我看过一些相关的帖子,但似乎没有一个对我有帮助。有任何想法吗?谢谢:)
PS:如果还有办法动态复用原始列名,而不必每次重复写 str_glue(...),那就完美了,这样可以省掉 4 行代码
编辑
整个循环如下所示:
“data”是 16 个 .txt 文件的列表,每个文件名为“xxxxxxxxx_xx_50.txt”、“xxxxxxxxx_xx_100.txt”(依此类推)
# Question code (reported as not working): loops over the file paths in
# `data`, reads each file and tries to add prefixed copies of its columns.
for (i in 1:length(data)) {
# First branch: file names matching the "x1" pattern get the "New_1" prefix.
if (grepl("xxxxxxxxx_x1_.txt$", data[i])) {
# The result is stored in a variable named "df_narr" followed by the
# characters of data[i] from position 23 up to the fifth from the end.
# NOTE(review): both branches build the name from the same "df_narr" prefix,
# so two files with the same suffix would presumably overwrite each other.
assign(str_glue("df_narr{str_sub(data[i], 23, - 5)}"),
# Read the file as comma-separated text with a header row.
read.delim(data[i], sep = ",", header = T) %>%
mutate(ID = Participants,
# NOTE(review): `str_glue(...) = AAA` puts a function call where an argument
# name is expected; R cannot parse that, which is presumably the reported
# error. dplyr's `:=` with a glue-style string name is the usual way to build
# column names dynamically -- confirm.
str_glue("New_1{str_sub(data[i], 23, - 5)}_AAA") = AAA,
# NOTE(review): new name ends in "_AAB" but the source column is AAC -- confirm.
str_glue("New_1{str_sub(data[i], 23, - 5)}_AAB") = AAC,
str_glue("New_1{str_sub(data[i], 23, - 5)}_AB") = AB,
str_glue("New_1{str_sub(data[i], 23, - 5)}_AC") = AC) %>%
# Round every numeric column to 2 digits.
mutate_if(is.numeric, round, digits = 2))
# Second branch: same steps for the "x2" pattern, with the "New_2" prefix.
} else if (grepl("xxxxxxxxx_x2_.txt$", data[i])) {
assign(str_glue("df_narr{str_sub(data[i], 23, - 5)}"),
read.delim(data[i], sep = ",", header = T) %>%
mutate(ID = Participants,
str_glue("New_2{str_sub(data[i], 23, - 5)}_AAA") = AAA,
str_glue("New_2{str_sub(data[i], 23, - 5)}_AAB") = AAC,
str_glue("New_2{str_sub(data[i], 23, - 5)}_AB") = AB,
str_glue("New_2{str_sub(data[i], 23, - 5)}_AC") = AC) %>%
mutate_if(is.numeric, round, digits = 2))
}
}
最佳答案
最好将数据集保存在一个 list 中再重命名:先用 list.files 获取文件夹中的文件,然后从文件名中提取 .txt 之前的数字(\\d+,即一位或多位数字),保存为 'nm1';接着用 Map 同时遍历文件和提取出的名称,读取数据,并通过粘贴 “New_”、相应数字(“nm”)和原始列名称来修改列名称:
# List every .txt file in the folder; full paths let the files be read from
# any working directory.
files <- list.files(path = 'path/to/your/folder', pattern = "\\.txt$",
                    full.names = TRUE)

# Trailing number of each file name, e.g. "xxxxxxxxx_xx_50.txt" -> "50".
nm1 <- sub(".*_(\\d+)\\.txt", "\\1", basename(files))

# Read one comma-separated file and prefix its column names.
#
# path: path to a comma-separated text file with a header row.
# nm:   label inserted into the new column names.
# Returns a data.frame whose numeric columns are rounded to 2 digits, whose
# "Participants" column (if present) is renamed "ID", and whose remaining
# columns are renamed "New_<nm>_<original name>".
read_renamed <- function(path, nm) {
  # The files are comma-separated with a header row. read.table()'s defaults
  # (whitespace separator, guessed header) would not pick up the column
  # names, so the renaming below would never see the original names.
  tmp <- read.delim(path, sep = ",", header = TRUE)
  # vapply() always yields a logical vector, even for a zero-column table.
  num_cols <- vapply(tmp, is.numeric, logical(1))
  tmp[num_cols] <- lapply(tmp[num_cols], round, digits = 2)
  cols_to_rename <- names(tmp) != "Participants"
  names(tmp)[cols_to_rename] <- paste0("New_", nm, "_",
                                       names(tmp)[cols_to_rename])
  names(tmp)[!cols_to_rename] <- "ID"
  tmp
}

# One renamed data.frame per file, kept together in a list.
lst1 <- Map(read_renamed, files, nm1)
或者使用tidyverse
library(dplyr)
library(readr)
library(purrr)
library(stringr)
# Read every file, keyed by the number extracted from its name.
# In each resulting tibble "Participants" becomes "ID", every other column
# becomes "New_<number>_<original name>", and numeric columns are rounded to
# 2 digits.
lst2 <- imap(setNames(files, nm1), \(path, nm) {
  # The files are comma-separated, so use read_csv(); read_table() expects
  # whitespace-separated columns.
  read_csv(path) %>%
    rename_with(\(col) str_c("New_", nm, "_", col), -Participants) %>%
    # An explicit lambda replaces passing `digits` through across()'s `...`,
    # which dplyr deprecated in 1.1.0.
    mutate(across(where(is.numeric), \(col) round(col, digits = 2))) %>%
    rename(ID = Participants)
})
或者使用 for 循环:
# Preallocate one slot per file for the output of the `for` loop.
lst3 <- vector('list', length(files))
# Loop over the file indices; seq_along() is safe when `files` is empty.
for (i in seq_along(files)) {
  # The files are comma-separated with a header row. read.table()'s defaults
  # (whitespace separator, guessed header) would not pick up the column names.
  tmp <- read.delim(files[i], sep = ",", header = TRUE)
  # Every column except "Participants" gets the "New_<number>_" prefix;
  # "Participants" itself becomes "ID".
  cols_to_rename <- names(tmp) != "Participants"
  names(tmp)[cols_to_rename] <- paste0("New_", nm1[i], "_",
                                       names(tmp)[cols_to_rename])
  names(tmp)[!cols_to_rename] <- "ID"
  # vapply() always yields a logical vector, even for a zero-column table.
  num_cols <- vapply(tmp, is.numeric, logical(1))
  tmp[num_cols] <- lapply(tmp[num_cols], round, digits = 2)
  lst3[[i]] <- tmp
}
关于根据原始列名称重命名列 R,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/73938635/