r - 根据特定列更改数据框中的值

标签 r dataframe dplyr na tidy

我目前有一个数据框,由置信区间的下限、上限以及点估计组成。我想创建一个新的数据框,以便只绘制点估计不等于零的变量。对于 n 的每个值,如果点估计值为 0,是否可以将对应的上限和下限的值更改为 NA?例如,在所提供的 n = 205 的数据框中,某些行的 y_pe = 0,所以我想将这些行中相应的 y_lo 和 y_up 更改为 NA。

# Example data: for each of three variables (X1, x, y) the lower bound (_lo),
# upper bound (_up) and point estimate (_pe) of a confidence interval.

# `n` was used in cbind() below but never defined, so the snippet could not
# run. The printed results further down show n = 205 for all ten rows, so it
# is reconstructed here accordingly.
n <- rep(205, 10)

X1_lo <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
X1_up <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
X1_pe <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
x_lo <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
x_up <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
x_pe <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
y_lo <- c(-24.71177, -25.13779, -16.19142, -15.63819, -15.42051,
          -16.11342, -17.10603, -18.00848, -19.59877, -12.91438)
y_up <- c(14.074116, 14.051209, 13.417954, 12.187319, 13.602022,
          12.943939, 1.317839, 11.891103, 15.165398, 1.365459)
y_pe <- c(-2.984101, -2.867680, -2.695838, -2.583140, -2.416878,
          0.000000, 0.000000, 0.000000, 0.000000, 0.000000)

# cbind() on numeric vectors produces a numeric matrix (not a data.frame);
# the column names are taken from the argument names.
test.df <- cbind(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)

提前致谢。

最佳答案

在 i 中指定逻辑向量、在 j 中指定列名,然后将 'y_pe' 为 0 的那些行中对应的列赋值为 NA

# Blank out the y interval bounds wherever the y point estimate is zero.
test.df[
  test.df[, "y_pe"] == 0,  # i: logical row mask, TRUE where y_pe equals 0
  c("y_lo", "y_up")        # j: the two bound columns to overwrite
] <- NA

如果数据是 data.frame,并且希望将此操作应用于除第一列 ('n') 之外的所有列组,那么我们可以按列名前缀将其拆分为一个由 data.frame 组成的列表,然后分别进行替换

# Variable prefix of every column except the first one ("n"):
# everything from the first underscore onwards is stripped, e.g. "y_lo" -> "y".
nm1 <- sub("_.*", "", colnames(test.df)[-1])

# Split the columns (not the rows) into one data.frame per prefix, blank the
# non-"pe" columns in each piece, then bind the pieces back together.
# unname() drops the list names so cbind() does not prefix the column names.
out <- do.call(
  cbind,
  unname(
    lapply(
      split.default(test.df[-1], nm1),
      function(grp) {
        # Marks the column whose name ends in "pe"
        # (assumes exactly one such column per group -- TODO confirm).
        is_pe <- endsWith(names(grp), "pe")
        # Negating a numeric column gives TRUE exactly where the value is 0.
        zero_rows <- !grp[, is_pe]
        grp[zero_rows, !is_pe] <- NA
        grp
      }
    )
  )
)
out
#   x_lo x_up x_pe X1_lo X1_up X1_pe      y_lo     y_up      y_pe
#1    NA   NA    0    NA    NA     0 -24.71177 14.07412 -2.984101
#2    NA   NA    0    NA    NA     0 -25.13779 14.05121 -2.867680
#3    NA   NA    0    NA    NA     0 -16.19142 13.41795 -2.695838
#4    NA   NA    0    NA    NA     0 -15.63819 12.18732 -2.583140
#5    NA   NA    0    NA    NA     0 -15.42051 13.60202 -2.416878
#6    NA   NA    0    NA    NA     0        NA       NA  0.000000
#7    NA   NA    0    NA    NA     0        NA       NA  0.000000
#8    NA   NA    0    NA    NA     0        NA       NA  0.000000
#9    NA   NA    0    NA    NA     0        NA       NA  0.000000
#10   NA   NA    0    NA    NA     0        NA       NA  0.000000

# Overwrite the matching columns of test.df with the modified ones; columns
# are matched by name, so test.df keeps its own column order and its "n" column.
test.df[colnames(out)] <- out
test.df
#     n X1_lo X1_up x_lo x_up      y_lo     y_up X1_pe x_pe      y_pe
#1  205    NA    NA   NA   NA -24.71177 14.07412     0    0 -2.984101
#2  205    NA    NA   NA   NA -25.13779 14.05121     0    0 -2.867680
#3  205    NA    NA   NA   NA -16.19142 13.41795     0    0 -2.695838
#4  205    NA    NA   NA   NA -15.63819 12.18732     0    0 -2.583140
#5  205    NA    NA   NA   NA -15.42051 13.60202     0    0 -2.416878
#6  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#7  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#8  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#9  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#10 205    NA    NA   NA   NA        NA       NA     0    0  0.000000

或者使用tidyverse

library(dplyr)
library(tidyr)
library(stringr)
# Same replacement with dplyr/tidyr: reshape so each variable group has its
# lo/up/pe values side by side, blank the bounds, then reshape back.
# NOTE(review): the dplyr verbs need test.df to be a data.frame/tibble, not the
# matrix produced by cbind() -- confirm which definition is in effect.
test.df %>%
  # Row id so that pivot_wider() can reassemble the original rows.
  mutate(rn = row_number()) %>%
  # "y_lo" -> group = "y", and the suffix becomes its own column (lo/up/pe).
  pivot_longer(
    cols = -c(n, rn),
    names_sep = "_",
    names_to = c("group", ".value")
  ) %>%
  # across() replaces the superseded mutate_at(vars(...), ...).
  mutate(across(c(lo, up), ~ replace(.x, pe == 0, NA))) %>%
  # pivot_wider() names columns "<value>_<group>" (e.g. "lo_y"); the
  # names_repair function swaps the two parts back to "y_lo".
  pivot_wider(
    names_from = "group",
    values_from = c("lo", "up", "pe"),
    names_repair = ~ str_replace(., "(.*)_(.*)", "\\2_\\1")
  ) %>%
  select(-rn)
# A tibble: 10 x 10
#       n X1_lo  x_lo  y_lo X1_pe  x_pe  y_pe X1_up  x_up  y_up
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1   205    NA    NA -24.7     0     0 -2.98    NA    NA  14.1
# 2   205    NA    NA -25.1     0     0 -2.87    NA    NA  14.1
# 3   205    NA    NA -16.2     0     0 -2.70    NA    NA  13.4
# 4   205    NA    NA -15.6     0     0 -2.58    NA    NA  12.2
# 5   205    NA    NA -15.4     0     0 -2.42    NA    NA  13.6
# 6   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 7   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 8   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 9   205    NA    NA  NA       0     0  0       NA    NA  NA  
#10   205    NA    NA  NA       0     0  0       NA    NA  NA  

数据

# Build the example data as a data.frame; each column is named explicitly
# after the vector it is taken from.
test.df <- data.frame(
  n = n,
  X1_lo = X1_lo,
  X1_up = X1_up,
  x_lo = x_lo,
  x_up = x_up,
  y_lo = y_lo,
  y_up = y_up,
  X1_pe = X1_pe,
  x_pe = x_pe,
  y_pe = y_pe
)

关于r - 根据特定列更改数据框中的值,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61069569/

相关文章:

R 包构建 : How to import a function from a package not on CRAN

r - 检查数据框是否为空的最快方法

r - 如果 df 列表中的 df 缺失,则从字符向量添加列

r - dplyr 的过滤器不适用于 lubridate 的时间格式?

r - dplyr: "Error in n(): function should not be called directly"

R:在保持顺序的同时折叠列中的重复值

r - 为什么我无法在 Ubuntu 上安装最新版本的 R?

r - 如何在 R 中编写 Bootstrap Probit 模型的代码选择

apache-spark - Apache Spark : Relationship between action and job, Spark UI

r - 取消列出数据框中的所有列表元素