减少一些繁琐的代码以求简单

标签 r function loops dplyr mapply

我的数据样本如下(真实数据是近 50 万育龄妇女):在这个数据集中,我有一个由妇女拥有的所有 child 组成的妇女平等)。此行一直持续到 30 个子级(ageownchild_pernum1:ageownchild_pernum30)。例如,一名妇女有 2 个 child ,该妇女有 30 行(填充 child 的年龄),但只有第一行和第二行填充该妇女有 child 的年龄,其他行填充 NA。为了简单起见,这里我只带来了两行并省略了其他行。

library("tidyverse")
DataSet1<-
tibble(
id = c(1,2,3,4,5,6,7,8,9,10),
ageownchild_pernum1 = c(18,24,13,16,9,NA,17,13,32,7 ),
ageownchild_pernum2=  c(16,NA,9 ,10,7,NA,13,11,20,5 ),
AGE=  c(38,52 ,41 ,43 ,38 ,36 ,40 ,36 ,56,31),
F_curve_notch_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
F_curve_notch_15.25= c(34.01,40.33,51.74,51.74,34.01,34.01,34.01,34.01,73.85,41.91),
f_curve_tilde_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
f_curve_tilde_15.25= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
)

F_curve_notch 和 f_curve_tilde 适用于年龄(15 至 49,.25)。

现在,我想对我的数据执行这个庞大的过程,可能会达到一千多行代码。

DataSet1$low_notch      <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 - 0.75,0)
DataSet1$high_notch     <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 + 0.75,0)
DataSet1$low_low_notch  <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 - 1.25,0)
DataSet1$high_high_notch<-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 + 1.25,0)
DataSet1$low_low_notch  <-ifelse ((DataSet1$low_low_notch>=20)   & (DataSet1$low_low_notch<35)  
,DataSet1$low_low_notch+0.25,DataSet1$low_low_notch)
DataSet1$high_high_notch<-ifelse ((DataSet1$high_high_notch>=20) & 
(DataSet1$high_high_notch<35), DataSet1$high_high_notch+0.25, DataSet1$high_high_notch)

notch <- function(a, b,c,d){
ifelse((a<= 15)&(b>=15)&(c!= 0),0.01*d,c)
}
DataSet1$f_curve_notched_15<-mapply('notch',DataSet1$low_low_notch, DataSet1$high_high_notch, 
DataSet1$f_curve_notched_15,DataSet1$f_curve_tilde_15, DataSet1$f_curve_notched_15)

对于所有ageownchild_pernum(1:30) 和f_curve_notched(15 到49, .25),应继续执行此过程。我非常感谢您提供的任何帮助。

最佳答案

可以使用多个数据透视表在一次运行中对所有 f_curve 年龄和所有子列进行相互比较:

library(tidyverse)

DataSet1<-
  tibble(
    id = c(1,2,3,4,5,6,7,8,9,10),
    ageownchild_pernum1 = c(18,24,13,16,9,NA,17,13,32,7 ),
    ageownchild_pernum2=  c(16,NA,9 ,10,7,NA,13,11,20,5 ),
    AGE=  c(38,52 ,41 ,43 ,38 ,36 ,40 ,36 ,56,31),
    f_curve_notch_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
    f_curve_notch_15.25= c(34.01,40.33,51.74,51.74,34.01,34.01,34.01,34.01,73.85,41.91),
    f_curve_tilde_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
    f_curve_tilde_15.25= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
  )

notch <- function(a, b,c,d){
  ifelse((a<= 15)&(b>=15)&(c!= 0),0.01*d,c)
}



dat_out <- DataSet1 |>
  pivot_longer(
    starts_with("f_curve"),
    names_to = c("marker", "age_cat"),
    names_pattern = c("f_curve_(.*)_(.*)")
  ) |> 
  pivot_wider(names_from = marker, values_from = value) |> 
  pivot_longer(
    starts_with("ageownchild"),
    names_to = "child_n",
    values_to = "child_age",
    names_prefix = "ageownchild_pernum"
  ) |> 
  filter(!is.na(child_age)) |> 
  mutate(
    low_notch = child_age - 0.75,
    high_notch = child_age + 0.75,
    low_low_notch = child_age - 1.25,
    high_high_notch = child_age + 1.25,
    low_low_notch = if_else(low_low_notch>=20 & low_low_notch<35, low_low_notch+0.25, low_low_notch),
    high_high_notch = if_else(high_high_notch>=20 & high_high_notch<35, high_high_notch+0.25, high_high_notch),
    f_curve_notch = notch(low_low_notch, high_high_notch, notch, tilde)
  )

dat_out
#> # A tibble: 34 × 12
#>       id   AGE age_cat notch tilde child_n child_age low_notch high_notch
#>    <dbl> <dbl> <chr>   <dbl> <dbl> <chr>       <dbl>     <dbl>      <dbl>
#>  1     1    38 15       25.1  25.1 1              18     17.2       18.8 
#>  2     1    38 15       25.1  25.1 2              16     15.2       16.8 
#>  3     1    38 15.25    34.0  25.1 1              18     17.2       18.8 
#>  4     1    38 15.25    34.0  25.1 2              16     15.2       16.8 
#>  5     2    52 15       30.3  30.3 1              24     23.2       24.8 
#>  6     2    52 15.25    40.3  30.3 1              24     23.2       24.8 
#>  7     3    41 15       43.3  43.3 1              13     12.2       13.8 
#>  8     3    41 15       43.3  43.3 2               9      8.25       9.75
#>  9     3    41 15.25    51.7  43.3 1              13     12.2       13.8 
#> 10     3    41 15.25    51.7  43.3 2               9      8.25       9.75
#> # ℹ 24 more rows
#> # ℹ 3 more variables: low_low_notch <dbl>, high_high_notch <dbl>,
#> #   f_curve_notch <dbl>

这意味着每位女性对于 child 和 f_curve 年龄段的每个组合都会有一行。如果需要,可以将这些数据转回到更广泛的数据集,以便为每个女性提供一列:

dat_out |> 
  pivot_wider(
    names_from = c(child_n, age_cat),
    values_from = f_curve_notch,
    names_prefix = "f_curve_notch_"
  )
#> # A tibble: 34 × 13
#>       id   AGE notch tilde child_age low_notch high_notch low_low_notch
#>    <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>      <dbl>         <dbl>
#>  1     1    38  25.1  25.1        18     17.2       18.8          16.8 
#>  2     1    38  25.1  25.1        16     15.2       16.8          14.8 
#>  3     1    38  34.0  25.1        18     17.2       18.8          16.8 
#>  4     1    38  34.0  25.1        16     15.2       16.8          14.8 
#>  5     2    52  30.3  30.3        24     23.2       24.8          23   
#>  6     2    52  40.3  30.3        24     23.2       24.8          23   
#>  7     3    41  43.3  43.3        13     12.2       13.8          11.8 
#>  8     3    41  43.3  43.3         9      8.25       9.75          7.75
#>  9     3    41  51.7  43.3        13     12.2       13.8          11.8 
#> 10     3    41  51.7  43.3         9      8.25       9.75          7.75
#> # ℹ 24 more rows
#> # ℹ 5 more variables: high_high_notch <dbl>, f_curve_notch_1_15 <dbl>,
#> #   f_curve_notch_2_15 <dbl>, f_curve_notch_1_15.25 <dbl>,
#> #   f_curve_notch_2_15.25 <dbl>

关于减少一些繁琐的代码以求简单,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/77036680/

相关文章:

r - 使用 read.table() 读取 CSV 文件时出错

r - 在不聚合的情况下旋转数据框

c++ - 如何编写一个模板函数,它接受一个数组和一个指定数组大小的 int

java - 有没有办法在数组的 foreach 循环中创建对象

r - 从R中的向量中提取单词的总频率

r - R中是否有数据类型 "Decimal"?

javascript - 向js中函数内部的对象添加方法

JavaScript:具有多个值的参数

ASP.NET 在循环内添加控件

php - Mysql num rows in while循环