我的数据框中的数据格式为:
structure(list(O2Range = c("112 MAX", "16/19", "16/190", "12 MAX",
NA, NA, NA, NA, NA, NA, NA, "16/20", "18/22", NA, "16/20", NA,
"11/13", NA, "16/190", NA)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
显而易见,低 O2 读数和高 O2 读数在列中用“/”分隔,但有时会先列出数字,然后列出“MAX”(即:112 MAX)。
我试图通过以下方式将此列分成两个新列:
library(tidyverse)
# NOTE(review): `if` evaluates a single logical value, whereas str_detect()
# returns one value per row of data$O2Range here. The result shown below is
# consistent with only the first element ("112 MAX", which has no "/")
# deciding the branch, so every row ends up split on " " instead of "/".
# A vectorised choice (e.g. ifelse() / dplyr::if_else()) would pick the
# separator row by row.
data$O2High <- if (str_detect(data$O2Range, "/")) {str_split_fixed(data$O2Range, fixed("/"), 2)[, 2]
} else {str_split_fixed(data$O2Range, fixed(" "), 2)[, 2]}
# Same construct, taking the first piece of the split; the same caveat applies.
data$O2Low <- if (str_detect(data$O2Range, "/")) {str_split_fixed(data$O2Range, fixed("/"), 2)[, 1]
} else {str_split_fixed(data$O2Range, fixed(" "), 2)[, 1]}
但是,结果并没有达到预期:
structure(list(O2High = c("MAX", "", "", "MAX", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", ""), O2Low = c("112",
"16/19", "16/190", "12", "", "", "", "", "", "", "", "16/20",
"18/22", "", "16/20", "", "11/13", "", "16/190", "")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
我的 if/else 语句似乎出了问题,但我无法解决这个问题。有什么想法吗?
预期输出:
structure(list(O2High = list("112", "19", "190", "12", NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, "20", "22", NA_character_,
"20", NA_character_, "13", NA_character_, "190", NA_character_),
O2Low = list("MAX", "16", "16", "MAX", NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, "16", "18", NA_character_,
"16", NA_character_, "11", NA_character_, "16", NA_character_)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
谢谢你, 克里斯
最佳答案
不确定如何处理 MAX,但是...
library(stringi)
# Extract the digits on either side of "/" with look-around regexes:
# o2High is the number right after the slash, o2Low the number right before it.
# Rows without a "/" (including the "MAX" rows and NA) yield NA in both columns.
as.data.frame(data) %>%
  mutate(
    o2High = stri_extract_all_regex(O2Range, "(?<=/)[0-9]+"),
    o2Low = stri_extract_all_regex(O2Range, "[0-9]+(?=\\/)")
  )
O2Range o2High o2Low
1 112 MAX NA NA
2 16/19 19 16
3 16/190 190 16
4 12 MAX NA NA
5 <NA> NA NA
6 <NA> NA NA
7 <NA> NA NA
8 <NA> NA NA
9 <NA> NA NA
10 <NA> NA NA
11 <NA> NA NA
12 16/20 20 16
13 18/22 22 18
14 <NA> NA NA
15 16/20 20 16
16 <NA> NA NA
17 11/13 13 11
18 <NA> NA NA
19 16/190 190 16
20 <NA> NA NA
或者(同时把 “MAX” 前面的数字提取为 o2High):
# Extract o2High as either the number right after "/" or the number that
# precedes " MAX" (e.g. "112 MAX" -> "112"). o2Low is still taken only from the
# number right before "/", so "MAX" rows and NA rows have no o2Low.
# The input is `data`, the data frame used in the question; `df` is not defined
# in this context.
as.data.frame(data) %>%
  mutate(
    o2High = stri_extract_all_regex(O2Range, "(?<=/)[0-9]+|[0-9]+(?=\\sMAX)"),
    o2Low = stri_extract_all_regex(O2Range, "[0-9]+(?=\\/)")
  )
O2Range o2High o2Low
1 112 MAX 112 NA
2 16/19 19 16
3 16/190 190 16
4 12 MAX 12 NA
5 <NA> NA NA
6 <NA> NA NA
7 <NA> NA NA
8 <NA> NA NA
9 <NA> NA NA
10 <NA> NA NA
11 <NA> NA NA
12 16/20 20 16
13 18/22 22 18
14 <NA> NA NA
15 16/20 20 16
16 <NA> NA NA
17 11/13 13 11
18 <NA> NA NA
19 16/190 190 16
20 <NA> NA NA
关于r - if/else 语句中的 str_split_fixed : unexpected results,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58104711/