我有一个简单的数据框,如下所示:
# Sample data as printed by dput(): a 69-row data.frame with three columns.
#   DAILY_INJ_DATE - character month labels of the form "YYYY-MM" (not Date).
#   PID            - character identifier; two distinct values appear below.
#   InjIndex       - numeric values, with some NA entries.
# The first PID has all 36 months from 2018-01 to 2020-12; the second PID has
# 33 rows because 2019-03 through 2019-05 are absent.
# The expression is not assigned here; the later snippets refer to it as `df`.
structure(list(DAILY_INJ_DATE = c("2018-01", "2018-02", "2018-03",
"2018-04", "2018-05", "2018-06", "2018-07", "2018-08", "2018-09",
"2018-10", "2018-11", "2018-12", "2019-01", "2019-02", "2019-03",
"2019-04", "2019-05", "2019-06", "2019-07", "2019-08", "2019-09",
"2019-10", "2019-11", "2019-12", "2020-01", "2020-02", "2020-03",
"2020-04", "2020-05", "2020-06", "2020-07", "2020-08", "2020-09",
"2020-10", "2020-11", "2020-12", "2018-01", "2018-02", "2018-03",
"2018-04", "2018-05", "2018-06", "2018-07", "2018-08", "2018-09",
"2018-10", "2018-11", "2018-12", "2019-01", "2019-02", "2019-06",
"2019-07", "2019-08", "2019-09", "2019-10", "2019-11", "2019-12",
"2020-01", "2020-02", "2020-03", "2020-04", "2020-05", "2020-06",
"2020-07", "2020-08", "2020-09", "2020-10", "2020-11", "2020-12"
), PID = c("42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000"), InjIndex = c(3.1488349310755e-05,
7.16470821042452e-05, 3.08198068625437e-05, 0.00365977544989287,
0.000102146739534363, 6.97288098968181e-05, 6.67030385322113e-05,
0.000101198808641258, 6.96471158898905e-05, 0.000100457907956119,
0.002770103468248, 0.000141272149337637, 3.71747211895911e-05,
0, 0, 0, NA, NA, 0.00261196063305948, 0.0020329847793613, 0.0268256888287629,
0.0190615086256689, 0.00165037617202441, 0.00823890291192408,
0.0149562009694358, 3.82198063529811e-05, 0.00703837718531629,
0.0460765131610604, 0.0571638755572333, 0.0600559821857274, 0.0636357826177028,
0.0643659884529977, 0.0577969845601966, 0.0588167585535698, 0.0593479205060031,
0.0478238114640216, 0.0579565073781893, 0.0439869629670818, 0.056714771440236,
0.122274207049878, 0.136105301010138, 0.133225772135695, 0.126920643583703,
0.128496063591315, 0.14043302451169, 0.113191351198699, 0.125443452699286,
0.146339474772728, 0.0191599802822513, NA, 0.133221262910392,
0.216814720357711, 0.606926958546271, NA, NA, 0.131402308568841,
NA, 0.355567523506574, NA, 0.0234750006884004, 0.0416741137140514,
NA, NA, 0.0585083175072382, NA, 0.0852075310970539, 0.0691143041976479,
NA, NA)), row.names = c(NA, 69L), class = "data.frame")
我打算先按 `PID` 列升序、再按 `DAILY_INJ_DATE` 列降序对该表进行排序。
我使用以下语法:
# The asker's attempt (does not give the intended order): DAILY_INJ_DATE holds
# strings such as "2018-01", which as.numeric() cannot parse, so the second
# sort key is NA for every row (with a coercion warning). order() therefore
# cannot break ties within a PID and leaves those rows in their original order.
df1 <- df[order(df$PID, -as.numeric(df$DAILY_INJ_DATE)),]
但得到的结果不正确。例如,PID = '42135311180000' 的最后一个日期是“2020-12”,按降序排序后它本应排在该 PID 的第一行,但结果数据框显示的并非如此:
最佳答案
该列不是 `Date` 类,而是形如 "2018-01" 的字符串。可以先用 `paste0` 在末尾补上日(例如 "-01"),再用 `as.Date` 转换为 `Date`,然后强制转换为 `numeric`,最后执行 `order`:
# Sort by PID ascending, then by month descending.
# "-01" is appended so each "YYYY-MM" label parses as a Date; negating the
# Date's numeric value reverses the order of the second key only.
# local() keeps the intermediate out of the calling environment.
df1 <- local({
  month_start <- as.Date(paste0(df[["DAILY_INJ_DATE"]], "-01"))
  df[order(df[["PID"]], -as.numeric(month_start)), ]
})
检查输出(仅显示 2018 年的行):
# Show only the rows whose month label begins with "2018" to inspect the order.
subset(df1, startsWith(DAILY_INJ_DATE, "2018"))
# DAILY_INJ_DATE PID InjIndex
#12 2018-12 42135311180000 1.412721e-04
#11 2018-11 42135311180000 2.770103e-03
#10 2018-10 42135311180000 1.004579e-04
#9 2018-09 42135311180000 6.964712e-05
#8 2018-08 42135311180000 1.011988e-04
#7 2018-07 42135311180000 6.670304e-05
#6 2018-06 42135311180000 6.972881e-05
#5 2018-05 42135311180000 1.021467e-04
#4 2018-04 42135311180000 3.659775e-03
#3 2018-03 42135311180000 3.081981e-05
#2 2018-02 42135311180000 7.164708e-05
#1 2018-01 42135311180000 3.148835e-05
#48 2018-12 42135335900000 1.463395e-01
#47 2018-11 42135335900000 1.254435e-01
#46 2018-10 42135335900000 1.131914e-01
#45 2018-09 42135335900000 1.404330e-01
#44 2018-08 42135335900000 1.284961e-01
#43 2018-07 42135335900000 1.269206e-01
#42 2018-06 42135335900000 1.332258e-01
#41 2018-05 42135335900000 1.361053e-01
#40 2018-04 42135335900000 1.222742e-01
#39 2018-03 42135335900000 5.671477e-02
#38 2018-02 42135335900000 4.398696e-02
#37 2018-01 42135335900000 5.795651e-02
或者使用 `tidyverse`:可以在 `arrange` 中直接把该列转换为 `Date` 类,再按降序排序:
library(dplyr)
library(lubridate)
# Sort by PID ascending, then by month descending.
# ymd(..., truncated = 2) accepts dates with missing trailing components, so
# the "YYYY-MM" labels parse without a day; desc() reverses that key only.
df1 <- arrange(
  df,
  PID,
  desc(ymd(DAILY_INJ_DATE, truncated = 2))
)
关于r - 按多列排序不起作用 - R Dataframe,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/65461180/