我需要把数据框每一行中位于第一列和最后一列的零替换为 NA;并且,在替换首/末位置的零时,还需要把该行中与之紧邻的连续零一并替换为 NA(行中间、不与首尾相连的零保持不变)。给定示例数据框:
# Example input: 11 rows and 6 numeric columns (a-f) holding only 0s and 1s.
df <- data.frame(
  a = c(1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1),
  b = c(1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1),
  c = c(1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1),
  d = c(1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1),
  e = c(1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1),
  f = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1)
)
df
我需要它返回:
# Expected output: zeros touching the left or right edge of a row (and any
# zeros directly contiguous with them) become NA; interior zeros are kept.
df.result <- data.frame(
  a = c(1, NA, 1, NA, 1, 1, 1, NA, 1, 1, 1),
  b = c(1, 1, 1, NA, 1, 1, 1, NA, 1, 1, 1),
  c = c(1, 0, 1, 1, 1, 0, 1, NA, 1, 1, 1),
  d = c(1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1),
  e = c(1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1),
  f = c(1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1)
)
df.result
提前致谢。
最佳答案
另一种方式,避免使用 `apply` 以及逐行操作:
# One logical vector per column: TRUE where the value equals zero.
g <- lapply(df, "==", 0)
# A cumulative AND over the columns from the left marks cells that are still
# inside a run of zeros starting at the first column; the same from the right
# (right = TRUE) marks runs ending at the last column. Cells in either run
# are set to NA.
df[
  do.call(cbind, Reduce("&", g, accumulate = TRUE)) |
    do.call(cbind, Reduce("&", g, accumulate = TRUE, right = TRUE))
] <- NA
identical(df, df.result)
# [1] TRUE
快速基准测试:
# Replace zeros at either end of each row of `df` (including zeros contiguous
# with those edge zeros) by NA, working row by row with apply().
#
# df: a data frame of numeric values.
# Returns `df` with the edge runs of zeros set to NA.
docendo <- function(df) {
  nonzero <- df != 0

  # For one row of TRUE/FALSE "is non-zero" flags, flag the cells that lie
  # before the first non-zero value or after the last one.
  edge_zeros <- function(x) {
    leading <- cumsum(x) == 0
    trailing <- rev(cumsum(rev(x)) == 0)
    leading | trailing
  }

  # apply() returns one column per input row, hence the transpose.
  idx <- t(apply(nonzero, 1, edge_zeros))
  df[idx] <- NA
  df
}
# Replace zeros at either end of each row of `df` (including zeros contiguous
# with those edge zeros) by NA, using column-wise cumulative ANDs instead of
# iterating over rows.
#
# df: a data frame of numeric values.
# Returns `df` with the edge runs of zeros set to NA.
nicola <- function(df) {
  is_zero <- lapply(df, "==", 0)

  # Element k of the left-to-right accumulation is TRUE where columns 1..k are
  # all zero; with right = TRUE it is TRUE where columns k..last are all zero.
  from_left <- Reduce("&", is_zero, accumulate = TRUE)
  from_right <- Reduce("&", is_zero, accumulate = TRUE, right = TRUE)

  edge_mask <- do.call(cbind, from_left) | do.call(cbind, from_right)
  df[edge_mask] <- NA
  df
}
# Replace zeros at either end of each row of `df` (including zeros contiguous
# with those edge zeros) by NA, locating the first and last non-zero column of
# every row with max.col().
#
# df: a data frame of numeric values. NOTE(review): NA values already present
#     in the input are not handled specially -- confirm the input has none.
# Returns `df` with the edge runs of zeros set to NA.
lmo <- function(df) {
  n_col <- length(df)

  # Work on "is non-zero" flags rather than on the raw values: max.col() on
  # the raw values finds the row maximum, which is the first/last non-zero
  # entry only when the data is strictly 0/1.
  nonzero <- df != 0
  all_zero <- rowSums(nonzero) == 0

  # Number of leading / trailing zeros per row.
  reps.first <- max.col(nonzero, ties.method = "first") - 1
  fill.last <- n_col - max.col(nonzero, ties.method = "last")

  # In an all-zero row every flag ties, so max.col() reports column 1 / the
  # last column and both counts come out as 0. Such a row is one run of zeros
  # touching both edges, so mark the whole row via the leading count.
  reps.first[all_zero] <- n_col
  fill.last[all_zero] <- 0

  rows <- seq_len(nrow(df))
  lead <- reps.first > 0
  trail <- fill.last > 0

  # (row, column) pairs of every cell to blank out.
  cells <- rbind(
    cbind(rep(rows[lead], reps.first[lead]),
          sequence(reps.first[lead])),
    cbind(rep(rows[trail], fill.last[trail]),
          n_col - (sequence(fill.last[trail]) - 1))
  )

  # Assign through a logical mask so that an empty set of cells is a no-op.
  mask <- matrix(FALSE, nrow = nrow(df), ncol = n_col)
  mask[cells] <- TRUE
  df[mask] <- NA
  df
}
# Create a bigger dataset: repeat every row of df 10000 times.
# seq_len() is used instead of 1:nrow(df) so that a zero-row df does not
# produce the index c(1, 0).
df <- df[rep(seq_len(nrow(df)), each = 10000), ]

# Time each implementation on the enlarged data and check that they agree.
# The commented figures are the output recorded with the original answer.
system.time(res <- docendo(df))
#    user  system elapsed
#   2.088   0.020   2.145
system.time(res2 <- nicola(df))
#    user  system elapsed
#   0.016   0.000   0.017
identical(res, res2)
# [1] TRUE
system.time(res3 <- lmo(df))
#    user  system elapsed
#   0.222   0.000   0.265
identical(res2, res3)
# [1] TRUE
关于替换在R中数据帧任一端找到的连续零,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43917830/