r - 如何使用动画 ggplot2-plot 管理并行处理?

标签 r animation ggplot2 parallel-processing snowfall

我正在尝试使用 ggplot2 构建动画条形图和 magick这是在“每天”的基础上增长的。不幸的是,我的数据集中有一万个条目(几年中每天的日期和不同的类别),这使得处理非常缓慢。因此,我使用 snow包以加快处理时间。
但是,我在拆分数据并调用 ggplot() 时遇到了麻烦。在一个集群中。
magick需要为动画和 snow 分割每个日期的数据需要按集群拆分以进行并行处理。所以,我得到了一个列表列表,这在调用 ggplot() 时会导致问题在 clusterApply() 内.列表的结构当然取决于我拆分数据的顺序(请参阅示例代码中的版本 1 和 2),但还没有版本导致成功。
我想在使用 data$date 时访问列表元素不起作用,因为现在列表中有更多级别。

所以,我的问题是:是否可以通过 ggplot2 构建动画图?通过以这种方式使用并行处理?

这是可视化我的问题的示例代码(我试图尽可能地对其进行结构化):

########################################################################
# setup
########################################################################
library(parallel)
library(snow)
library(ggplot2)
library(magick)

# creating some sample data for one year
# 4 categories; each category has a specific value per day
set.seed(1)
x <- data.frame(
  rep(as.Date((Sys.Date()-364):Sys.Date(), origin="1970-01-01"),4),
  c(rep("cat01",length.out=365),
    rep("cat02",length.out=365),
    rep("cat03",length.out=365),
    rep("cat04",length.out=365)),
  sample(0:50,365*4, replace=TRUE)
)
colnames(x) <- c("date", "category", "value")
x$category <- factor(x$category)

# creating a cumulative measure making the graphs appear "growing"
x$cumsum <- NA
for(i in levels(x$category)){
  x$cumsum[x$category == i] <- cumsum(x$value[x$category == i])
}
x <- x[order(x$date),]

# number of cores
cores <- detectCores()

# clustering
cl <- makeCluster(cores, type="SOCK")

# adding a grouping-variable to the data for each cluster
x$group <- rep(1:cores, length.out = nrow(x))

########################################################################
# splitting the data
########################################################################
# V1: worker first, plotting second
# splitting data for the worker
datasplit01 <- split(x, x$group)

# splitting data for plotting
datalist01 <- clusterApply(cl, datasplit01, function(x){split(x, x$date)})

########################################################################
# V2: plotting first, worker second
# splitting data for plotting
datasplit02 <- split(x, x$date)

# splitting data for the worker
datalist02 <- clusterApply(cl, datasplit02, function(x){split(x, x$group)})

########################################################################
# conventional plotting
########################################################################
# plotting the whole data works fine
ggplot(x)+
  geom_bar(aes(category, value), stat = "identity")

########################################################################
# conventional animation with ggplot2
########################################################################
# animation per date works, but pretty slowly

# opening magick-device
img <- image_graph(1000, 700, res = 96)

# plotting 
  # replace the second line with first line if the code is too slow and if
  # you like to get an impression of what the plot should look like
# out <- lapply(datasplit02[1:50], function(data){   # line 1: downscaled dataset
out <- lapply(datasplit02, function(data){           # line 2: full dataset
  plot <- ggplot(data)+
    geom_bar(aes(category, cumsum), stat = "identity")+
    # holding breaks and limits constant per plot
    scale_y_continuous(expand = c(0,0), 
                       breaks = seq(0,max(x$cumsum)+500,500), 
                       limits = c(0,max(x$cumsum)+500))+
    ggtitle(data$date)
  print(plot)
})
dev.off()

# animation
animation <- image_animate(img, fps = 5)
animation

########################################################################
# parallel process plotting
########################################################################
# animation per date in parallel processing does not work, probably
# due to ggplot not working with a list of lists

# opening magick-device
img <- image_graph(1000, 700, res = 96)

# plotting
out <- clusterApply(cl, datalist01, function(data){
  plot <- ggplot(data)+
    geom_bar(aes(category, cumsum), stat = "identity")+
    # holding breaks and limits constant per plot
    scale_y_continuous(expand = c(0,0), 
                       breaks = seq(0,max(x$cumsum)+500,500), 
                       limits = c(0,max(x$cumsum)+500))+
    ggtitle(data$date)
  print(plot)
})
dev.off()

# animation
animation <- image_animate(img, fps = 5)
animation

谢谢你的建议!

更新:使用降雪,代码要短得多,我没有得到同样的错误,但设备仍然没有产生情节。
########################################################################
# snowfall version
########################################################################
library(parallel)
library(snowfall)
library(ggplot2)
library(magick)

# creating some sample data for one year
# 4 categories; each category has a specific value per day
set.seed(1)
x <- data.frame(
  rep(as.Date((Sys.Date()-364):Sys.Date(), origin="1970-01-01"),4),
  c(rep("cat01",length.out=365),
    rep("cat02",length.out=365),
    rep("cat03",length.out=365),
    rep("cat04",length.out=365)),
  sample(0:50,365*4, replace=TRUE)
)
colnames(x) <- c("date", "category", "value")
x$category <- factor(x$category)

# creating a cumulative measure making the graphs appear "growing"
x$cumsum <- NA
for(i in levels(x$category)){
  x$cumsum[x$category == i] <- cumsum(x$value[x$category == i])
}
x <- x[order(x$date),]

# number of cores
cores <- detectCores()

# clustering
sfInit(parallel = TRUE, cpus = cores, type = "SOCK")

# splitting data for plotting
datalist <- split(x, x$date)

# making everything accessible in the cluster
sfExportAll()
sfLibrary(ggplot2)
sfLibrary(magick)

# opening magick-device
img <- image_graph(1000, 700, res = 96)

# plotting
out <- sfLapply(datalist, function(data){
  plot <- ggplot(data)+
    geom_bar(aes(category, cumsum), stat = "identity")+
    # holding breaks and limits constant per plot
    scale_y_continuous(expand = c(0,0), 
                       breaks = seq(0,max(x$cumsum)+500,500), 
                       limits = c(0,max(x$cumsum)+500))+
    ggtitle(data$date)
plot
})
dev.off()

# animation
animation <- image_animate(img, fps = 5)
animation

使用时
img <- image_graph(1000, 700, res = 96)
out
dev.off()
animation <- image_animate(img, fps = 5)
animation

情节产生了。但是,调用 out非常慢,这就是为什么我必须避免使用此选项才能使其正常工作。

最佳答案

所以,我的解决方案:

  • ncores 中拆分日期期
  • 获取每个时期的图并将其保存为 GIF
  • 回读所有 GIF 并将它们合并

  • ########################################################################
    # setup
    ########################################################################
    
    # creating some sample data for one year
    # 4 categories; each category has a specific value per day
    set.seed(1)
    x <- data.frame(
      rep(as.Date((Sys.Date()-364):Sys.Date(), origin="1970-01-01"),4),
      c(rep("cat01",length.out=365),
        rep("cat02",length.out=365),
        rep("cat03",length.out=365),
        rep("cat04",length.out=365)),
      sample(0:50,365*4, replace=TRUE)
    )
    colnames(x) <- c("date", "category", "value")
    
    # creating a cumulative measure making the graphs appear "growing"
    library(dplyr)
    x <- x %>%
      as_tibble() %>%
      arrange(date) %>%
      mutate(date = as.character(date)) %>%
      group_by(category) %>%
      mutate(cumsum = cumsum(value))
    
    y_max <- max(x$cumsum) + 500
    
    library(doParallel)
    
    all_dates <- unique(x$date)
    ncores <- detectCores() - 1
    ind_cluster <- sort(rep_len(1:ncores, length(all_dates)))
    date_cluster <- split(all_dates, ind_cluster)
    registerDoParallel(cl <- makeCluster(ncores))
    
    tmp <- tempfile()
    
    files <- foreach(ic = 1:ncores, .packages = c("tidyverse", "magick")) %dopar% {
    
      img <- image_graph(1000, 700, res = 96)
    
      x %>%
        filter(date %in% date_cluster[[ic]]) %>%
        group_by(date) %>%
        do(
          plot = ggplot(.) +
            geom_col(aes(category, cumsum)) +
            scale_y_continuous(expand = c(0, 0), 
                               breaks = seq(0, y_max, 500), 
                               limits = c(0, y_max))
        ) %>%
      pmap(function(date, plot) {
        print(plot + ggtitle(date))
        NULL
      })
    
      dev.off()
    
      image_write(image_animate(img, fps = 5), paste0(tmp, ic, ".gif"))
    }
    stopCluster(cl)
    
    test <- do.call(c, lapply(files, magick::image_read))
    test
    

    关于r - 如何使用动画 ggplot2-plot 管理并行处理?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50697548/

    相关文章:

    r - 条形图中的渐变填充

    r - (R, dplyr) 选择多个以相同字符串开头的列并按组汇总平均值 (90% CI)

    R:在摘要中转换 do.call()-summary

    java - 处理三角函数代码不起作用

    r - 如何仅显示带有 facet 的 polar ggplot 的部分绘图区域?

    r - 为 R 中的所有行添加前缀

    javascript - Jquery 和 CSS 按钮切换旋转动画

    ios - UIViewControllerTransitioningDelegate 使用 NSLayoutConstraint 问题关闭动画

    r - 在 scale_x_datetime 中抑制警告

    r - 如何将已计算的标准误差值添加到条形图 (ggplot) 中的每个条形中?