在 R 的 ggplot 中按两个分面变量,在每个分面内对坐标轴进行升序排序

标签 r ggplot2

我有一个按出发地、年份和年龄组划分的每个假期地点的过夜住宿数据框,该数据框绘制在 ggplot 中,按年份和年龄组分面:

image of 8 different box plots, facetted by year and age group

如您所见,各条线段并没有按照从小到大的顺序正确排列。我使用了 Tyler Rinker 编写的 `reorder_within` 和 `scale_x_reordered`;当数据只按一个变量分面时(例如 Year 或 age_groups),它们效果很好,但同时按这两个变量分面时就不起作用了,而后者正是我想要的。

包括以下虚拟数据的可重现示例:

library(tidyverse)
library(scales)

#' Reorder a vector within the groups of one faceting variable
#'
#' Appends `within` to every element of `x` (joined by `sep`) so that the
#' same label occurring in different groups becomes a distinct factor level,
#' then orders those levels by `fun` applied to `by`.
#'
#' @param x Vector of labels to reorder.
#' @param by Vector of values the ordering is based on.
#' @param within Grouping vector whose values are appended to `x`.
#' @param fun Summary function applied to `by` for each level.
#' @param sep Separator placed between `x` and `within`.
#' @param ... Accepted for call compatibility; not used by the body.
#' @return The result of `stats::reorder()` on the pasted labels.
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  tagged_labels <- paste(x, within, sep = sep)
  stats::reorder(tagged_labels, by, FUN = fun)
}
#' Discrete x scale that hides the suffix added by `reorder_within()`
#'
#' Builds a `ggplot2::scale_x_discrete()` whose labeller removes everything
#' from the first occurrence of `sep` to the end of each label.
#'
#' @param ... Further arguments forwarded to `ggplot2::scale_x_discrete()`.
#' @param sep Separator that was used when the labels were pasted together.
#'   It is inserted into a regular expression as-is (not escaped).
#' @return The scale object returned by `ggplot2::scale_x_discrete()`.
scale_x_reordered <- function(..., sep = "___") {
  suffix_pattern <- paste0(sep, ".+$")
  strip_suffix <- function(x) {
    gsub(suffix_pattern, "", x)
  }
  ggplot2::scale_x_discrete(labels = strip_suffix, ...)
}


# Dummy data for the reproducible example: 65 rows.
# Eco_T and Eco_Code are constant (a single destination, "dest101" / 101);
# the remaining columns vary by row.
dummy_f <- data.frame(
  Eco_T = rep("dest101", 65),
  Eco_Code = rep(101, 65),
  # Year of each row; the values below are 2018 through 2021.
  Year = c(2018,2018,2019,2020,2019,2020,2018,2020,2021,2019,2021,2018,2021,2021,
           2020,2021,2019,2019,2018,2018,2019,2021,2020,2019,2019,2019,2019,
           2019,2020,2018,2021,2020,2021,2021,2021,2019,2021,2021,2018,2019,2018,
           2020,2020,2018,2020,2019,2018,2020,2021,2020,2021,2018,2019,2021,2019,
           2020,2021,2020,2020,2018,2018,2021,2019,2021,2018),
  # Age band of each row: "under35" or "over35".
  age_groups = c(
    "under35","over35","under35","under35","over35","under35","over35",
    "under35","under35","under35","under35","under35","over35","over35",
    "under35","under35","over35","over35","under35","over35","under35",
    "over35","over35","under35","over35","under35","over35",
    "over35","over35","under35","under35","over35","over35","over35","under35",
    "under35","over35","over35","over35","over35","under35",
    "over35","under35","over35","over35","over35","over35","over35",
    "over35","under35","under35","over35","over35","under35", "over35",
    "over35","over35","under35","over35","over35","over35","over35","under35",
    "over35","over35"
  ),
  # Numeric code of the origin.
  origin_Code = c(
    30301030,30303030,30305030,30301030,30301030,70701070,30303030,31314031,      
    30301030,30301030,30304030,31311031,30301030,30301030,30309030,30304030,      
    31314031,30301030,30301030,30301030,30301030,30303030,31316031,30301030,30301030,      
    30309030,30302030,31317031,30301030,30303030,30309030,      
    30309030,30309030,30303030,31310031,11110011,30309030,30305030,31314031,      
    30309030,30309030,30301030,30304030,30301030,30301030,30304030,30303030,      
    30309030,31314031,30305030,30301030,30302030,31313031,30305030,30302030,      
    31316031,30309030,30301030,31316031,30302030,31313031,31316031,30309030,      
    30301030,30301030 
  ),
  # Origin label; this is the variable shown on the reordered axis.
  Origin_Name = c(
    "Wyumwym","Candcar","Brababri","Wyumwym","Wyumwym","Lihflit","Matramt",
    "Thithe","Calacap","Wyumwym","Shwoshe","Brnsbro","Clelecle","Calacap",
    "Brdabro","Keorken","Thithe","Clelecle","Clelecle","Calacap","Clelecle","Candcar",            
    "Cauncal","Calacap","Calacap","Brdabro","Sagasan",            
    "Tooto","Clelecle","Candcar","Coancoo","Orauorm","Hoanhol","Fostfor",            
    "Inreive","Gocgol","Brababri","Thithe","Brababri","Orauorm","Mueemud",            
    "Calacap","Shwoshe","Wyumwym","Wyumwym","Keorken","Hoanhol","Gocgol",            
    "Thithe","Brababri","Wyumwym","Sagasan","Relired","Brababri","Chemsche",            
    "Suhisun","Brdabro","Calacap","Maochmar","Chemsche","Relired","Cauncal","Ronarob",            
    "Wyumwym","Clelecle" 
  ),
  # Number of overnight stays; this is the value plotted and used for ordering.
  overnight_stays = c(
    266,132,158,143,964,78,134,47,45,130,94,42,500,105,95,590,5666,106,324,
    412,99,433,511,54,506,46,471,48,254,5257,51,388,70,421,662,419,
    130,539,60,69,435,135,2146,406,230,413,126,322,782,86,152,162,406,84,458,426,
    406,46,426,127,109,626,44,1333,961
  )
)

# Lollipop chart (a point plus a segment drawn from 0) of overnight_stays per
# Origin_Name, faceted by age_groups and Year.
# NOTE: the x variable is reordered within Year only, while the panels are
# split by age_groups AND Year. A name that occurs in both age groups of the
# same year therefore shares one level, ordered by the mean over both groups.
# This is the mis-ordering the question is about.
ggplot.object <- dummy_f %>%
  ggplot() +
  (aes(
    x = reorder_within(Origin_Name, -overnight_stays, Year),
    y = overnight_stays,
  )) + geom_point(size = 4, color = "#374c92") +
  # Stem of each lollipop: from y = 0 up to the observed value.
  # NOTE(review): recent ggplot2 releases use `linewidth` rather than `size`
  # for line widths -- confirm against the installed version.
  geom_segment(
    aes(
      x = reorder_within(Origin_Name, -overnight_stays, Year),
      xend = reorder_within(Origin_Name, -overnight_stays, Year),
      y = 0,
      yend = overnight_stays
    ),
    color = "#374c92",
    size = 2
  ) +
  # Strips the "___<Year>" suffix from the axis labels.
  scale_x_reordered() +
  # NOTE(review): no colour aesthetic is mapped in this plot (both geoms use a
  # constant color), so this scale appears to have no visible effect.
  scale_color_distiller(type = "seq", palette = "BuPu", direction = 1,
                        limits = c(-5, NA)) +
  # One panel per age_groups/Year combination, each with its own axes.
  facet_wrap(
    age_groups ~ Year,
    dir = "v",
    scales = "free",
    ncol = 2
  ) +
  scale_y_continuous(labels = comma) +
  # NOTE(review): y is mapped to overnight_stays, yet the title reads
  # "Unique Agents" -- possibly left over from another plot; confirm.
  labs(y = "Unique Agents",
       x = "") +
  # NOTE(review): "sans-serif" may not be a font family name every graphics
  # device recognises (R's generic names are "sans", "serif", "mono") -- confirm.
  theme(
    panel.spacing.y = unit(10, units = "mm"),
    text = element_text(family = "sans-serif",
                        color = "#B6BAC3"),
    axis.text = element_text(color = "#B6BAC3",
                             size = 8),
    axis.title = element_text(color = "#B6BAC3",
                              size = 12),
    axis.line = element_line(color = "#B6BAC3"),
    strip.text = element_text(size = 15,
                              color = "#B6BAC3"),
    legend.position = "none",
    panel.background = element_rect(fill = "transparent",
                                    color = NA),
    plot.background = element_rect(fill = "transparent",
                                   color = NA),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # Swap the axes so the origin names run along the vertical axis.
  coord_flip()

# Print the plot.
ggplot.object

最佳答案

我们可以修改 reorder_within(),使其接受任意数量的“within”变量,方法是把 within 参数替换为 `...`(可变参数):

#' Reorder a vector within the groups of any number of faceting variables
#'
#' Pastes `x` together with every vector supplied through `...` (joined by
#' `sep`) so that each combination becomes its own factor level, then orders
#' those levels by `fun` applied to `by`.
#'
#' @param x Vector of labels to reorder.
#' @param by Vector of values the ordering is based on.
#' @param ... Grouping vectors appended to `x`; passed straight to `paste()`.
#' @param fun Summary function applied to `by` for each level.
#' @param sep Separator placed between the pasted components.
#' @return The result of `stats::reorder()` on the pasted labels.
reorder_within2 <- function(x, by, ..., fun = mean, sep = "___") {
  combined_labels <- paste(x, ..., sep = sep)
  stats::reorder(combined_labels, by, FUN = fun)
}

然后把所有分面变量都传给 `...`:

# Same lollipop chart as in the question, but the x variable is now reordered
# within BOTH faceting variables (Year and age_groups), so every
# Origin_Name/Year/age_groups combination gets its own factor level.
ggplot.object <- dummy_f %>%
  ggplot() +
  aes(
    x = reorder_within2(Origin_Name, -overnight_stays, Year, age_groups),
    y = overnight_stays,
  ) + 
  geom_point(size = 4, color = "#374c92") +
  # Stem of each lollipop: from y = 0 up to the observed value. Only xend is
  # mapped here; x is not repeated in this layer.
  geom_segment(
    aes(
      xend = reorder_within2(Origin_Name, -overnight_stays, Year, age_groups),
      y = 0,
      yend = overnight_stays
    ),
    color = "#374c92",
    size = 2
  ) +
  # rest of code unchanged from original:
  # Strips everything from the first "___" onwards from the axis labels.
  scale_x_reordered() +
  scale_color_distiller(type = "seq", palette = "BuPu", direction = 1,
                        limits = c(-5, NA)) +
  # One panel per age_groups/Year combination, each with its own axes.
  facet_wrap(
    age_groups ~ Year,
    dir = "v",
    scales = "free",
    ncol = 2
  ) +
  scale_y_continuous(labels = comma) +
  labs(y = "Unique Agents",
       x = "") +
  theme(
    panel.spacing.y = unit(10, units = "mm"),
    text = element_text(family = "sans-serif",
                        color = "#B6BAC3"),
    axis.text = element_text(color = "#B6BAC3",
                             size = 8),
    axis.title = element_text(color = "#B6BAC3",
                              size = 12),
    axis.line = element_line(color = "#B6BAC3"),
    strip.text = element_text(size = 15,
                              color = "#B6BAC3"),
    legend.position = "none",
    panel.background = element_rect(fill = "transparent",
                                    color = NA),
    plot.background = element_rect(fill = "transparent",
                                   color = NA),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # Swap the axes so the origin names run along the vertical axis.
  coord_flip()

# Print the plot.
ggplot.object

现在,y 轴在每个分面内都按从小到大的顺序排列:

A two x four faceted lollipop plot. Each panel shows Origins on the y axis and Overnight Stays on the x axis, with horizontal bars showing stays per origin. The bars within each facet increase in length along the y axis from top to bottom.

关于“在 R 的 ggplot 中按两个分面变量在每个分面内对坐标轴进行升序排序”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/71120774/

相关文章:

R apply 语句不适用于矩阵

html - 图像和文本并排 Rstudio presentation/.rpres

r - 如何绘制堆积比例图?

r - 如何在R中的ggplot2中躲避点

r - ggplot2:一组为实线,另一组为点

r - 在ggplot中使用geom_smooth显示标准偏差

r - plotly:add_trace 循环

r - mlrMBO rBayesian通过插入符号优化 R keras 模型的错误

r - SliderInput in Shiny -- 如何控制 Shiny slider 的长度/宽度?

r - 如何在ggplot2的geom_text()标签中向标签添加希腊字母