r - 如何使用 geom_label_repel 避免图中标签重叠?

标签 r ggplot2 bioinformatics ggrepel

我正在尝试使用大量数据制作火山图。下面给出其中一部分数据。

# Example differential-expression table used by the plotting code below.
# `tab7` is a data.frame with 25 rows whose row names are gene symbols and
# whose columns are logFC, logCPM, F, PValue, FDR, Significant and genelabels.
# Every row here has Significant == "FDR < 0.05"; genelabels holds the gene
# symbol for the first 15 rows and an empty string for the remaining 10.
# NOTE(review): the literal looks like dput() output of a larger results
# table -- confirm against the original analysis before reusing the values.
tab7 <- structure(list(logFC = c(-1.27422400347856, -0.972370320302353, 
-1.63545104297305, 0.921263558062452, -0.922767678335555, 1.12992019747864, 
-1.12813908449641, -0.886389856541233, -1.56395091318283, -0.787704367926754, 
0.774615263438003, -1.00983166398568, 1.26941043079936, 0.796918483265524, 
-0.918242602294084, -0.832196243332996, 1.03096613040452, -0.817670622470357, 
0.932412638932054, -1.51103619988004, 0.99279720767736, -0.723506437294531, 
0.832067216048265, 0.839856396735337, -0.882234889894418), logCPM = c(7.86936217233427, 
6.33767227465018, 5.14411397560036, 6.5874873826811, 7.82025605146652, 
5.39870254174873, 5.32742166278357, 6.82202548031827, 3.08154645281093, 
11.718268197204, 7.12821560768394, 5.61968280451023, 3.49750759570402, 
7.11160629416375, 8.84669666906905, 6.74763398946045, 6.47266715414911, 
8.68269736238628, 4.95579660996355, 3.19010537271808, 4.40038075369154, 
8.15304798507271, 5.51204801844412, 6.55289240838477, 5.30047595313181
), F = c(82.3844641280083, 57.1521525156373, 63.0298758207238, 
49.4757942110162, 47.8613242874421, 45.4868410091626, 45.1572301486676, 
43.7556847965406, 42.3072570929808, 38.9673540472504, 38.7844055524106, 
37.9172543011411, 37.1949696821981, 36.4845161241415, 36.388796490688, 
36.1521771859631, 37.1195555247141, 35.1179262289968, 34.9714174442816, 
34.3515229787112, 33.8072274933505, 33.4471746407308, 32.1242403799937, 
31.5096611721688, 31.4822805974465), PValue = c(8.42825601917746e-09, 
1.74097802848866e-07, 2.5202773507131e-07, 5.34483590370978e-07, 
6.87453204425229e-07, 1.00657587023585e-06, 1.06245353897287e-06, 
1.34100176471235e-06, 1.7151684753499e-06, 3.0945677492103e-06, 
3.19932402461853e-06, 3.75155343906861e-06, 4.2915391212445e-06, 
4.90669309108533e-06, 4.99670077770197e-06, 5.22705003318299e-06, 
5.94436067086159e-06, 6.37969536054904e-06, 6.56435248175749e-06, 
7.41302885498296e-06, 8.25780079812361e-06, 8.87426754732628e-06, 
1.16112919352203e-05, 1.31868786236996e-05, 1.32623209940135e-05
), FDR = c(0.000124215637210637, 0.00123812825316032, 0.00123812825316032, 
0.00196930478872187, 0.0020263370653638, 0.00223692003676889, 
0.00223692003676889, 0.00247046050104133, 0.00280868366552298, 
0.00428651249771163, 0.00428651249771163, 0.00460753288208276, 
0.00481476646181568, 0.00481476646181568, 0.00481476646181568, 
0.00481476646181568, 0.00509186457242852, 0.00509186457242852, 
0.00509186457242852, 0.00546266096323694, 0.00579540324584504, 
0.0059449525051134, 0.00744031393657724, 0.00748287613133365, 
0.00748287613133365), Significant = c("FDR < 0.05", "FDR < 0.05", 
"FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", 
"FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", 
"FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", 
"FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", "FDR < 0.05", 
"FDR < 0.05", "FDR < 0.05", "FDR < 0.05"), genelabels = c("FOSL1", 
"TEAD4", "STC2", "SNORA28", "SRRM1", "SNORA38", "MGRN1", "HEATR2", 
"VDR", "SERPINE1", "DPY19L1", "PPRC1", "IFIT1", "ARL6IP5", "SRRM2", 
"", "", "", "", "", "", "", "", "", "")), row.names = c("FOSL1", 
"TEAD4", "STC2", "SNORA28", "SRRM1", "SNORA38", "MGRN1", "HEATR2", 
"VDR", "SERPINE1", "DPY19L1", "PPRC1", "IFIT1", "ARL6IP5", "SRRM2", 
"ARHGAP17", "SNORA34", "HMOX1", "SNORA46", "HSF2BP", "SNORA66", 
"BAG3", "SEPP1", "ARL4C", "SLC7A5"), class = "data.frame")

数据框 tab7 具有基因标签,带有 logFC 和 FDR 详细信息。

使用 geom_label_repel 和数据绘制火山图。我用来制作火山图的代码在这里。
# Base volcano plot: log fold change on x, -log10(FDR) on y, with points
# coloured by the `Significant` column and bold text throughout.
p <- ggplot(data = tab7, mapping = aes(x = logFC, y = -log10(FDR))) +
  geom_point(mapping = aes(colour = Significant)) +
  scale_color_manual(values = c("red", "black")) +
  theme(
    text = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10, face = "bold")
  )

# Horizontal range the repelled labels may occupy: from x = 0 up to the
# right edge of the panel. Because this single range is applied to every
# label, labels of negative-logFC points are also placed on the right.
x_limits <- c(0, NA)

# Add repelled gene labels (rows with FDR < 0.05 only), axis titles, axis
# text styling and a fixed x range.
p1 <- p +
  geom_label_repel(
    mapping = aes(label = genelabels),
    data = dplyr::filter(tab7, FDR < 0.05),
    size = 8,
    fontface = "bold",
    arrow = arrow(
      angle = 15,
      length = unit(0.03, "npc"),
      ends = "last",
      type = "closed"
    ),
    force = 10,
    xlim = x_limits
  ) +
  labs(x = "log2 fold change", y = "-log10 adjusted p-value (FDR)") +
  theme(
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 14, face = "bold")
  ) +
  scale_x_continuous(limits = c(-5, 10))

p1

生成的图如下所示:
enter image description here

在上图中,具有正 logFC 的基因标签与具有负 logFC 的标签重叠。我想要左侧为负 logFC 和右侧为正 logFC 的基因标签。

如能提供任何帮助,我将不胜感激。谢谢!

最佳答案

我们可以使用不同的 xlim 范围,分别为负值和正值添加标签。

注:为了聚焦于问题本身,我删除了所有不相关的外观设置(aesthetics):

How to keep labels for negative values on the left and for positive values on the right


library(ggplot2)
library(ggrepel)

# Volcano plot where labels are added in two separate layers so that each
# side of the plot gets its own horizontal label range.
ggplot(data = tab7, mapping = aes(x = logFC, y = -log10(FDR))) +
  geom_point() +
  # Negative fold changes: labels are kept at x <= -1 (left side).
  geom_label_repel(
    mapping = aes(label = genelabels),
    data = tab7[tab7$logFC < 0 & tab7$FDR < 0.05, ],
    seed = 1,
    xlim = c(NA, -1)
  ) +
  # Positive fold changes: labels are kept at x >= 1 (right side).
  geom_label_repel(
    mapping = aes(label = genelabels),
    data = tab7[tab7$logFC > 0 & tab7$FDR < 0.05, ],
    seed = 1,
    xlim = c(1, NA)
  ) +
  # Widen the x axis so both label ranges have room.
  scale_x_continuous(limits = c(-4, 4))

enter image description here

绘制火山图(volcano plot)也有专用的 R 包,例如:
  • EnhancedVolcano
  • 关于r - 如何使用 geom_label_repel 避免图中标签重叠?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55858446/

    相关文章:

    r - ggplots2 geom_line 和 geom_point 的组合沿线创建了太多形状

    python - 为什么我的shell说: From: too many arguments

    r - 通过模式匹配熔化柱

    r - 自动将 y 轴文本和网格线对齐到 ggplot 的底部和顶部

    r - 安装 R 包 PearsonDS 会出现意外 token 错误

    r - 如何同时绘制单条趋势线和多条趋势线?

    python - 如何退出非尾递归而不计算额外结果?

    r - 比较多个不同长度的向量

    r - 同一轴上的两个刻度

    r - 调试lapply/sapply调用