r - 从点到原点的 ggplot 线和余弦分数

标签 r ggplot2

我想在 ggplot 中实现两件事。

首先:从 3 个点中的每一个到原点 c(0,0) 画一条线

其次:把余弦计算得到的值添加到图中,并画出与下图类似的余弦弧线。

enter image description here

library(dplyr)
library(ggplot2)
# Coordinates of the three points: points1 holds the x values and points2 the
# y values.
points1 <- c(0.025, 0.075, 0.050)
points2 <- c(0.07, 0.0725, 0.074)

# Scatter plot of the points on fixed axis ranges.
ggplot(
  data.frame(cbind(points1, points2)),
  aes(x = points1, y = points2)
) +
  geom_point() +
  scale_x_continuous(limits = c(0.000, 0.08)) +
  scale_y_continuous(limits = c(0.060, 0.08))


library(text2vec)
library(Matrix)
# Sparse matrix with one row per point and one column per coordinate.
m <- Matrix(cbind(points1, points2), sparse = TRUE)

# Cosine distances computed by text2vec's dist2(); the result is printed.
dist2(m, method = "cosine")

编辑:

感谢@astrofunkswag,我的解决方案如下:
library(dplyr)
library(ggplot2)
library(tidyquant)

# Coordinates of the three points: points1 holds the x values and points2 the
# y values.
points1 <- c(0.025, 0.075, 0.070)
points2 <- c(0.07, 0.0725, 0.020)

# One row per point, with columns named after the two coordinate vectors.
df <- data.frame(points1 = points1, points2 = points2)


#' Points along a circular arc centred on the origin
#'
#' Builds the arc that sweeps from the direction of `pt1` to the direction of
#' `pt2`. Both directions are measured from the origin with `atan2()` and the
#' angles in between are interpolated linearly.
#'
#' @param pt1 Numeric vector whose first two elements are the x and y
#'   coordinates of the point that fixes the start angle.
#' @param pt2 Numeric vector (x, y) of the point that fixes the end angle.
#' @param n.angle Number of points generated along the arc.
#' @param rad Radius of the arc.
#' @return A data frame with `n.angle` rows and the columns `x` and `y`.
arc_fun <- function(pt1, pt2, n.angle = 100, rad = .03) {
  # Polar angle of each point as seen from the origin.
  a1 <- atan2(pt1[2], pt1[1])
  a2 <- atan2(pt2[2], pt2[1])
  # Use the caller's n.angle: the body previously reset it to 100, which made
  # the argument a no-op. `length.out` is spelled out instead of relying on
  # partial matching of `len`.
  angles <- seq(a1, a2, length.out = n.angle)
  data.frame(x = rad * cos(angles), y = rad * sin(angles))
}

# Arc between the directions of rows 1 and 3, at arc_fun()'s default radius.
arc_df <- arc_fun(
  pt1 = as.numeric(df[1, ]),
  pt2 = as.numeric(df[3, ])
)

# Arc between rows 2 and 3, at a smaller radius.
arc_df2 <- arc_fun(
  pt1 = as.numeric(df[2, ]),
  pt2 = as.numeric(df[3, ]),
  rad = .02
)

# Arc between rows 1 and 2, at a larger radius.
arc_df3 <- arc_fun(
  pt1 = as.numeric(df[1, ]),
  pt2 = as.numeric(df[2, ]),
  rad = .05
)


library(text2vec)
library(Matrix)
# Sparse matrix with one row per point and one column per coordinate.
m <- Matrix(cbind(points1, points2), sparse = TRUE)

# Cosine distances computed by text2vec's dist2().
dist <- dist2(m, method = "cosine")

# Print the whole matrix, then the three pairwise entries used as plot labels.
dist
dist[1, 2]
dist[2, 3]
dist[3, 1]



# Plot the three documents, a segment from the origin to each of them, one arc
# per pair of documents, and the pairwise cosine distances as text labels.
df %>%
  ggplot(aes(x = points1, y = points2)) +
  geom_point(
    shape = 21, colour = "black", fill = "grey", size = 8, stroke = 1
  ) +
  scale_x_continuous(limits = c(0.000, 0.08)) +
  scale_y_continuous(limits = c(0.000, 0.08)) +
  # Every segment starts at the origin (constant x / y) and ends at a point.
  geom_segment(
    aes(xend = points1, yend = points2),
    x = 0, y = 0, color = "grey"
  ) +
  # geom_path() joins the arc points in row order. geom_line() would re-sort
  # them by x, which only draws a correct arc while x is monotonic along it.
  geom_path(data = arc_df, aes(x, y), color = "red") +
  geom_path(data = arc_df2, aes(x, y), color = "blue") +
  geom_path(data = arc_df3, aes(x, y), color = "green") +
  ggtitle("Cosine distance between points in a 2-D space") +
  theme_bw() +
  theme(
    plot.title = element_text(size = 22, face = "bold"),
    #axis.text.x = element_blank(), axis.text.y = element_blank(),
    axis.title.x = element_blank(), axis.title.y = element_blank(),
    panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # hjust / vjust are constants, so they are set outside aes().
  geom_text(
    aes(label = c("Doc A", "Doc B", "Doc C")),
    hjust = -0.25, vjust = 0, size = 5
  ) +
  # Labels are positioned by hand. All three use paste0() so the spacing after
  # "=" is the same everywhere.
  # NOTE(review): the values are cosine distances from dist2(), not angles,
  # despite the "θ" prefix - confirm the intended wording.
  # NOTE(review): the first and third labels appear to sit beside the arcs of
  # the other pair (dist[2, 3] near the rows 1-2 arc, dist[1, 2] near the
  # rows 2-3 arc) - TODO confirm against the rendered plot.
  annotate(
    "text", x = 0.022, y = 0.035,
    label = paste0("θ = ", round(dist[2, 3], 2)), size = 6
  ) +
  annotate(
    "text", x = 0.0125, y = 0.019,
    label = paste0("θ = ", round(dist[3, 1], 2)), size = 6
  ) +
  annotate(
    "text", x = 0.0125, y = 0.007,
    label = paste0("θ = ", round(dist[1, 2], 2)), size = 6
  )

输出如下:

enter image description here

最佳答案

第一部分用 geom_segment 很容易实现。第二部分改编自 this post。从美观上看,它可能不是您想要的效果,但确实可行。

# One row per point, with columns named after the two coordinate vectors.
df <- data.frame(points1 = points1, points2 = points2)

# Scatter plot on fixed axis ranges, plus a segment from the origin (constant
# x / y) to every point.
plt <- ggplot(df, aes(x = points1, y = points2)) +
  geom_point() +
  scale_x_continuous(limits = c(0.000, 0.08)) +
  scale_y_continuous(limits = c(0.000, 0.08)) +
  geom_segment(aes(xend = points1, yend = points2), x = 0, y = 0)

#' Points along a circular arc centred on the origin
#'
#' Builds the arc that sweeps from the direction of `pt1` to the direction of
#' `pt2`. Both directions are measured from the origin with `atan2()` and the
#' angles in between are interpolated linearly.
#'
#' @param pt1 Numeric vector whose first two elements are the x and y
#'   coordinates of the point that fixes the start angle.
#' @param pt2 Numeric vector (x, y) of the point that fixes the end angle.
#' @param n.angle Number of points generated along the arc.
#' @param rad Radius of the arc.
#' @return A data frame with `n.angle` rows and the columns `x` and `y`.
arc_fun <- function(pt1, pt2, n.angle = 100, rad = .03) {
  # Polar angle of each point as seen from the origin.
  a1 <- atan2(pt1[2], pt1[1])
  a2 <- atan2(pt2[2], pt2[1])
  # Use the caller's n.angle: the body previously reset it to 100, which made
  # the argument a no-op. `length.out` is spelled out instead of relying on
  # partial matching of `len`.
  angles <- seq(a1, a2, length.out = n.angle)
  data.frame(x = rad * cos(angles), y = rad * sin(angles))
}

# Arc between the directions of rows 1 and 3, at arc_fun()'s default radius.
arc_df <- arc_fun(
  pt1 = as.numeric(df[1, ]),
  pt2 = as.numeric(df[3, ])
)

# Arc between rows 2 and 3, at a smaller radius.
arc_df2 <- arc_fun(
  pt1 = as.numeric(df[2, ]),
  pt2 = as.numeric(df[3, ]),
  rad = .02
)

# Arc between rows 1 and 2, at a larger radius.
arc_df3 <- arc_fun(
  pt1 = as.numeric(df[1, ]),
  pt2 = as.numeric(df[2, ]),
  rad = .05
)

# Add the three arcs to the base plot. geom_path() joins the arc points in row
# order. geom_line() would re-sort them by x, which only draws a correct arc
# while x is monotonic along it.
plt +
  geom_path(data = arc_df, aes(x, y), color = "red") +
  geom_path(data = arc_df2, aes(x, y), color = "blue") +
  geom_path(data = arc_df3, aes(x, y), color = "green")

enter image description here

您可以使用很多选项,例如线条和点的颜色和形状。

关于r - 从点到原点的 ggplot 线和余弦分数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57911916/

相关文章:

r - 使用 ggraph 绘制二元组共现时出现问题,某些线条未显示

r - grid.Call(L_textBounds,as.graphicsAnnot(x $ label),x $ x,x $ y,: polygon edge not found (new)中的错误

r - 忽略 dplyr 连接中的区分大小写

r - 使用 summarise_each() 计算忽略 NA 的记录

r - ggplot2正负值不同颜色渐变

r - 如何转换条形图的 y 轴 (ggplot2)

r - R 中的双向条形图

regex - R中的动态正则表达式

r - 如何计算数据框中以逗号分隔的值

r - 如何添加不同大小和颜色的ggplot2字幕?