R:用表格列出多个分类变量及其频数

标签 r r-markdown papaja

library("tidyverse")
library("papaja")

# Example data: 21 observations of four variables, stored as a tibble-classed
# data frame. The two factor columns are given as integer codes that index
# into their level vectors.
df <- structure(
  list(
    investment_type = structure(
      c(
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L,
        3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L
      ),
      levels = c("angel", "pre_seed", "seed"),
      class = "factor"
    ),
    gender_d = c(
      1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
      0, 1, 1, 1, 1, 1, 1, 1, 0, 1
    ),
    state_code_org = structure(
      c(
        3L, 22L, 3L, 15L, 3L, 4L, 3L, 3L, 22L, 3L, 29L,
        25L, 8L, 29L, 10L, 6L, 22L, 4L, 17L, 23L, 17L
      ),
      levels = c(
        "AL", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "IL",
        "KS", "LA", "MA", "MD", "MN", "MO", "NC", "NE", "NH", "NJ",
        "NV", "NY", "OH", "OR", "PA", "RI", "SC", "TN", "TX", "UT",
        "VA", "VT", "WA", "WI", "WY"
      ),
      class = "factor"
    ),
    # Contains a single missing value (observation 16).
    first_time_founder_d = c(
      0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
      0, 1, 1, 1, NA, 1, 0, 0, 1, 0
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -21L)
)

# Recode the raw columns into labelled factors, drop incomplete rows, then let
# summary() tabulate every column and flatten that table with as.data.frame().
df <- df %>%
  select(investment_type, state_code_org, gender_d, first_time_founder_d) %>%
  # Turn the two 0/1 dummies into factors before relabelling them.
  mutate(across(all_of(c("gender_d", "first_time_founder_d")), factor)) %>%
  mutate(
    gender_d = factor(if_else(gender_d == 1, "Male", "Female")),
    first_time_founder_d = factor(
      if_else(first_time_founder_d == 1, "Yes", "No")
    ),
    investment_type = factor(
      if_else(
        investment_type == "angel",
        "Angel",
        if_else(investment_type == "pre_seed", "Pre-Seed", "Seed")
      )
    )
  ) %>%
  drop_na() %>%
  summary() %>%
  as.data.frame()

# Tidy the flattened summary: give the columns readable names, discard Var1,
# and map each raw column name onto its display label.
df <- df %>%
  rename(Variable = Var2, N = Freq) %>%
  select(-Var1) %>%
  mutate(
    Variable = factor(
      case_when(
        # A missing name stays missing, exactly as with nested ifelse().
        is.na(Variable) ~ NA_character_,
        Variable == "investment_type" ~ "Investment Type",
        Variable == "state_code_org" ~ "State",
        str_detect(Variable, "gender_d") ~ "Gender",
        TRUE ~ "First-Time Founder"
      )
    )
  ) %>%
  drop_na()

# Split each "level:count" string at the colon into Level and N, strip the
# surrounding white space, convert the counts to integers, and order the rows
# by Variable and then by descending N. The result stays grouped by Variable.
df <- df %>%
  separate(col = N, into = c("Level", "N"), sep = ":") %>%
  mutate(
    Variable = trimws(Variable),
    Level = trimws(Level),
    N = as.integer(trimws(N))
  ) %>%
  group_by(Variable) %>%
  arrange(Variable, desc(N))

# Render the summary with papaja's apa_table().
apa_table(
  df
  , caption = "Summary of categorical variables."
  , note = "Missing data is not shown."
  # , stub_indents = list("1", "2")
)

这是我现在得到的。

我愿意使用任何包——这恰好使用了 papaja。但它需要在带有 PDF 输出的 rmarkdown 中工作并符合 APA 风格。

我希望表格能合并重复的变量名,使每个变量名只出现一次,并把 State 分组中的 "(Other)" 行移到该分组的底部。下面是一个示例(使用的是另一个数据集):

最佳答案

这是使用 apa_table() 的另一种方法。

首先是一种更简单的方法来汇总您的数据:

library("dplyr")
library("tidyr")

# Example data: 21 observations of four variables, stored as a tibble-classed
# data frame. The two factor columns are given as integer codes that index
# into their level vectors.
df <- structure(
  list(
    investment_type = structure(
      c(
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L,
        3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L
      ),
      levels = c("angel", "pre_seed", "seed"),
      class = "factor"
    ),
    gender_d = c(
      1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
      0, 1, 1, 1, 1, 1, 1, 1, 0, 1
    ),
    state_code_org = structure(
      c(
        3L, 22L, 3L, 15L, 3L, 4L, 3L, 3L, 22L, 3L, 29L,
        25L, 8L, 29L, 10L, 6L, 22L, 4L, 17L, 23L, 17L
      ),
      levels = c(
        "AL", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "IL",
        "KS", "LA", "MA", "MD", "MN", "MO", "NC", "NE", "NH", "NJ",
        "NV", "NY", "OH", "OR", "PA", "RI", "SC", "TN", "TX", "UT",
        "VA", "VT", "WA", "WI", "WY"
      ),
      class = "factor"
    ),
    # Contains a single missing value (observation 16).
    first_time_founder_d = c(
      0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
      0, 1, 1, 1, NA, 1, 0, 0, 1, 0
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -21L)
)

# Count how often each level of every categorical variable occurs and express
# each count as a percentage of that variable's total. The result has one row
# per (variable, level) pair with the columns name, Variable, N and %.
factor_level_count <- df %>%
  mutate(
    # Attach readable labels to the 0/1 dummies and the investment types.
    gender_d = factor(gender_d, levels = c(0, 1), labels = c("Female", "Male"))
    , first_time_founder_d = factor(first_time_founder_d, levels = c(0, 1), labels = c("No", "Yes"))
    , investment_type = factor(investment_type, levels = c("angel", "pre_seed", "seed"), labels = c("Angel", "Pre-Seed", "Seed"))
  ) %>%
  # Keep only rows without missing values.
  na.exclude() %>%
  # Long format: `name` holds the column name, `value` the observed level.
  pivot_longer(cols = everything()) %>%
  group_by(name, value) %>%
  count() %>%
  ungroup() %>%
  mutate(
    # Replace the raw column names with the labels shown in the table.
    name = factor(
      name
      , levels = c("first_time_founder_d", "gender_d", "investment_type", "state_code_org")
      , labels = c("First-Time Founder", "Gender", "Investment Type", "State")
    )
  ) %>%
  group_by(name) %>%
  # Namespace-qualified so this step does not depend on papaja being attached
  # before it runs.
  mutate(percent = papaja::printnum(n / sum(n) * 100, digits = 1)) %>%
  # Drop the grouping so later steps receive a plain tibble.
  ungroup() %>%
  rename(Variable = value, N = n, "%" = percent)

现在您可以拆分 data.frame 并将它们重新组合成命名列表以获得 stub 缩进。

# One list element per variable, named after it. The first column (the split
# column) is dropped from every piece.
factor_level_count_list <- lapply(
  split(factor_level_count, f = factor_level_count$name, drop = TRUE),
  function(level_counts) level_counts[, -1]
)

library("papaja")

# Merge the per-variable tables into a single APA-style table.
apa_table(
  factor_level_count_list,
  caption = "Summary of categorical variables.",
  note = "Missing data is not shown.",
  # Right-align last column
  align = "llr",
  # Table style to use for merging list elements
  merge_method = "indent",
  # NOTE(review): presumably these are the table rows after which a rule is
  # drawn, chosen for this data set - confirm and adjust if the data change.
  midrules = c(3, 6, 9)
)

enter image description here

关于r 表列出多个具有频率的分类变量,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60019195/

相关文章:

r - dplyr 代码 "df %>% group_by(date = cut(date, breaks = "1 小时"))"不再产生所需的结果?

r - 在 R 或 SQLite 中分别将多列中的字符串拆分为新行

r - 当 latex 包丢失时,tinytex 无法写入目录

r-markdown - RMarkdown 中交叉引用方程编织到 docx (papaja)

r - 如何按 R 中的分组列求和?

r - 使用 R 将一列提取为行,保留其他列

r - 使用 dplyr : Within groups, 选择第一个满足条件的值

r - 错误 : Input files not all in same directory, 请提供明确的 wd

r - 有没有办法在 gt 包中用希腊符号重命名表列?