r - 使用 R 的 httr 包发送 POST 请求

标签 r post web-scraping httr

我想使用 httr 向以下网站发送 POST 请求并获取其返回的输出:

http://www.e-grunt.ba

当您单击“ZK Ulošci”时,您可以看到提交表单。

我想发送 POST 请求并获取输出。例如,您可以在下拉列表中任选一项,在“Broj Uloška”字段中输入 1,然后单击“Traži”。

这是我的尝试:

library(httr)
library(tidyverse)
library(rvest)

    # Asker's attempt: submit the search form to home.jsf in one POST with
    # hand-filled field values. According to the question text, the server
    # answers this request with the home page instead of search results.
    # Note: no javax.faces.ViewState field is part of this body.
    output <- httr::POST(
      "http://www.e-grunt.ba/home.jsf",
      body = list(
        "form:court_focus" = "440",
        "form:cuTransferLast" = "17.07.2019",
        "form:municipality_input" = "4400000001",
        "form:mpart_focus" = "44000087",
        "form:folder" = 1,
        `recaptcha-token` = "some token",
        submit = "form:j_idt61"
        ),
      add_headers(Referer = "http://www.e-grunt.ba/"),
      # Send the body URL-encoded, as a browser form submit would.
      encode = "form",
      # Print the HTTP exchange to the console for debugging.
      verbose()
    )

但这只是返回主页的内容。

我知道使用 (R)Selenium 会更容易,但如果可能的话,我想使用 httr 和 POST 来做到这一点。

最佳答案

我找到了抓取这个 ASP.net 站点的方法。如果有人需要类似的东西,下面提供代码:

# Open a browsing session on e-grunt.ba and issue the initial ajax POST.
#
# Loads the landing page, reads the javax.faces.ViewState token from it, then
# sends a partial-ajax POST for component "j_idt8:j_idt15" that asks the server
# to render "content" (presumably the equivalent of opening the search form in
# the browser -- confirm against the live site).
#
# Returns the session object produced by the POST, with the token stored in
# its "viewState" attribute. Stops with an error when the landing page does
# not expose a usable view-state field, instead of posting NA to the server.
#
# NOTE(review): rvest:::request_POST is an unexported rvest internal; confirm
# that it exists in the installed rvest version.
start_session <- function() {
  p <- html_session(
    "http://www.e-grunt.ba",
    user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36")
  )

  # Select the hidden field by its name rather than by its position among all
  # <input> elements, so that added or reordered inputs cannot make us pick a
  # different field.
  view_state <- p %>%
    html_node('input[name="javax.faces.ViewState"]') %>%
    html_attr("value")
  if (length(view_state) != 1L || is.na(view_state) || !nzchar(view_state)) {
    stop(
      "javax.faces.ViewState not found on http://www.e-grunt.ba",
      call. = FALSE
    )
  }

  p <- rvest:::request_POST(
    p,
    "http://www.e-grunt.ba/home.jsf",
    add_headers(
      "Referer" = "http://www.e-grunt.ba"
    ),
    body = list(
      "javax.faces.partial.ajax" = "true",
      "javax.faces.source" = "j_idt8:j_idt15",
      "javax.faces.partial.execute" = "@all",
      "javax.faces.partial.render" = "content",
      "j_idt8:j_idt15" = "j_idt8:j_idt15",
      "j_idt8" = "j_idt8",
      "javax.faces.ViewState" = view_state
    ),
    encode = "form"
  )

  # Later requests read the token back via attributes(p)$viewState.
  attr(p, "viewState") <- view_state
  p
}

# EXTRACT METADATA --------------------------------------------------------

# Open the session once; the requests further down take `p` as input and
# assign their result back to `p`.
p <- start_session()
# Turn the children of the nodes matched by `css` into a two-column table.
#
# html    input handed to read_html() (the callers pass a response body)
# css     CSS selector of the parent node(s) whose children are read
# cnames  character vector of length 2: names for the value / text columns
#
# Returns a data.frame with one row per child: first column the child's
# "value" attribute, second column its text. Children whose value is "-1" are
# dropped. A child without a value attribute is kept with NA in the first
# column and its text intact.
name_value_pairs <- function(html, css, cnames) {
  # Parse and select once; value and text are read from the same node set.
  items <- read_html(html) %>%
    html_nodes(css) %>%
    html_children()

  df <- data.frame(
    x = html_attr(items, "value"),
    y = html_text(items),
    stringsAsFactors = FALSE
  )

  # `%in%` never returns NA, so a missing value attribute no longer turns the
  # whole row into NAs the way an `!=` comparison used as a row index does.
  df <- df[!(df[[1]] %in% "-1"), , drop = FALSE]
  colnames(df) <- cnames
  df
}
# Read the children of the element with id "form:court_input" from the current
# response into a court_id / court table.
courts <- name_value_pairs(
  p$response$content,
  css = '[id="form:court_input"]',
  cnames = c("court_id", "court")
)

# POST the ajax "change" event of the court selector (form:court).
#
# session_zk  session object handed to rvest:::request_POST
# view_state  value sent as javax.faces.ViewState
# id          value sent as form:court_input
#
# The request asks the server to re-render msgs, msgsBottom,
# form:municipality, form:mpart and form:cuTransferLast.
# Returns the result of rvest:::request_POST.
#
# NOTE: a second function named metadata_post is defined further down in this
# script and replaces this one once that definition has been evaluated.
metadata_post <- function(session_zk, view_state, id) {
  form_fields <- list(
    "javax.faces.partial.ajax" = "true",
    "javax.faces.source" = "form:court",
    "javax.faces.partial.execute" = "form:court",
    "javax.faces.partial.render" = "msgs msgsBottom form:municipality form:mpart form:cuTransferLast",
    "javax.faces.behavior.event" = "change",
    "javax.faces.partial.event" = "change",
    "form" = "form",
    "g-recaptcha-response" = "",
    "form:court_focus" = "",
    "form:court_input" = id,
    "form:cuTransferLast" = "",
    "form:municipality_focus" = "",
    "form:mpart_focus" = "",
    "form:folder" = "",
    "form:parcel" = "",
    "form:parcelSub" = "",
    "javax.faces.ViewState" = view_state
  )

  rvest:::request_POST(
    session_zk,
    "http://www.e-grunt.ba/home.jsf",
    add_headers("Referer" = "http://www.e-grunt.ba"),
    body = form_fields,
    encode = "form"
  )
}

# POST the ajax "change" event of the municipality selector
# (form:municipality).
#
# session_zk  session object handed to rvest:::request_POST
# view_state  value sent as javax.faces.ViewState
# id          value sent as form:court_input
# muni_id     value sent as form:municipality_input
#
# The request asks the server to re-render msgs, msgsBottom and form:mpart.
# Returns the result of rvest:::request_POST.
muni_post <- function(session_zk, view_state, id, muni_id) {
  form_fields <- list(
    "javax.faces.partial.ajax" = "true",
    "javax.faces.source" = "form:municipality",
    "javax.faces.partial.execute" = "form:municipality",
    "javax.faces.partial.render" = "msgs msgsBottom form:mpart",
    "javax.faces.behavior.event" = "change",
    "javax.faces.partial.event" = "change",
    "form" = "form",
    "g-recaptcha-response" = "",
    "form:court_focus" = "",
    "form:court_input" = id,
    "form:cuTransferLast" = "",
    "form:municipality_focus" = "",
    "form:municipality_input" = muni_id,
    "form:mpart_focus" = "",
    "form:folder" = "",
    "form:parcel" = "",
    "form:parcelSub" = "",
    "javax.faces.ViewState" = view_state
  )

  rvest:::request_POST(
    session_zk,
    "http://www.e-grunt.ba/home.jsf",
    add_headers("Referer" = "http://www.e-grunt.ba"),
    body = form_fields,
    encode = "form"
  )
}


# Build the court / municipality / ko lookup table.
#
# For every court the court "change" request is sent and the municipality
# options are read from the response. When a court has more than one
# municipality, the municipality "change" request is sent for each of them and
# the ko options are read from that response; otherwise the ko options are read
# from the court response directly.
#
# `p`, `i` and `j` stay ordinary top-level variables: the example call at the
# end of this script reads them after the loop has finished.

# One-row placeholders so that a drop-down without options still contributes a
# row instead of making cbind.data.frame() fail on differing row counts.
empty_muni <- data.frame(
  muni_id = NA_character_, muni = NA_character_, stringsAsFactors = FALSE
)
empty_ko <- data.frame(
  ko_id = NA_character_, ko = NA_character_, stringsAsFactors = FALSE
)

metadata_i <- vector("list", nrow(courts))
for (i in seq_along(courts$court_id)) {
  print(i)
  p <- metadata_post(p, attributes(p)$viewState, courts$court_id[i])
  muni <- name_value_pairs(
    p$response$content,
    css = '[id="form:municipality_input"]',
    cnames = c("muni_id", "muni")
  )

  if (nrow(muni) > 1) {
    muni_ko <- vector("list", nrow(muni))
    for (j in seq_along(muni$muni_id)) {
      p <- muni_post(
        p, attributes(p)$viewState, courts$court_id[i], muni$muni_id[j]
      )
      ko <- name_value_pairs(
        p$response$content,
        css = '[id="form:mpart_input"]',
        cnames = c("ko_id", "ko")
      )
      if (nrow(ko) == 0) {
        ko <- empty_ko
      }
      muni_ko[[j]] <- cbind.data.frame(
        muni[j, ], ko, stringsAsFactors = FALSE
      )
    }
    metadata_i[[i]] <- cbind.data.frame(
      courts[i, ], do.call(rbind, muni_ko), stringsAsFactors = FALSE
    )
  } else {
    ko <- name_value_pairs(
      p$response$content,
      css = '[id="form:mpart_input"]',
      cnames = c("ko_id", "ko")
    )
    # Same empty-result handling as in the multi-municipality branch.
    if (nrow(muni) == 0) {
      muni <- empty_muni
    }
    if (nrow(ko) == 0) {
      ko <- empty_ko
    }
    metadata_i[[i]] <- cbind.data.frame(
      courts[i, ], muni, ko, stringsAsFactors = FALSE
    )
  }
}
metadata <- do.call(rbind, metadata_i)

# POST the filled-in search form (non-ajax submit via form:j_idt61).
#
# session_zk  session object handed to rvest:::request_POST
# view_state  value sent as javax.faces.ViewState
# recaptcha   value sent as g-recaptcha-response
# court       value sent as form:court_input
# date        value sent as form:cuTransferLast; defaults to the date four
#             days before today, formatted as dd.mm.yyyy
# muni        value sent as form:municipality_input
# ko          value sent as form:mpart_input
# zk          value sent as form:folder
#
# Returns the result of rvest:::request_POST.
#
# NOTE: this definition reuses the name metadata_post and therefore replaces
# the three-argument function defined earlier in this script.
metadata_post <- function(session_zk, view_state, recaptcha, court,
                          date = as.character(format.Date(Sys.Date() - 4, "%d.%m.%Y")),
                          muni, ko, zk
) {
  form_fields <- list(
    "form" = "form",
    "g-recaptcha-response" = recaptcha,
    "form:court_focus" = "",
    "form:court_input" = court,
    "form:cuTransferLast" = date,
    "form:municipality_focus" = "",
    "form:municipality_input" = muni,
    "form:mpart_focus" = "",
    "form:mpart_input" = ko,
    "form:folder" = zk,
    "form:parcel" = "",
    "form:parcelSub" = "",
    "form:j_idt61" = "",
    "javax.faces.ViewState" = view_state
  )

  rvest:::request_POST(
    session_zk,
    "http://www.e-grunt.ba/home.jsf",
    add_headers("Referer" = "http://www.e-grunt.ba"),
    body = form_fields,
    encode = "form"
  )
}

# Example: submit one search using a row of the collected metadata.
# NOTE(review): break_captcha() is not defined anywhere in this snippet;
# presumably it returns a solved reCAPTCHA token -- supply your own.
result <- break_captcha()
# `i` and `j` are not assigned here: the call uses the values left behind by
# the metadata loops above. `result` is sent as g-recaptcha-response and `j`
# as form:folder.
p <- metadata_post(session_zk = p, view_state = attributes(p)$viewState, 
                   recaptcha = result, court = metadata$court_id[i],
                   muni = metadata$muni_id[i], ko =  metadata$ko_id[i], zk = j)

关于 r - 使用 R 的 httr 包发送 POST 请求,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/57136135/

相关文章:

r - 如何向geom_histogram 添加密度曲线和平均线?

sql - 通过 R 连接到 Azure SQL

ios - 使用 UIWebView 时让 POST 始终通过 NSUrlCache 的问题

python - 如何从具有不同 id 名称的段落中抓取文本?

javascript - 您可以自动将产品从在线商店添加到 WooCommerce 吗?

r - 如何列出与 R 中另一个变量相关的变量的类别?

r - 为什么 h2o 包中的交叉验证数据有不同的 MSE

post - Wildfly 10 忽略最大 POST 请求大小

php - PHP 中的 HTTP POST 请求

c# - 如何在C#中设置下载超时