go - 使用 Colly 框架我无法登录 Evernote 帐户

标签 go web-scraping evernote

我正在使用 colly 框架抓取网站,并尝试登录 Evernote 帐户以抓取一些内容,但始终无法登录成功。我使用 "username" 和 "password" 这两个字段名来提交凭据。这是正确的做法吗?

提前谢谢你。

package main

import (
 "log"
 "github.com/gocolly/colly"
)

// main issues a POST with the credentials to the Evernote login URL, then
// registers a response logger and visits the Evernote home page.
func main() {
    collector := colly.NewCollector()

    // Credentials are passed as the request data of the login POST.
    credentials := map[string]string{
        "username": "XXXXXX@XXX.com",
        "password": "*********",
    }
    if err := collector.Post("https://www.evernote.com/Login.action", credentials); err != nil {
        log.Fatal("Error : ", err)
    }

    // The response callback is registered only after the login POST was issued.
    collector.OnResponse(func(resp *colly.Response) {
        log.Println("response received", resp.StatusCode)
    })

    // Start scraping from the home page.
    collector.Visit("https://www.evernote.com/")
}

最佳答案

你应该尝试模仿浏览器的行为,看看这个实现,我在每一步都添加了注释:

package evernote

import (
    "bytes"
    "errors"
    "fmt"
    "io/ioutil"
    "net/http"
    "net/http/cookiejar"
    "net/url"
    "regexp"
    "strings"
)

const (
    // evernoteLoginURL is the URL that Login requests with GET and POST.
    evernoteLoginURL = "https://www.evernote.com/Login.action"
)

var (
    // evernoteJSParamsExpr captures the element ID (group 1) and the assigned
    // value (group 2) of inline JavaScript statements of the form
    // document.getElementById("ID").value = "VALUE".
    evernoteJSParamsExpr = regexp.MustCompile(`document.getElementById\("(.*)"\).value = "(.*)"`)
    // evernoteRedirectExpr captures the href of the link that follows the
    // text "Redirecting to" in a response body.
    evernoteRedirectExpr = regexp.MustCompile(`Redirecting to <a href="(.*)">`)

    // hptsKey and hptshKey are the element IDs that extractJSParams compares
    // each captured ID against. They were referenced but never declared,
    // which prevented the package from compiling.
    // NOTE(review): the values are assumed to equal the "hpts"/"hptsh" form
    // field names sent by Login; confirm against the login page markup.
    hptsKey  = []byte("hpts")
    hptshKey = []byte("hptsh")

    // errNoMatches is returned by extractJSParams when nothing usable is found.
    errNoMatches = errors.New("No matches")
    // errRedirectURL is returned by Login when the redirect link is missing.
    errRedirectURL = errors.New("Redirect URL not found")
)

// EvernoteClient holds the credentials and the HTTP client used to log in to
// the Evernote website. Create one with NewEvernoteClient so that httpClient
// is initialized before Login is called.
type EvernoteClient struct {
    // Username is sent as the "username" form field during Login.
    Username   string
    // Password is sent as the "password" form field in the final login request.
    Password   string
    // httpClient performs every request of the login flow.
    httpClient *http.Client

    // hpts and hptsh are filled in by extractJSParams from the login page and
    // sent back as form fields in the subsequent login requests.
    hpts  string
    hptsh string
}

// NewEvernoteClient returns a client for the given credentials whose HTTP
// client stores cookies in a fresh cookie jar, mimicking a browser session.
func NewEvernoteClient(username, password string) *EvernoteClient {
    // The error is deliberately ignored: the constructor has no error result
    // to report it through.
    jar, _ := cookiejar.New(nil)

    return &EvernoteClient{
        Username: username,
        Password: password,
        // Same settings as http.DefaultClient, plus the cookie jar.
        // NOTE(review): http.DefaultClient.Timeout is normally zero, which
        // means requests made by this client have no timeout.
        httpClient: &http.Client{
            Transport:     http.DefaultTransport,
            CheckRedirect: http.DefaultClient.CheckRedirect,
            Jar:           jar,
            Timeout:       http.DefaultClient.Timeout,
        },
    }
}

// extractJSParams scans body for the JavaScript assignments matched by
// evernoteJSParamsExpr and stores the values assigned to the elements whose
// IDs equal hptsKey and hptshKey in e.hpts and e.hptsh respectively.
//
// It returns errNoMatches when body contains no such assignment, or when a
// match does not carry both capture groups.
func (e *EvernoteClient) extractJSParams(body []byte) error {
    matches := evernoteJSParamsExpr.FindAllSubmatch(body, -1)
    if len(matches) == 0 {
        return errNoMatches
    }
    for _, submatches := range matches {
        if len(submatches) < 3 {
            // Return immediately: the previous code stored this error and
            // then discarded it by unconditionally returning nil.
            return errNoMatches
        }
        key, val := submatches[1], submatches[2]

        if bytes.Equal(key, hptsKey) {
            e.hpts = string(val)
        }
        if bytes.Equal(key, hptshKey) {
            e.hptsh = string(val)
        }
    }
    return nil
}

// Login performs the login flow against evernoteLoginURL in three steps:
//
//  1. GET the login page and extract the "hpts"/"hptsh" parameters from it.
//  2. POST the username with "evaluateUsername" set and check that the
//     response contains "usePasswordAuth":true.
//  3. POST the username and password and look for the redirect link in the
//     response body.
//
// On success the redirect URL is printed to standard output and nil is
// returned. It returns the underlying error when a request or body read
// fails, errNoMatches (via extractJSParams) when the login page has no
// matching parameters, an error when password authentication is not reported
// as enabled, and errRedirectURL when no redirect link is found.
func (e *EvernoteClient) Login() error {
    // First step: fetch the login page as a browser visitor would do:
    res, err := e.httpClient.Get(evernoteLoginURL)
    if err != nil {
        return err
    }
    body, err := readEvernoteBody(res)
    if err != nil {
        return err
    }
    if err := e.extractJSParams(body); err != nil {
        return err
    }

    // Second step: we have extracted the "hpts" and "hptsh" parameters.
    // We send a request using only the username and setting "evaluateUsername":
    values := url.Values{}
    values.Set("username", e.Username)
    values.Set("evaluateUsername", "")
    values.Set("analyticsLoginOrigin", "login_action")
    values.Set("clipperFlow", "false")
    values.Set("showSwitchService", "true")
    values.Set("hpts", e.hpts)
    values.Set("hptsh", e.hptsh)

    body, err = e.postLoginForm(values, "application/json")
    if err != nil {
        return err
    }
    if !strings.Contains(string(body), `"usePasswordAuth":true`) {
        return errors.New("Password auth not enabled")
    }

    // Third step: do the final request, append password to form data:
    values.Del("evaluateUsername")
    values.Set("password", e.Password)
    values.Set("login", "Sign in")

    body, err = e.postLoginForm(values, "text/html")
    if err != nil {
        return err
    }

    // Check the body in order to find the redirect URL:
    m := evernoteRedirectExpr.FindStringSubmatch(string(body))
    if len(m) < 2 {
        return errRedirectURL
    }
    fmt.Println("Login is ok, redirect URL:", m[1])
    return nil
}

// postLoginForm POSTs values as URL-encoded form data to evernoteLoginURL
// with the given Accept header and returns the full response body.
func (e *EvernoteClient) postLoginForm(values url.Values, accept string) ([]byte, error) {
    req, err := http.NewRequest(http.MethodPost, evernoteLoginURL, bytes.NewBufferString(values.Encode()))
    if err != nil {
        return nil, err
    }
    req.Header.Set("Accept", accept)
    req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
    req.Header.Set("x-requested-with", "XMLHttpRequest")
    req.Header.Set("referer", evernoteLoginURL)

    res, err := e.httpClient.Do(req)
    if err != nil {
        return nil, err
    }
    return readEvernoteBody(res)
}

// readEvernoteBody reads res.Body to the end and closes it. Closing the body
// is required so the underlying connection is released; the previous code
// never closed any of the three response bodies.
func readEvernoteBody(res *http.Response) ([]byte, error) {
    defer res.Body.Close()
    return ioutil.ReadAll(res.Body)
}

成功获取重定向 URL 后,只要您继续使用用于登录过程的 HTTP 客户端,您就应该能够发送经过身份验证的请求,cookie jar 在这里起着非常重要的作用。

要调用此代码,请使用:

// main builds a client with placeholder credentials and panics if the login
// flow returns an error.
func main() {
    client := NewEvernoteClient("user@company", "password")
    if loginErr := client.Login(); loginErr != nil {
        panic(loginErr)
    }
}

关于go - 使用 Colly 框架我无法登录 Evernote 帐户,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50576248/

相关文章:

http - 使用 BindQuery 方法时报错: reflect.flag.mustBeAssignable using unaddressable value

go - 在 Go 的 if 语句中混用 := 和 =

pointers - 简洁的 nil 检查结构字段指针?

web-scraping - 服务器端的屏幕抓取

ios - 如何在 iPhone sdk 中导入 Evernote 中的图像

for-loop - 具有更多变量的范围循环

python - 无法从图像中提取单词

python - BeautifulSoup 错误: '<class 'bs4.element.Tag'>' object has no attribute 'contents'?

tags - Evernote 对标签使用什么排序规则?

c++ - EverNote API 编译错误