python - 登录网页并另存为png

标签 python selenium-webdriver phantomjs python-requests

我有一个需要身份验证才能访问的网站,我想为登录后的页面生成一张截图(图像)。我正在使用以下脚本 -

import os
import requests
from subprocess import Popen, PIPE
from selenium import webdriver

def abspath(*p):
    """Join the given path components and return the result as an absolute path."""
    joined = os.path.join(*p)
    return os.path.abspath(joined)


# Absolute path of the directory that contains this script.
ROOT = abspath(os.path.dirname(__file__))


def execute_command(command):
    """Run ``command`` through the shell and raise if it reports a problem.

    Args:
        command: The complete command line as a single string.

    Raises:
        Exception: If the command writes anything other than whitespace to
            stdout (the captured output is the exception argument), or if
            it exits with a non-zero status.
    """
    process = Popen(command, shell=True, stdout=PIPE)
    # communicate() drains and closes the pipe and waits for the process to
    # exit, so neither the pipe handle nor the child process is leaked.
    output, _ = process.communicate()
    if output.strip():
        raise Exception(output)
    if process.returncode != 0:
        # stderr is not captured here, so a command that fails without
        # writing to stdout would otherwise be treated as a success.
        raise Exception(
            'Command failed with exit code %d: %s'
            % (process.returncode, command))


def do_screen_capturing(url, screen_path, width, height):
    """Open ``url`` in a PhantomJS browser and save a screenshot of it.

    Args:
        url: Address handed to ``driver.get``.
        screen_path: File path handed to ``driver.save_screenshot``.
        width: Browser window width; only applied when both ``width`` and
            ``height`` are truthy.
        height: Browser window height; only applied when both ``width`` and
            ``height`` are truthy.
    """
    print("Capturing screen..")
    # The service log file is saved in the same directory. To store it
    # elsewhere, create the driver as
    # webdriver.PhantomJS(service_log_path='/var/log/phantomjs/ghostdriver.log')
    driver = webdriver.PhantomJS()
    try:
        driver.set_script_timeout(30)
        if width and height:
            driver.set_window_size(width, height)
        driver.get(url)
        driver.save_screenshot(screen_path)
    finally:
        # Shut the browser down even when loading or saving fails, so that
        # no PhantomJS process is left running after each capture.
        driver.quit()


def do_crop(params):
    """Crop the captured screenshot with ImageMagick's ``convert`` command.

    Args:
        params: Dict with the keys ``screen_path`` (input image),
            ``crop_path`` (output image), ``width`` and ``height`` (size of
            the crop region, which is anchored at offset +0+0).

    Raises:
        Exception: Whatever ``execute_command`` raises for the command.
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print("Croping captured image..")
    command = [
        'convert',
        params['screen_path'],
        '-crop', '%sx%s+0+0' % (params['width'], params['height']),
        params['crop_path']
    ]
    # NOTE(review): the arguments are joined without quoting, so a path
    # containing spaces would be split by the shell -- confirm whether such
    # paths need to be supported.
    execute_command(' '.join(command))


def do_thumbnail(params):
    """Create a thumbnail of the cropped image with ImageMagick's ``convert``.

    Args:
        params: Dict with the keys ``crop_path`` (input image),
            ``thumbnail_path`` (output image), ``width`` and ``height``
            (thumbnail size passed to ``-thumbnail``).

    Raises:
        Exception: Whatever ``execute_command`` raises for the command.
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print("Generating thumbnail from croped captured image..")
    command = [
        'convert',
        params['crop_path'],
        '-filter', 'Lanczos',
        '-thumbnail', '%sx%s' % (params['width'], params['height']),
        params['thumbnail_path']
    ]
    # NOTE(review): the arguments are joined without quoting, so a path
    # containing spaces would be split by the shell -- confirm whether such
    # paths need to be supported.
    execute_command(' '.join(command))


def get_screen_shot(**kwargs):
    """Capture a screenshot of a page and optionally crop and thumbnail it.

    Keyword Args:
        url: Address of the page to capture (required).
        width: Screen width to capture. Defaults to 1024.
        height: Screen height to capture. Defaults to 768.
        filename: Output file name, e.g. ``screen.png`` (the default).
        path: Directory in which the images are stored. Defaults to ``ROOT``.
        crop: Whether to crop the captured screen. Defaults to False.
        crop_width: Width of the cropped image. Defaults to ``width``.
        crop_height: Height of the cropped image. Defaults to ``height``.
        crop_replace: Whether the cropped image overwrites the original
            capture instead of being written to ``crop_<filename>``.
        thumbnail: Whether to generate a thumbnail; requires ``crop=True``.
        thumbnail_width: Width of the thumbnail. Defaults to ``width``.
        thumbnail_height: Height of the thumbnail. Defaults to ``height``.
        thumbnail_replace: Whether the thumbnail overwrites the cropped
            image instead of being written to ``thumbnail_<filename>``.

    Returns:
        Tuple ``(screen_path, crop_path, thumbnail_path)``. A path whose
        step was skipped or ran in replace mode is equal to the path of the
        preceding step.

    Raises:
        KeyError: If ``url`` is not supplied.
        Exception: If ``thumbnail`` is requested without ``crop``.
    """
    url = kwargs['url']
    crop = kwargs.get('crop', False)
    thumbnail = kwargs.get('thumbnail', False)

    # Reject an invalid option combination before doing any other work.
    if thumbnail and not crop:
        raise Exception('Thumnail generation requires crop image, set crop=True')

    width = int(kwargs.get('width', 1024))
    height = int(kwargs.get('height', 768))
    filename = kwargs.get('filename', 'screen.png')
    path = kwargs.get('path', ROOT)

    crop_width = int(kwargs.get('crop_width', width))
    crop_height = int(kwargs.get('crop_height', height))
    crop_replace = kwargs.get('crop_replace', False)

    thumbnail_width = int(kwargs.get('thumbnail_width', width))
    thumbnail_height = int(kwargs.get('thumbnail_height', height))
    thumbnail_replace = kwargs.get('thumbnail_replace', False)

    screen_path = abspath(path, filename)
    # Until a step writes to its own file, its path is the capture itself.
    crop_path = thumbnail_path = screen_path

    do_screen_capturing(url, screen_path, width, height)

    if crop:
        if not crop_replace:
            crop_path = abspath(path, 'crop_' + filename)
        params = {
            'width': crop_width, 'height': crop_height,
            'crop_path': crop_path, 'screen_path': screen_path}
        do_crop(params)

        if thumbnail:
            if not thumbnail_replace:
                thumbnail_path = abspath(path, 'thumbnail_' + filename)
            params = {
                'width': thumbnail_width, 'height': thumbnail_height,
                'thumbnail_path': thumbnail_path, 'crop_path': crop_path}
            do_thumbnail(params)
    return screen_path, crop_path, thumbnail_path


if __name__ == '__main__':
    # The string literal below is a no-op expression that lists the
    # requirements for running this script.
    '''
        Requirements:
        Install NodeJS
        Using Node's package manager install phantomjs: npm -g install phantomjs
        install selenium (in your virtualenv, if you are using that)
        install imageMagick
        add phantomjs to system path (on windows)
    '''
    # Request the page through a requests session that carries credentials.
    s = requests.Session()
    s.auth = ('username', 'password')
    # verify=False turns off TLS certificate verification for this request.
    r = s.get('https://website.com:8443/path/to/site', verify=False)
    # NOTE(review): r.text is the response body, not an address, yet it is
    # passed on as ``url`` and ends up in driver.get(). The PhantomJS browser
    # is also created separately and does not share this session's
    # credentials. This looks like the cause of the blank screenshot --
    # confirm and rework how authentication reaches the browser.
    url = r.text
    # Capture the page, crop it into crop_test.png and build a 200x150
    # thumbnail in thumbnail_test.png (neither step replaces its input).
    screen_path, crop_path, thumbnail_path = get_screen_shot(
        url=url, filename='test.png',
        crop=True, crop_replace=False,
        thumbnail=True, thumbnail_replace=False,
        thumbnail_width=200, thumbnail_height=150,
    )

我知道身份验证是成功的,因为打印 r.status_code 得到 200,并且 r.headers 返回了响应头。然而,r.text 会报编码错误。上面的代码没有报错,但得到的是一张空白图像。

这是在 Windows 计算机上。

编辑 - 如果我去掉 requests 部分,不登录而直接访问该 URL -

    # Variant of the script body: the address is passed straight to
    # get_screen_shot, with no authenticated requests session involved.
    url = 'https://website.com:8443/path/to/site'
    screen_path, crop_path, thumbnail_path = get_screen_shot(
        url=url, filename='test.png',
        crop=True, crop_replace=False,
        thumbnail=True, thumbnail_replace=False,
        thumbnail_width=200, thumbnail_height=150,
    )

它截取登录页面的屏幕截图。我想要做的是登录后获取页面的屏幕截图。

最佳答案

看起来 save_screenshot() 在页面未完全加载时被调用。

在这种情况下,您需要显式等待(explicit wait),例如等待登录表单变为可见。示例:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Load the page, then block for up to 10 seconds until the element with
# id "myForm" is visible, and only then take the screenshot.
driver.get(url)

form_locator = (By.ID, "myForm")
form_is_visible = EC.visibility_of_element_located(form_locator)
WebDriverWait(driver, 10).until(form_is_visible)

driver.save_screenshot(screen_path)

关于python - 登录网页并另存为png,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28822948/

相关文章:

python - 使用单个查询从多个表中提取

python - PyPandoc 与 PyInstaller 结合使用

python - 与 ROC 交叉验证?

python - centos无法安装mysqlclient

java - 获取所有开头的链接

selenium - 组合 XPath 选择器来选择后代或自身

java - PhantomJS 和 Selenium Webdriver - 如何清除 session

java - 如何检查所有复选框?尝试选中所有复选框,测试通过,但从视觉上我看不到它们已被选中

phantomjs - 在PhantomJs中下载作为POST请求响应附件的文件

web-scraping - 假装 Firefox 而不是 Phantom.js