php - curl 无法将数据发布到 php 页面

标签 php asp.net curl web-scraping libcurl

我正在尝试将值发布到表单以从 aspx 页面获取数据,但我无法获取数据

  1. 要打开的第一个网址: $url="https://www.clarkcountycourts.us/Anonymous/default.aspx"(此处设置 cookie)

  2. 要搜索数据的第二个网址: $url3 ="https://www.clarkcountycourts.us/Anonymous/Search.aspx"

当我发布数据时,我只得到 $url3 页面本身的 HTML,而没有得到搜索结果数据。

下面是我的代码,我错过了什么?请指导我。

<?php
// Entry page, requested first so the server's cookies are captured.
$url = "https://www.clarkcountycourts.us/Anonymous/default.aspx";
// Cookie store: cURL writes cookies here and reads them back from here.
$cookie = "cookie.txt";
// Search page that receives the POSTed form values.
$url3 = "https://www.clarkcountycourts.us/Anonymous/Search.aspx";

// Request 1: plain GET of the entry page.
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url,
    CURLOPT_SSL_VERIFYPEER => FALSE,
    CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6",
    CURLOPT_TIMEOUT        => 60,
    CURLOPT_FOLLOWLOCATION => 1,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_COOKIEJAR      => $cookie,
    CURLOPT_COOKIEFILE     => $cookie,  // read the stored cookies back as well as writing them
    CURLOPT_REFERER        => $url,
));
$result = curl_exec($ch);

// Visible form values for an attorney search by name.
$fields = array(
    'SearchBy'           => "2",
    'AttorneySearchMode' => "Name",
    'LastName'           => "Smith",
    'FirstName'          => "William",
    'MiddleName'         => "",
    'CaseStatusType'     => "0",
    'SortBy'             => "fileddate",
    'DateFiledOnAfter'   => "",
    'DateFiledOnBefore'  => "",
    'SearchSubmit'       => "Search",
);

// URL-encode every value and join the pairs as key=value&key=value.
$postBody = http_build_query($fields, '', '&');

// Request 2: reuse the same handle (and its cookies) to POST the search.
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url3,
    CURLOPT_HEADER         => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_POST           => 1,
    CURLOPT_POSTFIELDS     => $postBody,
));
$result2 = curl_exec($ch);
print_r($result2);

最佳答案

我花了一些时间才弄清楚这一点——下面的代码应该能按照您最初的查询(以律师身份搜索 William Smith)执行搜索并输出结果。希望这能说得通……

            /*
             * Three-stage scrape of the court search site:
             *   1. GET the login page so the session cookies are stored.
             *   2. GET the search form and copy the hidden form fields out of it.
             *   3. POST the search with those hidden fields plus the visible form values.
             * Requires the htmldom utility class to be loaded.
             */
            if( !defined('ROOT') ) define('ROOT','c:/wwwroot');
            $url_base='https://www.clarkcountycourts.us';
            $url_login=$url_base.'/Anonymous/Login.aspx?ReturnUrl=/Anonymous/default.aspx';
            /* Not requested directly - it is the ReturnUrl target of the login url above */
            $url_start=$url_base.'/Anonymous/default.aspx';
            $url_search=$url_base.'/Anonymous/Search.aspx?ID=200&NodeID=101,103,104,105,500,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,797,798&NodeDesc=All%20Courts';
            $cookiejar=tempnam( sys_get_temp_dir(), 'cookiejar_' );

            $lastname='smith';
            $firstname='william';

            /* Hidden fields whose values must be read from the form page and sent back unchanged */
            $formparams=array(
                '__VIEWSTATEGENERATOR',
                '__EVENTVALIDATION',
                '__EVENTTARGET',
                '__EVENTARGUMENT',
                '__VIEWSTATE'
            );
            /* Every other field the search form submits */
            $stdparams=array(
                'SearchBy' => '2',
                'AttorneySearchMode' => 'Name',
                'LastName' => $lastname,
                'FirstName' => $firstname,
                'MiddleName' => '',
                'CaseStatusType' => '0',
                'SortBy' => 'fileddate',
                'DateFiledOnAfter' => '',
                'DateFiledOnBefore' => '',
                'SearchSubmit' => 'Search',
                'CaseSearchMode' => 'CaseNumber',
                'CaseSearchValue' => '',
                'CitationSearchValue' => '',
                'CourtCaseSearchValue' => '',
                'PartySearchMode' => 'Name',
                'cboState' => 'AA',
                'DateOfBirth' => '',
                'DriverLicNum' => '',
                'chkCriminal' => 'on',
                'chkFamily' => 'on',
                'chkCivil' => 'on',
                'chkProbate' => 'on',
                'chkDtRangeCriminal' => 'on',
                'chkDtRangeFamily' => 'on',
                'chkDtRangeCivil' => 'on',
                'chkDtRangeProbate' => 'on',
                'chkCriminalMagist' => 'on',
                'chkFamilyMagist' => 'on',
                'chkCivilMagist' => 'on',
                'chkProbateMagist' => 'on',
                'DateSettingOnAfter' => '',
                'DateSettingOnBefore' => '',
                'SearchType' => 'PARTY',
                'SearchMode' => 'NAME',
                'NameTypeKy' => 'ALIAS',
                'BaseConnKy' => 'AT',
                'StatusType' => 'true',
                'ShowInactive' => '',
                'AllStatusTypes' => 'true',
                'CaseCategories' => '',
                'RequireFirstName' => 'False',
                'CaseTypeIDs' => '',
                'HearingTypeIDs' => ''
            );
            /* A fudge I know, manually build this param - I think it might be constructed using javascript on submission */
            $stdparams['SearchParams']='SearchBy~~Search+By:~~Attorney~~Attorney||AttorneyNameOption~~Party+Search+Mode:~~Name~~Name||LastName~~Last+Name:~~'.$lastname.'~~'.$lastname.'||FirstName~~First+Name:~~'.$firstname.'~~'.$firstname.'||AllOption~~Case+Status:~~0~~All||selectSortBy~~Sort+By:~~Filed+Date~~Filed+Date ';




            /* Initialise curl and set basic options */
            $curl=curl_init();
            if( parse_url( $url_base,PHP_URL_SCHEME )=='https' ){
                /* Verify both the host name and the certificate chain; turning peer verification off would make the CA bundle pointless */
                curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
                curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, true );
                /* realpath() yields false when the bundle is missing - in that case keep curl's default CA store */
                $cabundle=realpath( ROOT . '/cacert.pem' );
                if( $cabundle!==false ){
                    curl_setopt( $curl, CURLOPT_CAINFO, $cabundle );
                }
            }
            curl_setopt( $curl, CURLINFO_HEADER_OUT, true );
            curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
            curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
            curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
            curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0' );
            curl_setopt( $curl, CURLOPT_HTTPHEADER, array('Host: '.parse_url( $url_login,PHP_URL_HOST ),'Connection: keep-alive') );

            /* Runs whatever request is currently configured on the handle; stops the script with the curl error when the transfer fails */
            $request=function( $stage ) use ( $curl ){
                $body=curl_exec( $curl );
                if( $body===false ){
                    $error=curl_error( $curl );
                    curl_close( $curl );
                    exit( 'cURL request failed ('.$stage.'): '.$error );
                }
                return $body;
            };


            /* Stage 1 - Get initial page to obtain session cookies etc and store in cookiejar...mmmmm, cookies! */
            curl_setopt( $curl, CURLOPT_URL, $url_login );
            curl_setopt( $curl, CURLOPT_COOKIEJAR, $cookiejar );
            curl_setopt( $curl, CURLOPT_COOKIESESSION, true );
            $res=$request( 'stage 1, login page' );



            /* Stage 2 - GET form page and analyse input elements */
            curl_setopt( $curl, CURLOPT_URL, $url_search );
            curl_setopt( $curl, CURLOPT_COOKIEFILE, $cookiejar );
            curl_setopt( $curl, CURLOPT_COOKIESESSION, false );
            $res=$request( 'stage 2, search form' );



            /* utility class that simplifies getting DOMDocument with error checking etc */
            $dom=new htmldom( $res );
            $html=$dom->gethtml();
            /* gethtml() returns null when the response body was empty, so there is no form to read */
            if( !( $html instanceof DOMDocument ) ){
                curl_close( $curl );
                exit( 'Search form page was empty or could not be parsed' );
            }
            $col=$html->getElementsByTagName('input');

            /* we need to know values for specific fields - add to array of params to be submitted */
            foreach( $col as $index => $node ){
                $fieldname=$node->getAttribute('name');
                if( in_array( $fieldname, $formparams, true ) ) {
                    $stdparams[ $fieldname ]=$node->getAttribute('value');
                }
            }
            /* Not sure how the 'SearchParams' field value is calculated so calculated manually above..... */




            /* Stage 3 - POST */
            /* Prepare search query */
            $querystring=http_build_query( $stdparams, '', '&' );
            curl_setopt( $curl, CURLOPT_URL, $url_search );
            curl_setopt( $curl, CURLOPT_REFERER, $url_search );
            curl_setopt( $curl, CURLOPT_COOKIEFILE, $cookiejar );
            curl_setopt( $curl, CURLOPT_COOKIESESSION, false );
            curl_setopt( $curl, CURLOPT_POST, true );
            curl_setopt( $curl, CURLOPT_POSTFIELDS, $querystring );
            $res=$request( 'stage 3, search POST' );
            /* Transfer details for debugging, including the request headers captured via CURLINFO_HEADER_OUT */
            $info=curl_getinfo( $curl );


            /* The final search results - you could manipulate the dom to get specific items if you wished */
            echo '<pre>';
            print_r( $res );
            echo '</pre>';

            /* Close curl */
            curl_close( $curl );

为完整起见,下面给出上面提到的实用工具类。

/**
 * Small wrapper that parses an HTML string into a DOMDocument while keeping
 * libxml parse errors out of the script's output.
 */
class htmldom{
    /** Parsed DOMDocument; stays null when the constructor was given no data. */
    private $html;

    /**
     * Parse the given markup.
     *
     * @param string|false $data    HTML to parse. Any falsy value (false, null, '', '0') skips parsing.
     * @param bool         $convert When true the markup is passed through mb_convert_encoding( $data, 'utf-8' ) before loading.
     */
    public function __construct( $data=false, $convert=true ){
        /* Nothing to parse - leave $html as null (a constructor's return value is ignored, so just return) */
        if( !$data ) return;

        /* Collect libxml errors internally instead of emitting warnings; remember the previous setting so it can be restored */
        $previous=libxml_use_internal_errors( true );
        try{
            $this->html = new DOMDocument('1.0','utf-8');
            $this->html->validateOnParse=false;
            $this->html->standalone=true;
            $this->html->preserveWhiteSpace=true;
            $this->html->strictErrorChecking=false;
            $this->html->substituteEntities=false;
            $this->html->recover=true;
            $this->html->formatOutput=false;

            $this->html->loadHTML( $convert ? mb_convert_encoding( $data, 'utf-8' ) : $data );

        }catch( Exception $e ){
            die( $e->getMessage() );
        }

        /* Discard the collected parse errors and hand the global libxml setting back to the caller unchanged */
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );
    }

    /**
     * @return DOMDocument|null The parsed document, or null when no data was supplied.
     */
    public function gethtml(){
        return $this->html;
    }
}

关于php - curl 无法将数据发布到 php 页面,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/27331102/

相关文章:

asp.net - 两次调用 RenderSection ......?

curl - Jenkins远程触发带参数

php - 使用 php cURL 从文件头获取文件名

php - 没有 PHP artisan serve,Livewire 无法工作

php - 如何使用PHP和MYSQL将数据插入到两个不同的表中

php - "Cannot pass parameter 2 by reference"PHP错误

curl - 如何将 cURL 转换为 postman?

php - Codeigniter 3 show_404函数问题-MY_Exception无法加载

asp.net - 在为 ASP.net 构建期间缩小内联 javascript?

javascript - .Net Listbox 无法使用 javascript 事件处理程序进行编译?