php - curl 无法将数据发布到 php 页面

标签 php curl web-scraping libcurl

我正在尝试向一个表单提交(POST)数据,以便从 aspx 页面获取结果,但始终拿不到想要的数据。

  1. 要打开的第一个网址: $url=""(此处设置 cookie)

  2. 我们要搜索数据的第二个 url $url3 =""

当我发布数据时,我只得到第 3 页的 html 而不是第 2 页的数据


// URL that the search form is later posted to (left empty in this listing).
$url3 = "";

// First request: load $url and keep whatever cookies the server sets.
// $url and $cookie are expected to be defined before this point.
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url,
    CURLOPT_SSL_VERIFYPEER => FALSE,
    CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv: Gecko/20070725 Firefox/",
    CURLOPT_TIMEOUT        => 60,
    CURLOPT_FOLLOWLOCATION => 1,
    CURLOPT_RETURNTRANSFER => 1,
    // Write received cookies to $cookie ...
    CURLOPT_COOKIEJAR      => $cookie,
    // ... and read them back from the same file on later requests.
    CURLOPT_COOKIEFILE     => $cookie,  // <-- add this line
    CURLOPT_REFERER        => $url,
));
$result = curl_exec($ch);

// Values for the search form.
$SearchBy = "2";
// This variable was used below without ever being assigned. "Name" is the
// value the working request later in this page sends for the same field.
$AttorneySearchMode = "Name";
$LastName = "Smith";
$FirstName = "William";
$MiddleName = "";
$CaseStatusType = "0";
$SortBy = "fileddate";
$DateFiledOnAfter = "";
$DateFiledOnBefore = "";
$SearchSubmit = "Search";

// Raw (unencoded) form fields, keyed by the form's input names.
$fields = array(
    'SearchBy' => $SearchBy,
    'AttorneySearchMode' => $AttorneySearchMode,
    'LastName' => $LastName,
    'FirstName' => $FirstName,
    'MiddleName' => $MiddleName,
    'CaseStatusType' => $CaseStatusType,
    'SortBy' => $SortBy,
    'DateFiledOnAfter' => $DateFiledOnAfter,
    'DateFiledOnBefore' => $DateFiledOnBefore,
    'SearchSubmit' => $SearchSubmit,
);

// http_build_query() URL-encodes every value and joins the pairs with '&',
// replacing the manual urlencode()/concatenate/rtrim() sequence.
$fields_string = http_build_query($fields, '', '&');

// Second request on the same handle: POST the encoded form fields to $url3.
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url3,
    // Include the response headers in the returned string.
    CURLOPT_HEADER         => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_POST           => 1,
    CURLOPT_POSTFIELDS     => $fields_string,
));
$result2 = curl_exec($ch);


我花了一些时间才弄清楚这个问题——下面的代码应该能按照您最初的查询(以律师身份搜索 William Smith)执行搜索并输出结果。希望对您有所帮助。

            /* Temporary file that stores the cookies shared by the requests */
            $cookiejar=tempnam( sys_get_temp_dir(), 'cookiejar_' );

            /* Name to search for.
               NOTE(review): these two assignments are missing from this listing; the values
               are taken from the William Smith query described in the text - confirm */
            $lastname='Smith';
            $firstname='William';

            /* Form fields submitted with the search, keyed by input name */
            $stdparams=array(
                'SearchBy' => '2',
                'AttorneySearchMode' => 'Name',
                'LastName' => $lastname,
                'FirstName' => $firstname,
                'MiddleName' => '',
                'CaseStatusType' => '0',
                'SortBy' => 'fileddate',
                'DateFiledOnAfter' => '',
                'DateFiledOnBefore' => '',
                'SearchSubmit' => 'Search',
                'CaseSearchMode' => 'CaseNumber',
                'CaseSearchValue' => '',
                'CitationSearchValue' => '',
                'CourtCaseSearchValue' => '',
                'PartySearchMode' => 'Name',
                'cboState' => 'AA',
                'DateOfBirth' => '',
                'DriverLicNum' => '',
                'chkCriminal' => 'on',
                'chkFamily' => 'on',
                'chkCivil' => 'on',
                'chkProbate' => 'on',
                'chkDtRangeCriminal' => 'on',
                'chkDtRangeFamily' => 'on',
                'chkDtRangeCivil' => 'on',
                'chkDtRangeProbate' => 'on',
                'chkCriminalMagist' => 'on',
                'chkFamilyMagist' => 'on',
                'chkCivilMagist' => 'on',
                'chkProbateMagist' => 'on',
                'DateSettingOnAfter' => '',
                'DateSettingOnBefore' => '',
                'SearchType' => 'PARTY',
                'SearchMode' => 'NAME',
                'NameTypeKy' => 'ALIAS',
                'BaseConnKy' => 'AT',
                'StatusType' => 'true',
                'ShowInactive' => '',
                'AllStatusTypes' => 'true',
                'CaseCategories' => '',
                'RequireFirstName' => 'False',
                'CaseTypeIDs' => '',
                'HearingTypeIDs' => ''
            );
            /* A fudge I know, manually build this param - I think it might be constructed using javascript on submission */
            $stdparams['SearchParams']='SearchBy~~Search+By:~~Attorney~~Attorney||AttorneyNameOption~~Party+Search+Mode:~~Name~~Name||LastName~~Last+Name:~~'.$lastname.'~~'.$lastname.'||FirstName~~First+Name:~~'.$firstname.'~~'.$firstname.'||AllOption~~Case+Status:~~0~~All||selectSortBy~~Sort+By:~~Filed+Date~~Filed+Date ';

            /* Initialise curl and set basic options */
            /* The handle was never created in this listing; the target URL is set per request via CURLOPT_URL */
            $curl=curl_init();

            /* TLS options only apply when the base URL uses https */
            if( parse_url( $url_base,PHP_URL_SCHEME )==='https' ){
                curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
                /* NOTE(review): peer verification is switched off even though a CA bundle is configured on the next line - confirm this is intended */
                curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, false );
                curl_setopt( $curl, CURLOPT_CAINFO, realpath( ROOT . '/cacert.pem' ) );
            }
            /* Record the outgoing request headers so curl_getinfo() can report them */
            curl_setopt( $curl, CURLINFO_HEADER_OUT, true );
            /* Return the response body from curl_exec() instead of printing it */
            curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
            curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
            curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
            curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0' );
            curl_setopt( $curl, CURLOPT_HTTPHEADER, array('Host: '.parse_url( $url_login,PHP_URL_HOST ),'Connection: keep-alive') );

            /* Stage 1 - request the login page so the server's cookies are written to the cookie jar */
            curl_setopt_array( $curl, array(
                CURLOPT_URL           => $url_login,
                CURLOPT_COOKIEJAR     => $cookiejar,
                CURLOPT_COOKIESESSION => true
            ) );
            $res=curl_exec( $curl );

            /* Step 2 - GET the search form page, sending back the stored cookies; the markup is analysed afterwards */
            curl_setopt_array( $curl, array(
                CURLOPT_URL           => $url_search,
                CURLOPT_AUTOREFERER   => true,
                CURLOPT_COOKIEFILE    => $cookiejar,
                CURLOPT_COOKIESESSION => false
            ) );
            $res=curl_exec( $curl );

            /* utility class that simplifies getting DOMDocument with error checking etc */
            $dom=new htmldom( $res );

            /* NOTE(review): the line that built $col is missing from this listing. Collecting the
               page's <input> elements is assumed from the getAttribute('name') / getAttribute('value')
               calls below - confirm. gethtml() yields null when the request returned nothing. */
            $doc=$dom->gethtml();
            $col=$doc ? $doc->getElementsByTagName('input') : array();

            /* we need to know values for specific fields - add to array of params to be submitted */
            /* NOTE(review): $formparams (the input names to copy) is not defined anywhere in this listing and must be supplied */
            foreach( $col as $index => $node ){
                $name=$node->getAttribute('name');
                /* strict comparison so only exact name matches are copied */
                if( in_array( $name, $formparams, true ) ) {
                    $stdparams[ $name ]=$node->getAttribute('value');
                }
            }
            /* Not sure how the 'SearchParams' field value is calculated so calculated manually above..... */

            /* Stage 3 - POST */
            /* Turn the collected form fields into a URL-encoded query string */
            $querystring=http_build_query( $stdparams, '', '&' );
            curl_setopt_array( $curl, array(
                CURLOPT_URL           => $url_search,
                CURLOPT_REFERER       => $url_search,
                CURLOPT_COOKIEFILE    => $cookiejar,
                CURLOPT_COOKIESESSION => false,
                CURLOPT_POST          => true,
                CURLOPT_POSTFIELDS    => $querystring
            ) );
            $res=curl_exec( $curl );
            /* Transfer details for the request just made (not used further in this listing) */
            $info=curl_getinfo( $curl );

            /* Print the raw search response; the DOM could be queried instead to pick out specific items */
            echo '<pre>', print_r( $res, true ), '</pre>';

            /* Release the curl handle */
            curl_close( $curl );


/**
 * Small helper that parses an HTML string into a DOMDocument.
 */
class htmldom{
    /** @var DOMDocument|null Parsed document; stays null when no markup was supplied */
    private $html;

    /**
     * @param string|false $data    HTML markup to parse; nothing is parsed when it is empty or false
     * @param bool         $convert when true, $data is passed through mb_convert_encoding( $data, 'utf-8' ) before parsing
     */
    public function __construct( $data=false, $convert=true ){
        /* A constructor's return value is ignored, so a bare return is enough to skip parsing */
        if( !$data ) return;

        /* Buffer libxml parse errors instead of emitting a PHP warning for each one */
        libxml_use_internal_errors( true );

        try{
            $this->html = new DOMDocument('1.0','utf-8');

            $this->html->loadHTML( $convert ? mb_convert_encoding( $data, 'utf-8' ) : $data );

            /* Drop the buffered parse errors so they do not pile up across instances */
            libxml_clear_errors();

        }catch( Exception $e ){
            die( $e->getMessage() );
        }
    }

    /**
     * @return DOMDocument|null the parsed document, or null when nothing was parsed
     */
    public function gethtml(){
        return $this->html;
    }
}

关于php - curl 无法将数据发布到 php 页面,我们在Stack Overflow上找到一个类似的问题:

相关文章: - 两次调用 RenderSection ......?

curl - Jenkins远程触发带参数

php - 使用 php cURL 从文件头获取文件名

php - 没有 PHP artisan serve,Livewire 无法工作

php - 如何使用PHP和MYSQL将数据插入到两个不同的表中

php - "Cannot pass parameter 2 by reference"PHP错误

curl - 如何将 cURL 转换为 postman?

php - Codeigniter 3 show_404函数问题-MY_Exception无法加载 - 在为 构建期间缩小内联 javascript?

javascript - .Net Listbox 无法使用 javascript 事件处理程序进行编译?