javascript - 使用 Javascript 从脚本标签中抓取信息

标签 javascript web-scraping

我正在尝试抓取网页脚本标签内的信息。我已经想出如何获取信息,但无法弄清楚如何将其处理成数据对象。

我可以使用 document.querySelector(x).innerHTML 获取信息。下面是得到的 innerHTML(开头部分在这里似乎没有被格式化为代码):

" Y = YUI(YUI_CONFIG).use( 'squarespace-commerce-analytics',

function(Y) {
  Y.on('domready', function() {
    Y.Squarespace.CommerceAnalytics.checkoutConfirmed({'id':'12345676','orderNumber':'00065','websiteId':'12345678','purchasedCartId':'1234567','testMode':true,'grandTotal':{'currencyCode':'USD','value':3239,'decimalValue':'32.39','fractionalDigits':2},'grandTotalFormatted':'$32.39','subtotal':{'currencyCode':'USD','value':2300,'decimalValue':'23.00','fractionalDigits':2},'subtotalFormatted':'$23.00','taxTotal':{'currencyCode':'USD','value':204,'decimalValue':'2.04','fractionalDigits':2},'taxTotalFormatted':'$2.04','shippingTotal':{'currencyCode':'USD','value':735,'decimalValue':'7.35','fractionalDigits':2},'shippingTotalFormatted':'$7.35','billingDetails':{'customer':{'address':{'city':'New York','region':'NY','country':'United States'}}},'items':[{'sku':'123456','productName':'This is a Product','unitPrice':{'currencyCode':'USD','value':2300,'decimalValue':'23.00','fractionalDigits':2},'quantity':1}]});
  });
});

这段代码显示了我得到的 innerHTML,我希望将每个数据项(id、orderNumber、productName 等)格式化为一个对象,以便我可以使用 GTM 更好地跟踪电子商务。我不确定如何以我需要的方式操作它

最佳答案

如果把所有的 ' 替换为 ",这段内容就成了可以解析的 JSON。因此,你可以用正则表达式匹配 checkoutConfirmed( 和 ); 之间的内容,提取出这段“近似 JSON”,将其转换为真正的 JSON,然后再解析:

// Captures the argument text of the `checkoutConfirmed(...)` call in group 1.
// Quoted strings are consumed as whole tokens, so a `)` or `);` inside a value
// cannot end the match early, and the negated classes also match newlines, so
// a pretty-printed payload works too. Assumes the argument contains no `)`
// outside of string literals (true for a plain object literal).
const checkoutArgumentPattern =
  /checkoutConfirmed\(((?:'(?:\\[\s\S]|[^'\\])*'|"(?:\\[\s\S]|[^"\\])*"|[^'")])*)\)/;

// Matches one quoted string literal. Group 1 is set only for single-quoted
// strings and holds the text between the quotes.
const quotedStringPattern = /"(?:\\[\s\S]|[^"\\])*"|'((?:\\[\s\S]|[^'\\])*)'/g;

/**
 * Converts a single-quoted JavaScript object literal into strict JSON text.
 *
 * Only the quote characters that delimit strings are changed. Inside a
 * single-quoted string, `\'` becomes a plain apostrophe and a bare `"` is
 * escaped, so values such as 'Men\'s shirt' or 'the "best" one' survive.
 * Double-quoted strings are passed through untouched.
 *
 * @param {string} literal - Object literal text using single-quoted strings.
 * @returns {string} The same literal with every string double-quoted.
 */
function toStrictJSON(literal) {
  return literal.replace(quotedStringPattern, (token, singleQuotedBody) => {
    if (singleQuotedBody === undefined) {
      return token; // already a double-quoted string
    }
    const body = singleQuotedBody.replace(/\\([\s\S])|"/g, (piece, escapedChar) => {
      if (escapedChar === undefined) {
        return '\\"'; // bare double quote must be escaped in JSON
      }
      // `\'` is not a valid JSON escape; every other escape is kept verbatim.
      return escapedChar === "'" ? "'" : piece;
    });
    return `"${body}"`;
  });
}

const scriptElement = document.querySelector('script[type="dontexecute"]');
if (scriptElement === null) {
  throw new Error('No <script type="dontexecute"> element found on the page');
}
const html = scriptElement.innerHTML;

const payloadMatch = html.match(checkoutArgumentPattern);
if (payloadMatch === null) {
  throw new Error('No checkoutConfirmed(...) call found in the script text');
}
const singleQuotedJSON = payloadMatch[1];

const actualJSON = toStrictJSON(singleQuotedJSON);
const obj = JSON.parse(actualJSON);
console.log(obj);
<!-- Sample markup for the snippet above: its selector script[type="dontexecute"] targets this element and the checkoutConfirmed(...) payload is read from its text. The non-standard type value is presumably there so the browser treats the content as inert data instead of running it. -->
<script type="dontexecute">Y = YUI(YUI_CONFIG).use(
    'squarespace-commerce-analytics',

    function(Y) {
      Y.on('domready', function() {
        Y.Squarespace.CommerceAnalytics.checkoutConfirmed({'id':'12345676','orderNumber':'00065','websiteId':'12345678','purchasedCartId':'1234567','testMode':true,'grandTotal':{'currencyCode':'USD','value':3239,'decimalValue':'32.39','fractionalDigits':2},'grandTotalFormatted':'$32.39','subtotal':{'currencyCode':'USD','value':2300,'decimalValue':'23.00','fractionalDigits':2},'subtotalFormatted':'$23.00','taxTotal':{'currencyCode':'USD','value':204,'decimalValue':'2.04','fractionalDigits':2},'taxTotalFormatted':'$2.04','shippingTotal':{'currencyCode':'USD','value':735,'decimalValue':'7.35','fractionalDigits':2},'shippingTotalFormatted':'$7.35','billingDetails':{'customer':{'address':{'city':'New York','region':'NY','country':'United States'}}},'items':[{'sku':'123456','productName':'This is a Product','unitPrice':{'currencyCode':'USD','value':2300,'decimalValue':'23.00','fractionalDigits':2},'quantity':1}]});
      });
    });</script>

现在您已经有了一个格式正确的对象,您可以随心所欲地操作它。例如,要提取 orderNumber,请引用 obj.orderNumber:

// Captures the argument text of the `checkoutConfirmed(...)` call in group 1.
// Quoted strings are consumed as whole tokens, so a `)` or `);` inside a value
// cannot end the match early, and the negated classes also match newlines, so
// a pretty-printed payload works too. Assumes the argument contains no `)`
// outside of string literals (true for a plain object literal).
const checkoutArgumentPattern =
  /checkoutConfirmed\(((?:'(?:\\[\s\S]|[^'\\])*'|"(?:\\[\s\S]|[^"\\])*"|[^'")])*)\)/;

// Matches one quoted string literal. Group 1 is set only for single-quoted
// strings and holds the text between the quotes.
const quotedStringPattern = /"(?:\\[\s\S]|[^"\\])*"|'((?:\\[\s\S]|[^'\\])*)'/g;

/**
 * Converts a single-quoted JavaScript object literal into strict JSON text.
 *
 * Only the quote characters that delimit strings are changed. Inside a
 * single-quoted string, `\'` becomes a plain apostrophe and a bare `"` is
 * escaped, so values such as 'Men\'s shirt' or 'the "best" one' survive.
 * Double-quoted strings are passed through untouched.
 *
 * @param {string} literal - Object literal text using single-quoted strings.
 * @returns {string} The same literal with every string double-quoted.
 */
function toStrictJSON(literal) {
  return literal.replace(quotedStringPattern, (token, singleQuotedBody) => {
    if (singleQuotedBody === undefined) {
      return token; // already a double-quoted string
    }
    const body = singleQuotedBody.replace(/\\([\s\S])|"/g, (piece, escapedChar) => {
      if (escapedChar === undefined) {
        return '\\"'; // bare double quote must be escaped in JSON
      }
      // `\'` is not a valid JSON escape; every other escape is kept verbatim.
      return escapedChar === "'" ? "'" : piece;
    });
    return `"${body}"`;
  });
}

const scriptElement = document.querySelector('script[type="dontexecute"]');
if (scriptElement === null) {
  throw new Error('No <script type="dontexecute"> element found on the page');
}
const html = scriptElement.innerHTML;

const payloadMatch = html.match(checkoutArgumentPattern);
if (payloadMatch === null) {
  throw new Error('No checkoutConfirmed(...) call found in the script text');
}
const singleQuotedJSON = payloadMatch[1];

const actualJSON = toStrictJSON(singleQuotedJSON);
const obj = JSON.parse(actualJSON);
// Any field of the parsed order is now a plain property lookup.
console.log(obj.orderNumber);
<!-- Sample markup for the snippet above: its selector script[type="dontexecute"] targets this element and the checkoutConfirmed(...) payload is read from its text. The non-standard type value is presumably there so the browser treats the content as inert data instead of running it. -->
<script type="dontexecute">Y = YUI(YUI_CONFIG).use(
    'squarespace-commerce-analytics',

    function(Y) {
      Y.on('domready', function() {
        Y.Squarespace.CommerceAnalytics.checkoutConfirmed({'id':'12345676','orderNumber':'00065','websiteId':'12345678','purchasedCartId':'1234567','testMode':true,'grandTotal':{'currencyCode':'USD','value':3239,'decimalValue':'32.39','fractionalDigits':2},'grandTotalFormatted':'$32.39','subtotal':{'currencyCode':'USD','value':2300,'decimalValue':'23.00','fractionalDigits':2},'subtotalFormatted':'$23.00','taxTotal':{'currencyCode':'USD','value':204,'decimalValue':'2.04','fractionalDigits':2},'taxTotalFormatted':'$2.04','shippingTotal':{'currencyCode':'USD','value':735,'decimalValue':'7.35','fractionalDigits':2},'shippingTotalFormatted':'$7.35','billingDetails':{'customer':{'address':{'city':'New York','region':'NY','country':'United States'}}},'items':[{'sku':'123456','productName':'This is a Product','unitPrice':{'currencyCode':'USD','value':2300,'decimalValue':'23.00','fractionalDigits':2},'quantity':1}]});
      });
    });</script>

关于javascript - 使用 Javascript 从脚本标签中抓取信息,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58053572/

相关文章:

javascript对象在html页面上工作但无法在wordpress中加载

javascript - bootstrap slide + fancy box 解决方案

javascript - 尝试使 JavaScript 中的游戏组件透明

python - 使用 python beautiful soup 进行网络抓取的空值

javascript - 如何在没有 xmlns 属性的情况下序列化 XML?

JavaScript:动态生成的对象键

python - Google map 错误列表索引必须是整数或切片,而不是 WebElement

python - Scrapy:连接被拒绝

php - PHP 网页抓取

javascript - 网页抓取 : Automating button click