javascript - 在 HTML 中解析 JavaScript

标签 javascript python regex web-scraping beautifulsoup

我的 HTML 中有下面这个 script 标签，我需要对它进行解析，以获取 new String() 中的所有数据。有没有办法解析这些数据？这里必须使用正则表达式（regex），还是有更有效的方法可以做到这一点？

<script language="JavaScript1.1">
/**
 * Shows record `n` of the global `dataArray` in the form field
 * `document.dform.dinput`.
 *
 * The record is split on "," and its first five fields are rendered as
 * "Date:<f0> O=<f1> H=<f2> L=<f3> C=<f4>". A negative index clears the
 * field instead.
 *
 * @param {number} n - Index into `dataArray`; a negative value clears the field.
 */
function X(n) {
  // These names are assigned without `var`, so they remain page-level
  // globals; other code on the page may read them.
  arrayindex = n;
  s = new String(dataArray[arrayindex]);
  a = s.split(",");
  dateheading = "Date:";

  var field = document.dform.dinput;

  // Guard clause: a negative index just blanks the field.
  if (arrayindex < 0) {
    field.value = "";
    return;
  }

  // Index 0 and every other non-negative index produce the same text,
  // so a single code path covers both.
  field.value = [
    dateheading + a[0],
    "O=" + a[1],
    "H=" + a[2],
    "L=" + a[3],
    "C=" + a[4]
  ].join(" ");
}
var dataStr = new String('11/27/18 07:40,1.1322,1.13228,1.132,1.13212|11/27/18 07:35,1.13192,1.1322,1.13192,1.13203|11/27/18 07:30,1.1318,1.13202,1.1318,1.1319|11/27/18 07:25,1.13165,1.13191,1.13165,1.13188|11/27/18 07:20,1.1317,1.13174,1.13143,1.13143|11/27/18 07:15,1.13182,1.13192,1.1316,1.13177|11/27/18 07:10,1.13208,1.13213,1.1319,1.13213|11/27/18 07:05,1.1319,1.1321,1.13176,1.1321|11/27/18 07:00,1.13196,1.1321,1.13196,1.132|11/27/18 06:55,1.13232,1.13232,1.13187,1.13187|11/27/18 06:50,1.1319,1.13234,1.1319,1.1323|11/27/18 06:45,1.13216,1.13217,1.1319,1.132|11/27/18 06:40,1.13188,1.13198,1.13184,1.13198|11/27/18 06:35,1.13222,1.13222,1.132,1.13208|11/27/18 06:30,1.1318,1.1323,1.1318,1.1323|11/27/18 06:25,1.13178,1.1321,1.13178,1.13191|11/27/18 06:20,1.13184,1.13184,1.13162,1.13162|11/27/18 06:15,1.1316,1.13179,1.1316,1.13179|11/27/18 06:10,1.13194,1.132,1.1315,1.1318|11/27/18 06:05,1.1319,1.13225,1.1319,1.13191|11/27/18 06:00,1.13169,1.13174,1.1316,1.13169|11/27/18 05:55,1.1312,1.13166,1.1312,1.13166|11/27/18 05:50,1.13101,1.13127,1.131,1.13122|11/27/18 05:45,1.1313,1.1313,1.13111,1.13111|11/27/18 05:40,1.1317,1.1317,1.1313,1.13141|11/27/18 05:35,1.13159,1.13159,1.13135,1.13142|11/27/18 05:30,1.13203,1.13203,1.1317,1.1317|11/27/18 05:25,1.13187,1.13217,1.13187,1.13208|11/27/18 05:20,1.13226,1.13226,1.13185,1.13185|11/27/18 05:15,1.13151,1.13163,1.13151,1.13163|11/27/18 05:10,1.13141,1.13149,1.1313,1.13149|11/27/18 05:05,1.13153,1.13153,1.13128,1.1314|11/27/18 05:00,1.13114,1.13148,1.13114,1.13148|11/27/18 04:55,1.13149,1.13149,1.13106,1.13106|11/27/18 04:50,1.13212,1.13219,1.13203,1.13203|11/27/18 04:45,1.13232,1.13244,1.13209,1.13209|11/27/18 04:40,1.13225,1.13244,1.13223,1.13223|11/27/18 04:35,1.13233,1.13233,1.13217,1.13223|11/27/18 04:30,1.1322,1.13222,1.13206,1.1321|11/27/18 04:25,1.13174,1.13227,1.13174,1.13227|11/27/18 04:20,1.1316,1.13174,1.1315,1.13174|11/27/18 04:15,1.13181,1.13181,1.1314,1.13163|11/27/18 04:10,1.1319,1.13212,1.1319,1.13191|11/27/18 
04:05,1.1317,1.13206,1.1317,1.13206|11/27/18 04:00,1.1311,1.13153,1.1311,1.13142|11/27/18 03:55,1.1313,1.1313,1.13113,1.13113|11/27/18 03:50,1.13136,1.13146,1.131,1.131|11/27/18 03:45,1.13147,1.13164,1.13145,1.13145|11/27/18 03:40,1.13091,1.13164,1.13091,1.13158|11/27/18 03:35,1.13109,1.13123,1.13106,1.1311|11/27/18 03:30,1.1308,1.13135,1.1308,1.13117|11/27/18 03:25,1.13115,1.13121,1.1309,1.1309|11/27/18 03:20,1.13089,1.13105,1.13082,1.13105|11/27/18 03:15,1.13076,1.13085,1.1304,1.13085|11/27/18 03:10,1.13072,1.13075,1.1304,1.13069|11/27/18 03:05,1.13111,1.13111,1.1309,1.13107|11/27/18 03:00,1.131,1.13151,1.13073,1.13073|11/27/18 02:55,1.13201,1.13201,1.1311,1.13122|11/27/18 02:50,1.13209,1.13226,1.1318,1.13218|11/27/18 02:45,1.13202,1.13229,1.13202,1.13209|11/27/18 02:40,1.13293,1.13293,1.1321,1.13215|11/27/18 02:35,1.13262,1.13311,1.13257,1.13257|11/27/18 02:30,1.13191,1.13389,1.13191,1.13353|11/27/18 02:25,1.1321,1.1321,1.13159,1.13164|11/27/18 02:20,1.13302,1.13302,1.13263,1.13263|11/27/18 02:15,1.1328,1.13304,1.13269,1.13304|11/27/18 02:10,1.13341,1.13359,1.1331,1.1331|11/27/18 02:05,1.13312,1.13328,1.13312,1.13325|11/27/18 02:00,1.13286,1.13308,1.13286,1.13308|11/27/18 01:55,1.13255,1.1328,1.1324,1.1327|11/27/18 01:50,1.13279,1.13297,1.13263,1.13263|11/27/18 01:45,1.1331,1.1331,1.13265,1.13266|');
// Split the pipe-delimited dataStr into one entry per record. dataArray is
// assigned without `var`, and X() reads it by index. The dataStr literal
// ends with "|", so the last element of dataArray is an empty string.
dataArray = dataStr.split("|");
// Drop the reference to the source string (presumably so the large literal
// can be released once it has been split).
dataStr = null;
// Number of entries in dataArray, counting the trailing empty one.
dataLength = dataArray.length;
</script>

摘要：如何提取 11/27/18 07:40,1.1322,1.13228,1.132,1.13212|… 这样的数据？

最佳答案

您可以使用正则表达式轻松完成此操作:

# Extract the contents of a JavaScript `new String('...')` literal and split
# it into its pipe-separated parts.
import re

test_js = "new String('x|y|z')"
# `.*?` is non-greedy so the match stops at the first closing `')`, and
# re.DOTALL lets `.` cross line breaks in case the literal is wrapped over
# several lines in the page source.
string = re.search(r"new String\('(.*?)'\)", test_js, re.DOTALL)[1]
# 'x|y|z'
parts = string.split('|')
# ['x', 'y', 'z']

关于javascript - 在 HTML 中解析 JavaScript,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53500555/

相关文章:

javascript - 如何生成保留标点符号的简单字谜?

python - 同时运行多个独立的python脚本

python - 在 python 中使用正则表达式 re.split 拆分堆叠实体

javascript - 无法正确 trim 和粗体匹配两个字符串中的单词

javascript - 无法使用 javascript 编辑 css

python - 如何连续多次从列表中生成随机项目,但每次都会生成不同的项目

python - 我在哪里可以找到 numpy.where() 源代码?

MySQL 和正则表达式 - 作为 SELECT 列返回,仅包含表列的正则表达式匹配部分

JavaScript 正则表达式

javascript - 如何将一个数组中的值添加到另一个数组