javascript - 以纯文本形式获取元素内容(包含所有后代元素)

标签 javascript jquery html text

我对 jQuery 中的一些简单机制感到困惑。 假设我的网站上有以下 div 容器:

<div id="myDiv">
    <math xmlns="http://www.w3.org/1998/Math/MathML">
      <mrow>
        <mi>n</mi>
        <mo stretchy="false">≥</mo>
        <mn>2</mn>
        </mrow>
    </math>

    - Some text here
    > Some quote here
</div>

现在我想在不做任何修改的情况下检索内容 (!!!) 并在之后进行一些我自己的更改。

第一次尝试

$('#myDiv').text();

哦,所有的子标签都不见了。那不是我想要的。

第二次尝试

$('#myDiv').html();

嗯,看起来好多了。但仔细观察就会发现,最后一行中的“>”现在也是 HTML 编码的。那不是我想要的。

问题

如何获取 DOM 元素内容的纯文本副本?

更新

除了已接受的答案及其关于“未转义文本”的提示之外,我还发现了一个使用 <script> 标签的巧妙小变通方法。

最佳答案

您需要深入挖掘以找出一些可能的解决方案。

例如,我在 Stack Overflow 上搜索了一番,找到了这三个帖子:THREAD1、THREAD2、THREAD3。

利用这些资源,我拼凑出了下面这段代码:

/**
 * Translation look-up: builds a map from literal characters to the HTML
 * entities that represent them.
 *
 * @param {(number|string)} [table] Which table to build: 0 or
 *   'HTML_SPECIALCHARS' (the default), 1 or 'HTML_ENTITIES'. Names are
 *   matched case-insensitively. undefined, null and '' select the default.
 * @param {(number|string)} [quote_style] Quote handling: 0 or 'ENT_NOQUOTES'
 *   (no quote entities), 2 or 'ENT_COMPAT' (double quote only, the default),
 *   3 or 'ENT_QUOTES' (double and single quote). Any value that does not
 *   resolve to 'ENT_NOQUOTES' or 'ENT_QUOTES' behaves like 'ENT_COMPAT'.
 * @returns {Object} Map whose keys are single characters and whose values
 *   are entity strings such as '&amp;'. Keys are inserted in ascending
 *   character-code order.
 * @throws {Error} If `table` resolves to neither supported table name.
 */
function get_html_translation_table (table, quote_style) {
  var TABLE_BY_CODE = { 0: 'HTML_SPECIALCHARS', 1: 'HTML_ENTITIES' };
  var QUOTE_STYLE_BY_CODE = { 0: 'ENT_NOQUOTES', 2: 'ENT_COMPAT', 3: 'ENT_QUOTES' };

  // Entity names for character codes 160..255, in code order.
  var LATIN1_FIRST_CODE = 160;
  var LATIN1_NAMES = [
    'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
    'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
    'deg', 'plusmn', 'sup2', 'sup3', 'acute', 'micro', 'para', 'middot',
    'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12', 'frac34', 'iquest',
    'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
    'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
    'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
    'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',
    'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
    'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
    'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
    'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml'
  ];

  var hash_map = {};
  var useTable;
  var useQuoteStyle;
  var i;

  // Turns a numeric code or a name into the canonical upper-case name.
  function resolveOption (value, namesByCode, fallback) {
    // isNaN(null) and isNaN('') are both false, so without this guard an
    // absent argument would be looked up as a numeric code and resolve to
    // undefined instead of the default.
    if (value === undefined || value === null || value === '') {
      return fallback;
    }
    if (!isNaN(value)) {
      return namesByCode[value];
    }
    return value ? value.toUpperCase() : fallback;
  }

  // Registers one character (given by its code) with its entity string.
  function addEntity (code, entity) {
    hash_map[String.fromCharCode(code)] = entity;
  }

  useTable = resolveOption(table, TABLE_BY_CODE, 'HTML_SPECIALCHARS');
  useQuoteStyle = resolveOption(quote_style, QUOTE_STYLE_BY_CODE, 'ENT_COMPAT');

  if (useTable !== 'HTML_SPECIALCHARS' && useTable !== 'HTML_ENTITIES') {
    throw new Error("Table: " + useTable + ' not supported');
  }

  // Entries are added in ascending character-code order so the key order of
  // the returned map is explicit.
  if (useQuoteStyle !== 'ENT_NOQUOTES') {
    addEntity(34, '&quot;');
  }
  addEntity(38, '&amp;');
  if (useQuoteStyle === 'ENT_QUOTES') {
    addEntity(39, '&#39;');
  }
  addEntity(60, '&lt;');
  addEntity(62, '&gt;');

  if (useTable === 'HTML_ENTITIES') {
    for (i = 0; i < LATIN1_NAMES.length; i++) {
      addEntity(LATIN1_FIRST_CODE + i, '&' + LATIN1_NAMES[i] + ';');
    }
  }

  return hash_map;
}

/**
 * Decode: replaces HTML entities in a string with the characters they
 * represent, using the 'HTML_ENTITIES' table from
 * get_html_translation_table().
 *
 * Every entity is decoded at most once: "&amp;lt;" becomes "&lt;" and
 * "&amp;#039;" becomes "&#039;".
 *
 * @param {*} string Value to decode; converted with its toString() method,
 *   so null and undefined raise a TypeError.
 * @param {(number|string)} [quote_style] Passed through to
 *   get_html_translation_table() to select which quote entities are decoded.
 *   The zero-padded form "&#039;" is decoded regardless of this value.
 * @returns {string} The decoded text.
 */
function html_entity_decode (string, quote_style) {
  var hash_map = get_html_translation_table('HTML_ENTITIES', quote_style),
    tmp_str = string.toString(),
    symbol = '';

  // Re-insert '&' so it is visited last by the loop below; decoding '&amp;'
  // earlier would create fresh entities (e.g. "&amp;lt;" -> "&lt;") that a
  // later iteration would then decode a second time.
  delete hash_map['&'];
  hash_map['&'] = '&amp;';

  // Decode the zero-padded apostrophe before '&amp;' for the same reason:
  // "&amp;#039;" must end up as the literal text "&#039;".
  tmp_str = tmp_str.split('&#039;').join("'");

  for (symbol in hash_map) {
    if (Object.prototype.hasOwnProperty.call(hash_map, symbol)) {
      tmp_str = tmp_str.split(hash_map[symbol]).join(symbol);
    }
  }

  return tmp_str;
}

// Now get your content: copy the markup of #myDiv into a detached <div>,
// then log that markup with its HTML entities decoded.
var d = document.createElement('div');
d.innerHTML = $('#myDiv').html();
console.log(html_entity_decode(d.innerHTML));

这给出了如下所示的结果(我想这就是您要找的):

<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mrow>
    <mi>n</mi>
    <mo stretchy="false">≥</mo>
    <mn>2</mn>
    </mrow>
</math>

- Some text here
> Some quote here

这是包含所有相关代码的可运行 fiddle:

http://jsfiddle.net/Ddjag/1/

这里的技巧是把文本内容中的 HTML 实体解码为未转义的字符,而这正是 html_entity_decode 函数所做的。

关于javascript - 将元素内容作为纯文本获取所有后代,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19157156/

相关文章:

javascript - 如何显示自定义音频播放器的当前时间和总时长?

javascript - 无法使用javascript获取输入字段值?

javascript - 根据页面返回的 url 编辑复选框

html - 多列列出

html - 自动列宽为 `overflow-wrap: break-word`

javascript - 使用 JQuery AJAX 的自动保存架构

javascript - 如何删除 map() 中的重复项

javascript - 通过 ajax 调用在 Flask 中渲染模板

jquery - 当我使用自定义选择框时按钮位置尴尬

html - 减小窗口大小时,Div 背景颜色未填充文本高度的 100%