javascript - 仅当不是 html 时才转换为 anchor

标签 javascript regex

假设,我有以下文本:

This is a sample url : http://example.com.
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" />
<img src="http://example.com/sample2.png" class="sample-image" />
Another url http://example2.com

这是我用来解析上述文本的正则表达式代码:

// Matches http, https and ftp URLs, case-insensitively and globally. The final
// character class omits `!:,.;?`, so trailing punctuation is not part of a match.
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&@#\/%?=~_|!:,.;-]*[-A-Z0-9+&@#\/%=~_|])/gim;
    // Matches word characters, "@", a run of letters/underscores, a dot and
    // a 2-6 letter suffix.
    const emails = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;

    // NOTE(review): a bare `return` implies this fragment sits inside an
    // enclosing function that is not shown here - confirm.
    // The returned function wraps every URL in an <a> tag and every e-mail
    // address in a mailto: <a> tag. It works on the raw string with no awareness
    // of HTML, so a URL inside an attribute value (e.g. an <img> src) is
    // wrapped as well.
    return function(text) {
        // The match() guard is redundant: replace() returns the string
        // unchanged when the pattern does not match.
        if(text.match(urls)) {
            text = text.replace(urls, "<a href=\"$1\">$1</a>")
        }
        // Runs on the output of the URL pass, i.e. on text that may already
        // contain generated anchor markup.
        if(text.match(emails)) {
            text = text.replace(emails, "<a href=\"mailto:$1\">$1</a>")
        }

        return text
    }

上面的代码对我的文本执行此操作:

This is a sample url : <a href="http://example.com">http://example.com</a>.
These are some images:
<img src="<a href=" class="sample-image">"http://example.com/sample1.png">"
<img src="<a href=" class="sample-image">"http://example.com/sample2.png">"
Another url <a href="http://example2.com">http://example2.com</a>

我希望得到以下结果:

This is a sample url : <a href="http://example.com">http://example.com</a>.
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" /> <!-- Do not change -->
<img src="http://example.com/sample2.png" class="sample-image" /> <!-- Do not change -->
Another url <a href="http://example2.com">http://example2.com</a>

我怎样才能达到上面的结果?

最佳答案

最好避免使用正则表达式来解析 HTML。

RegEx match open tags except XHTML self-contained tags

Using regular expressions to parse HTML: why not?


相反,应创建一个临时 DOM 元素来承载这段内容,遍历它的子节点,只对文本节点的内容调用带正则表达式的 replace 方法进行替换,元素节点则保持原样输出。

const html = 'This is a sample url : http://example.com These are some images:<img src="http://example.com/sample1.png" class="sample-image" /><img src="http://example.com/sample2.png" class="sample-image" />Another url http://example2.com';

// Patterns for the text to linkify.
// `urls`: http/https/ftp URLs (case-insensitive); the final character class
// omits `!:,.;?` so trailing punctuation stays outside the link.
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&@#\/%?=~_|!:,.;-]*[-A-Z0-9+&@#\/%=~_|])/gim;
// `emails`: word characters, "@", letters/underscores, a dot, a 2-6 letter suffix.
const emails = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;

/**
 * Wraps the URLs and e-mail addresses found in a piece of text in anchor tags.
 *
 * URLs are located first; e-mail addresses are then linked only in the text
 * between the URL matches. Running the e-mail pattern over the already
 * generated markup would re-link an address that is part of a URL (for
 * example in a query string) inside the `href` attribute and the link text,
 * producing nested, broken anchors.
 *
 * @param {string} text - Text to linkify. It is copied into the result as-is;
 *   no HTML escaping is performed here.
 * @returns {string} The text with URLs replaced by `<a href="...">` markup and
 *   e-mail addresses replaced by `<a href="mailto:...">` markup.
 */
function update(text) {
  const linkEmails = (chunk) => chunk.replace(emails, "<a href=\"mailto:$1\">$1</a>");

  let result = '';
  let cursor = 0;
  let match;
  // `urls` is a shared /g regex, so exec() is stateful: start from the beginning.
  urls.lastIndex = 0;
  while ((match = urls.exec(text)) !== null) {
    const url = match[0];
    // Text between the previous URL and this one may still contain e-mails.
    result += linkEmails(text.slice(cursor, match.index));
    result += `<a href="${url}">${url}</a>`;
    cursor = match.index + url.length;
  }
  return result + linkEmails(text.slice(cursor));
}

// Let the browser parse the markup by assigning it to a detached <div>.
var temp = document.createElement('div');
temp.innerHTML = html;

console.log(
  // Copy the child NodeList into a real array first
  // (older browsers: `[].slice.call()` instead of `Array.from()`).
  Array.from(temp.childNodes)
    // nodeType 3 is a text node: linkify its text. Every other node is
    // emitted unchanged as its own markup.
    .map((node) => (node.nodeType === 3 ? update(node.textContent) : node.outerHTML))
    // Stitch the pieces back into a single HTML string.
    .join('')
);


更新:如果您需要保留转义的 HTML,那么您需要添加一个额外的方法来生成相应的文本节点的转义 HTML。

const html = 'This is a sample url : http://example.com These are some images:<img src="http://example.com/sample1.png" class="sample-image" /><img src="http://example.com/sample2.png" class="sample-image" />Another url http://example2.com  hi &lt;a href="#"&gt;Sam&lt;/a&gt;';

// Patterns for the text to linkify.
// `urls`: http/https/ftp URLs (case-insensitive); the final character class
// omits `!:,.;?` so trailing punctuation stays outside the link.
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&@#\/%?=~_|!:,.;-]*[-A-Z0-9+&@#\/%=~_|])/gim;
// `emails`: word characters, "@", letters/underscores, a dot, a 2-6 letter suffix.
const emails = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;

/**
 * Wraps the URLs and e-mail addresses found in a piece of text in anchor tags.
 *
 * URLs are located first; e-mail addresses are then linked only in the text
 * between the URL matches. Running the e-mail pattern over the already
 * generated markup would re-link an address that is part of a URL (for
 * example in a query string) inside the `href` attribute and the link text,
 * producing nested, broken anchors.
 *
 * @param {string} text - Text to linkify. It is copied into the result as-is;
 *   no HTML escaping is performed here.
 * @returns {string} The text with URLs replaced by `<a href="...">` markup and
 *   e-mail addresses replaced by `<a href="mailto:...">` markup.
 */
function update(text) {
  const linkEmails = (chunk) => chunk.replace(emails, "<a href=\"mailto:$1\">$1</a>");

  let result = '';
  let cursor = 0;
  let match;
  // `urls` is a shared /g regex, so exec() is stateful: start from the beginning.
  urls.lastIndex = 0;
  while ((match = urls.exec(text)) !== null) {
    const url = match[0];
    // Text between the previous URL and this one may still contain e-mails.
    result += linkEmails(text.slice(cursor, match.index));
    result += `<a href="${url}">${url}</a>`;
    cursor = match.index + url.length;
  }
  return result + linkEmails(text.slice(cursor));
}

/**
 * Returns the serialized (HTML-escaped) markup of a node, as produced by
 * reading `innerHTML` from a container that holds only that node.
 *
 * @param {Node} node - Node to serialize; the snippet passes text nodes.
 *   It is left attached to its current parent.
 * @returns {string} The node's content as escaped HTML.
 */
function getEncodedText(node) {
  // Scratch container used only for serialization.
  const temp = document.createElement('div');
  // appendChild() moves a node that is already attached elsewhere, so append
  // a copy to keep the caller's DOM tree intact.
  temp.appendChild(node.cloneNode(true));
  return temp.innerHTML;
}
  // create a DOM element
var temp = document.createElement('div');
// Assigning the string to innerHTML makes the browser parse it into child nodes.
temp.innerHTML = html;

console.log(
  // Snapshot the child NodeList into a real array before mapping
  // (older browsers: `[].slice.call()` instead of `Array.from()`).
  Array.from(temp.childNodes)
    // nodeType 3 is a text node: take its escaped HTML and linkify that.
    // Every other node is emitted unchanged as its own markup.
    .map((node) => (node.nodeType === 3 ? update(getEncodedText(node)) : node.outerHTML))
    // Stitch the pieces back into a single HTML string.
    .join('')
);

关于javascript - 仅当不是 html 时才转换为 anchor ,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41649408/

相关文章:

javascript - 如何在ajax请求中将数组js数组发送到php页面

javascript - 在 Jquery onclick 函数中添加 Spinner 函数

javascript - 在 Handlebars layout.hbs 中的代码下方插入模板代码

javascript - 在现有 CSS 链接之前将 CSS 链接注入(inject)头部

c++ - 使用 wregex 会产生垃圾输出?

PHP,将带括号的字符串转换为数组

regex - R中的文本字符串隔离和转换

javascript - 我们可以用 `name ` 设置属性而不是 `class` 或 `id`

python正则表达式以跳过模式拆分某些模式

python - Python3 : Split+concatenate string with regex - giving syntax error: can't assign to operator