如何找到页面上以 http:// 开头的每个单词并用标签环绕它?
我可以使用正则表达式之类的东西吗?
如何找到页面上以 http:// 开头的每个单词并用标签环绕它?
我可以使用正则表达式之类的东西吗?
我非常不同意“jQuery 在这里帮不上忙”的说法。当然,您必须亲自处理一些 textNode 的属性,但是在拆分匹配到的节点之后,使用 jQuery 库可以让重新组装 DOM 变得更容易一些。
以下代码带有行内注释,用来解释所采取的每一步操作。我把它写成了一个 jQuery 插件,方便您把它移到别处使用。这样,您可以通过选择器限定要转换 URL 的元素范围,也可以直接使用 $("body") 选择器。
(function($) {
    /**
     * jQuery plugin: wraps every URL found in the text of the selected
     * elements and all of their descendants in an <a> element whose href
     * and visible text are both the URL.
     *
     * Text that is already inside an <a>, <script>, <style> or <textarea>
     * is left untouched, so existing links are not nested and non-rendered
     * text is not altered.
     *
     * @returns {jQuery} The original selection, so the call can be chained.
     */
    $.fn.anchorTextUrls = function() {
        // Capture group 1 is the URL: "http://" or "https://" followed by the
        // shortest run of characters up to whitespace, a double quote or the
        // end of the text. One trailing punctuation mark is kept outside it.
        var urlPattern = /(https?:\/\/.*?)[.!?;,]?(\s+|"|$)/;

        // Elements whose text must never be rewritten.
        var skippedAncestors = "a, script, style, textarea";

        // Split a text node around the first URL it contains, insert an anchor
        // for that URL, then repeat on the text that follows the URL.
        var testAndTag = function(el) {
            var m = el.nodeValue.match(urlPattern);
            if (!m) {
                return;
            }
            var url = m[1];
            // The capture group starts the match, so m.index is the URL offset.
            // splitText leaves everything up to the end of the URL in `el` and
            // inserts the remainder as a new sibling text node right after it.
            var tail = el.splitText(m.index + url.length);
            // Drop the URL from `el`, keeping only the text that preceded it.
            el.nodeValue = el.nodeValue.substring(0, m.index);
            // .text() (not .html()) so the URL is inserted as plain text and
            // characters such as "<" in page text are never parsed as markup.
            $(el).after($("<a></a>").attr("href", url).text(url));
            // Look for further URLs in the remaining text.
            testAndTag(tail);
        };

        // For each element selected by jQuery
        return this.each(function() {
            // Collect the child nodes of the element and of every descendant,
            // keeping only text nodes that are allowed to be rewritten.
            var textNodes = $(this).add("*", this).contents().filter(function() {
                return this.nodeType === 3 &&
                    $(this.parentNode).closest(skippedAncestors).length === 0;
            });
            // Take action on each text node
            $.each(textNodes, function(i, el) {
                testAndTag(el);
            });
        });
    };
}(jQuery));
$("body").anchorTextUrls(); // Sample call
请注意,由于我填充 textNodes 数组的方式,该方法会查找所有后代文本节点,而不仅仅是直接子文本节点。如果您希望它只替换特定选择器直接包含的文本中的 URL,请删除 .add("*", this) 调用(正是这个调用把所选元素的所有后代也加入了进来)。
这是一个 JSFiddle 示例。
这是 jQuery 没有直接帮助您的少数事情之一。您基本上必须遍历 DOM 树并检查文本节点(nodeType === 3);如果您找到的文本节点包含要包裹的目标文本(“http://.....”,具体规则由您决定),就使用 splitText 把该文本节点拆分为三部分(目标字符串之前的部分、目标字符串本身、以及目标字符串之后的部分),然后用 a 元素把其中的第二部分包裹起来。
这听起来有点复杂,但其实并没有那么糟糕。它只需要一个递归下降的遍历(walker)函数(用于遍历 DOM)、一次正则表达式匹配(用来找到您想要替换的内容),然后再调用几次 splitText、createElement、insertBefore 和 appendChild。
这是一个搜索固定字符串的示例;只需为“http://”添加正则表达式匹配:
// Example invocation: scan everything under document.body for the fixed
// string "foo". `walk` is a function declaration, so it can be called
// here even though its definition appears further down.
walk(document.body, "foo");
/**
 * Recursively visits `node` and its descendants and passes every text node
 * to `handleText`.
 *
 * Element nodes (nodeType 1) are descended into, text nodes (nodeType 3)
 * are handed to `handleText`, and every other node type is ignored.
 *
 * @param {Node} node - Root of the subtree to scan.
 * @param {string} targetString - Text to search for; forwarded unchanged
 *     to `handleText`.
 */
function walk(node, targetString) {
    var child, next;
    switch (node.nodeType) {
        case 1: // Element
            for (child = node.firstChild; child; child = next) {
                // Remember the next sibling BEFORE processing the child:
                // handleText may remove the text node it was given (making
                // its nextSibling null) or insert a wrapper element right
                // after it. Reading nextSibling afterwards would either end
                // the loop early or descend into the freshly created wrapper
                // and wrap its text a second time.
                next = child.nextSibling;
                walk(child, targetString);
            }
            break;
        case 3: // Text node
            handleText(node, targetString);
            break;
    }
}
/**
 * Wraps every occurrence of `targetString` inside the text node `node` in a
 * `<span class="wrapper">`, splitting the text node as needed.
 *
 * Does nothing when `targetString` is empty, when `node` is not attached to
 * a parent, or when `node` is already the exact target text inside a
 * `wrapper` span (so running the scan twice does not nest wrappers).
 *
 * @param {Text} node - Text node to search; it may be split or removed.
 * @param {string} targetString - Literal text to wrap.
 */
function handleText(node, targetString) {
    var current = node;
    var parent = node.parentNode;
    var start, targetNode, followingNode, wrapper;

    // An empty string is "found" at index 0 of any text, which would only
    // ever produce empty wrappers.
    if (!targetString) {
        return;
    }
    // A detached text node has no parent to insert the wrapper into.
    if (!parent) {
        return;
    }
    // Already wrapped by an earlier pass: leave it alone.
    if (parent.className === "wrapper" && node.nodeValue === targetString) {
        return;
    }

    while (current) {
        // Does the (remaining) text contain our target string?
        start = current.nodeValue.indexOf(targetString);
        if (start < 0) {
            break;
        }
        // Split at the beginning of the match
        targetNode = current.splitText(start);
        // Split at the end of the match
        followingNode = targetNode.splitText(targetString.length);
        // Create the wrapper and insert it in front of the target text...
        wrapper = document.createElement('span');
        wrapper.className = "wrapper";
        targetNode.parentNode.insertBefore(wrapper, targetNode);
        // ...then move the target text inside it
        wrapper.appendChild(targetNode);
        // Clean up any empty nodes (in case the target text was at the
        // beginning or end of the text node)
        if (current.nodeValue.length === 0) {
            current.parentNode.removeChild(current);
        }
        if (followingNode.nodeValue.length === 0) {
            followingNode.parentNode.removeChild(followingNode);
            followingNode = null;
        }
        // Keep searching in the text that followed this match
        current = followingNode;
    }
}
更新:上面的代码没有处理同一个文本节点中存在多个匹配项的情况(doh!)。另外,我干脆顺手把它改成了正则表达式匹配——您需要调整这个正则表达式,可能还要对每个匹配项做一些后处理,因为这里的写法过于简单。但这是一个开始:
// The regexp should have a capture group that
// will be the href. In our case below, we just
// make it the whole thing, but that's up to you.
// THIS REGEXP IS ALMOST CERTAINLY TOO SIMPLISTIC
// AND WILL NEED ADJUSTING (for instance: what if
// the link appears at the end of a sentence and
// it shouldn't include the ending punctuation?).
// `\S+` ends the URL at ANY whitespace character, so a
// tab or line break inside a text node also ends the link
// (a plain `[^ ]+` would only stop at a space character).
walk(document.body, /(http:\/\/\S+)/i);
/**
 * Recursively visits `node` and its descendants and passes every text node
 * to `handleText`.
 *
 * Element nodes (nodeType 1) are descended into, text nodes (nodeType 3)
 * are handed to `handleText`, and every other node type is ignored.
 *
 * @param {Node} node - Root of the subtree to scan.
 * @param {RegExp} targetRe - Pattern to search for; forwarded unchanged to
 *     `handleText`.
 */
function walk(node, targetRe) {
    var child, next;
    switch (node.nodeType) {
        case 1: // Element
            for (child = node.firstChild; child; child = next) {
                // Remember the next sibling BEFORE processing the child:
                // handleText may remove the text node it was given (making
                // its nextSibling null) or insert a wrapper element right
                // after it. Reading nextSibling afterwards would either end
                // the loop early or descend into the freshly created wrapper
                // and wrap its text a second time.
                next = child.nextSibling;
                walk(child, targetRe);
            }
            break;
        case 3: // Text node
            handleText(node, targetRe);
            break;
    }
}
/**
 * Wraps every match of `targetRe` inside the text node `node` in an `<a>`
 * element, splitting the text node as needed.
 *
 * The link's href is the first capture group of the match, or the whole
 * match when the pattern has no (participating) first capture group.
 * Does nothing when `node` has no parent or is already inside an `<a>`.
 *
 * @param {Text} node - Text node to search; it may be split or removed.
 * @param {RegExp} targetRe - Pattern describing the text to turn into a
 *     link. Its `lastIndex` is reset to 0 by this function.
 */
function handleText(node, targetRe) {
    var current = node;
    var ancestor, match, targetNode, followingNode, wrapper;

    // A detached text node has no parent to insert the link into.
    if (!node.parentNode) {
        return;
    }
    // Never create a link inside an existing link.
    for (ancestor = node.parentNode; ancestor; ancestor = ancestor.parentNode) {
        if (String(ancestor.nodeName).toLowerCase() === 'a') {
            return;
        }
    }

    while (current) {
        // Each pass searches a different string from its beginning; a
        // global/sticky regexp would otherwise resume at a stale offset.
        targetRe.lastIndex = 0;
        match = targetRe.exec(current.nodeValue);
        // Stop when nothing matches. A zero-length match has no text to
        // wrap and would never advance.
        if (!match || match[0].length === 0) {
            break;
        }
        // Split at the beginning of the match
        targetNode = current.splitText(match.index);
        // Split at the end of the match.
        // match[0] is the full text that was matched.
        followingNode = targetNode.splitText(match[0].length);
        // Create the `a` wrapper and insert it in front of the target text.
        wrapper = document.createElement('a');
        wrapper.href = match[1] !== undefined ? match[1] : match[0];
        targetNode.parentNode.insertBefore(wrapper, targetNode);
        // Now we move the target text inside it
        wrapper.appendChild(targetNode);
        // Clean up any empty nodes (in case the target text was at the
        // beginning or end of the text node)
        if (current.nodeValue.length === 0) {
            current.parentNode.removeChild(current);
        }
        if (followingNode.nodeValue.length === 0) {
            followingNode.parentNode.removeChild(followingNode);
            followingNode = null;
        }
        // Continue with the next match in the remaining text, if any
        current = followingNode;
    }
    // Leave the caller's regexp in a clean state.
    targetRe.lastIndex = 0;
}
我并没有实际验证过,但你可以试试:
// Select every <a> element whose href attribute starts with "http://".
// The attribute filter goes directly after the tag name, in square
// brackets; wrapping it in parentheses is not valid selector syntax.
$('a[href^="http://"]').each(function() {
    // perform your task
});