计算字符串中的单词

IT技术 javascript
2021-01-27 00:00:05

我试图以这种方式计算文本中的单词:

// The asker's original attempt, reproduced exactly as posted. It is meant to count
// words by counting spaces, but it does not work as written (see the notes below).
// NOTE(review): the function contains no `return` statement.
function WordCount(str) {
  var totalSoFar = 0;
  // NOTE(review): WordCount.length is the function's declared parameter count (1),
  // not the length of the string; str.length was presumably intended.
  for (var i = 0; i < WordCount.length; i++)
    // NOTE(review): str(i) calls str as a function, which throws a TypeError when
    // str is a string; a character is read with str[i] or str.charAt(i).
    if (str(i) === " ") { // if a space is found in str
      // NOTE(review): `= +1` assigns positive one; it does not increment (`+= 1`).
      totalSoFar = +1; // add 1 to total so far
  }
  // NOTE(review): totalsoFar (lower-case "s") is a different identifier from the
  // totalSoFar declared above.
  totalsoFar += 1; // add 1 to totalsoFar to account for extra space since 1 space = 2 words
}

console.log(WordCount("Random String"));

我想我大体上已经弄明白了,只是觉得 if 语句写错了——也就是检查 str(i) 是否为空格并加 1 的那部分。

编辑:

我发现(感谢 Blender)我可以用更少的代码来做到这一点:

/**
 * Counts the pieces obtained by splitting the text on single space characters.
 *
 * @param {string} str - Text to examine.
 * @returns {number} Number of pieces; consecutive spaces produce empty pieces
 *   that are counted too, and an empty string yields 1.
 */
function WordCount(str) {
  var pieces = str.split(" ");
  return pieces.length;
}

console.log(WordCount("hello world"));
6个回答

使用方括号,而不是圆括号:

str[i] === " "

或者使用 charAt:

str.charAt(i) === " "

你也可以用 .split() 来实现:

return str.split(' ').length;
您的解决方案是否适用于由空格以外的字符分隔的单词?比如换行符或制表符?
2021-03-23 00:00:05
计算字符串 JavaScript 中特定单词的总数stackoverflow.com/a/65036248/4752258
2021-04-03 00:00:05
@Blender 很好的解决方案,但如果字符串中出现连续两个空格,这可能会给出错误的结果。
2021-04-04 00:00:05
我想我明白了你的意思,我上面编辑过的原始问题中的代码看起来没问题吗?
2021-04-06 00:00:05
@ipalibowhyte 为了处理这种情况,我是这样做的:str.split(' ').filter((word) => { if (word !== '') return word }),似乎工作正常 :)
2021-04-11 00:00:05

在重新发明轮子之前尝试这些

使用 JavaScript 计算字符串中的单词数

/**
 * Counts whitespace-separated words in a string.
 *
 * @param {string} str - Text to examine.
 * @returns {number} Number of words; 0 when the text is empty or contains
 *   only whitespace.
 */
function countWords(str) {
  var trimmed = str.trim();
  // "".split(/\s+/) yields [""], which would report one word for empty input.
  if (trimmed === '') {
    return 0;
  }
  return trimmed.split(/\s+/).length;
}

来自http://www.mediacollege.com/internet/javascript/text/count-words.html

/**
 * Counts the space-separated words in a text after tidying up its spacing.
 *
 * @param {string} s - Text to examine.
 * @returns {number} Number of non-empty pieces left after splitting on " ".
 */
function countWords(s){
    // Remove whitespace at the very start and the very end of the text.
    s = s.replace(/(^\s*)|(\s*$)/gi,"");
    // Collapse every run of two or more spaces into a single space.
    s = s.replace(/[ ]{2,}/gi," ");
    // Drop the space that directly follows a newline. There is no "g" flag,
    // so only the first such occurrence is affected.
    s = s.replace(/\n /,"\n");
    // Split on single spaces and count the pieces that are not empty.
    var pieces = s.split(' ');
    var total = 0;
    for (var i = 0; i < pieces.length; i++) {
        if (pieces[i] !== "") {
            total += 1;
        }
    }
    return total;
}

来自Use JavaScript to count words in a string, without using a regex - 这将是最好的方法

/**
 * Counts words separated by space characters without using a regular expression.
 *
 * @param {string} str - Text to examine.
 * @returns {number} Number of non-empty pieces between space characters.
 */
function WordCount(str) {
     var pieces = str.split(' ');
     var total = 0;
     for (var i = 0; i < pieces.length; i++) {
          // Consecutive spaces produce empty pieces; those are not words.
          if (pieces[i] !== '') {
               total += 1;
          }
     }
     return total;
}

作者注释:

您可以修改此脚本,按您喜欢的任何方式统计单词。关键部分是 s.split(' ').length——它按空格来计数。该脚本会在计数之前尝试删除所有多余的空格(连续空格等)。如果两个单词之间没有空格,它们会被当作一个单词,例如“第一句。下一句开始”。

我从未见过这种语法: s = s.replace(/(^\s*)|(\s*$)/gi,""); s = s.replace(/[ ]{2,}/gi," "); s = s.replace(/\n /,"\n"); 每一行分别是什么意思?抱歉问了这么多。
2021-03-18 00:00:05
太好了,我只是要求您解释您编写的代码。我以前从未见过语法,想知道它的含义。没关系,我提出了一个单独的问题,有人深入回答了我的问题。抱歉问了这么多。
2021-04-01 00:00:05
请注意,它为空输入返回 1。
2021-04-04 00:00:05
任何事物?这段代码非常令人困惑,您从字面上复制和粘贴的网站根本没有帮助。我只是感到困惑,我知道它应该检查没有空格的单词我们的双空格但是如何?仅仅一百万个随机放置的字符真的没有帮助......
2021-04-05 00:00:05
str.split(/\s+/).length 并没有真正按原样工作:尾随空格被视为另一个词。
2021-04-11 00:00:05

计算字符串中单词的另一种方法。此代码只统计由字母数字字符以及“_”、“’”、“-”、“'”字符组成的单词。

/**
 * Counts words made of word characters (letters, digits, "_") that may also
 * contain apostrophes (' or ’) and hyphens, e.g. "Cat's" or "in-between".
 *
 * @param {string} str - Text to examine.
 * @returns {number} Number of words; 0 when nothing matches.
 */
function countWords(str) {
  // \w already covers digits, and ’ ' - need no escaping inside a character class.
  var tokens = str.match(/[\w’'-]+/g);
  if (!tokens) {
    return 0;
  }
  var hasWordChar = /\w/;
  var count = 0;
  for (var i = 0; i < tokens.length; i++) {
    // Skip punctuation-only tokens such as "--" or a lone quote.
    if (hasWordChar.test(tokens[i])) {
      count += 1;
    }
  }
  return count;
}
这会把 'test'(连同引号)算作一个完整的词,而实际的单词只是 test。如果您要统计重复的单词,这会造成问题。
2021-03-17 00:00:05
在正则表达式的字符类中不需要转义 ’ 和 ';请使用 /[\w\d’'-]+/gi,以避免 ESLint 的 no-useless-escape 警告。
2021-03-18 00:00:05
也可以考虑加上 ’'-,这样 “Cat's meow” 就不会被算作 3 个单词,“in-between” 这类带连字符的词也是如此。
2021-03-26 00:00:05
@mpen 感谢您的建议。我已经根据它更新了我的答案。
2021-04-02 00:00:05
我的字符串中的第一个字符是右引号仅供参考,而不是反引号 :-D
2021-04-10 00:00:05

清理字符串后,您可以匹配非空白字符或单词边界。

这里有两个简单的正则表达式来捕获字符串中的单词:

  • 非空白字符序列: /\S+/g
  • 单词边界之间的有效字符: /\b[a-z\d]+\b/g

下面的示例显示了如何使用这些捕获模式从字符串中检索字数。

/* Redirect console output to HTML: clear the page, then append every logged value plus a newline to the body. */
document.body.innerHTML = '';
console.log = function (message) {
    document.body.innerHTML += message + '\n';
};
/*
 * String format: installs String.format(template, ...values) unless it already exists.
 * Every "{n}" placeholder is replaced by the n-th value after the template;
 * placeholders whose value is undefined are left untouched.
 */
if (!String.format) {
    String.format = function (f) {
        var values = Array.prototype.slice.call(arguments, 1);
        return f.replace(/{(\d+)}/g, function (placeholder, index) {
            var value = values[index];
            if (typeof value !== 'undefined') {
                return value;
            }
            return placeholder;
        });
    };
}

// ^ IGNORE CODE ABOVE ^
//   =================

// Clean and match sub-strings in a string.
/**
 * Cleans a string and returns the sub-strings matched by a pattern.
 *
 * Cleaning removes every character that is neither a word character nor
 * whitespace (underscores are removed as well), collapses whitespace runs to a
 * single space and lower-cases the result.
 *
 * @param {string} str - Text to search.
 * @param {RegExp} regexp - Pattern applied to the cleaned text.
 * @returns {Array} The result of String.prototype.match, or [] when nothing matched.
 */
function extractSubstr(str, regexp) {
    var withoutPunctuation = str.replace(/[^\w\s]|_/g, '');
    var singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
    var found = singleSpaced.toLowerCase().match(regexp);
    if (!found) {
        return [];
    }
    return found;
}

// Find words by searching for sequences of non-whitespace characters.
/**
 * Finds words as maximal runs of non-whitespace characters in the cleaned text.
 *
 * @param {string} str - Text to search.
 * @returns {Array} Whatever extractSubstr returns for the /\S+/g pattern.
 */
function getWordsByNonWhiteSpace(str) {
    var nonWhiteSpaceRun = /\S+/g;
    return extractSubstr(str, nonWhiteSpaceRun);
}

// Find words by searching for valid characters between word-boundaries.
/**
 * Finds words as runs of lower-case letters and digits delimited by word
 * boundaries in the cleaned text.
 *
 * @param {string} str - Text to search.
 * @returns {Array} Whatever extractSubstr returns for the /\b[a-z\d]+\b/g pattern.
 */
function getWordsByWordBoundaries(str) {
    var boundedWord = /\b[a-z\d]+\b/g;
    return extractSubstr(str, boundedWord);
}

// Example of usage: extract the words of one sentence with both helpers and
// print each word list together with its length.
var edisonQuote = "I have not failed. I've just found 10,000 ways that won't work.";
// Both helpers go through extractSubstr, so the words come back lower-cased and
// with punctuation removed.
var words1 = getWordsByNonWhiteSpace(edisonQuote);
var words2 = getWordsByWordBoundaries(edisonQuote);

// String.format substitutes the {0}, {1} placeholders with the arguments that follow the template.
console.log(String.format('"{0}" - Thomas Edison\n\nWord count via:\n', edisonQuote));
console.log(String.format(' - non-white-space: ({0}) [{1}]', words1.length, words1.join(', ')));
console.log(String.format(' - word-boundaries: ({0}) [{1}]', words2.length, words2.join(', ')));
/* Monospace output; white-space: pre keeps newline characters in the body text as visible line breaks. */
body {
    font-family: monospace;
    white-space: pre;
    font-size: 11px;
}


寻找独特的词

您还可以创建单词映射以获得唯一计数。

/**
 * Normalises text for word extraction.
 *
 * Removes every character that is neither a word character nor whitespace
 * (underscores are removed too), collapses each whitespace run into one space
 * and lower-cases the result. Leading and trailing spaces are not trimmed.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The cleaned, lower-case text.
 */
function cleanString(str) {
    var withoutPunctuation = str.replace(/[^\w\s]|_/g, '');
    var singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
    return singleSpaced.toLowerCase();
}

/**
 * Returns the sub-strings of the cleaned text that match a pattern.
 *
 * @param {string} str - Text to search; it is passed through cleanString first.
 * @param {RegExp} regexp - Pattern applied to the cleaned text.
 * @returns {Array} The result of String.prototype.match, or [] when nothing matched.
 */
function extractSubstr(str, regexp) {
    var found = cleanString(str).match(regexp);
    if (!found) {
        return [];
    }
    return found;
}

/**
 * Finds words as maximal runs of non-whitespace characters in the cleaned text.
 *
 * @param {string} str - Text to search.
 * @returns {Array} Whatever extractSubstr returns for the /\S+/g pattern.
 */
function getWordsByNonWhiteSpace(str) {
    var nonWhiteSpaceRun = /\S+/g;
    return extractSubstr(str, nonWhiteSpaceRun);
}

/**
 * Finds words as runs of lower-case letters and digits delimited by word
 * boundaries in the cleaned text.
 *
 * @param {string} str - Text to search.
 * @returns {Array} Whatever extractSubstr returns for the /\b[a-z\d]+\b/g pattern.
 */
function getWordsByWordBoundaries(str) {
    var boundedWord = /\b[a-z\d]+\b/g;
    return extractSubstr(str, boundedWord);
}

/**
 * Builds a word -> occurrence-count map for a text.
 *
 * @param {string} str - Text to analyse; words are obtained from
 *   getWordsByWordBoundaries.
 * @returns {Object} Plain object whose own keys are the words and whose values
 *   are the number of times each word occurs.
 */
function wordMap(str) {
    var hasOwn = Object.prototype.hasOwnProperty;
    return getWordsByWordBoundaries(str).reduce(function(map, word) {
        // Only trust own properties: a word such as "constructor" would otherwise
        // pick up the member inherited from Object.prototype and corrupt its count.
        var seen = hasOwn.call(map, word) ? map[word] : 0;
        map[word] = seen + 1;
        return map;
    }, {});
}

/**
 * Converts an object into an array of [key, value] pairs.
 *
 * @param {Object} map - Object whose own enumerable properties are listed.
 * @returns {Array} One two-element array per key, in Object.keys order.
 */
function mapToTuples(map) {
    var keys = Object.keys(map);
    var tuples = [];
    for (var i = 0; i < keys.length; i++) {
        tuples.push([ keys[i], map[keys[i]] ]);
    }
    return tuples;
}

/**
 * Converts an object into [key, value] pairs and sorts them.
 *
 * @param {Object} map - Object to convert via mapToTuples.
 * @param {Function} sortFn - Comparator called as sortFn(a, b, sortOrder) with
 *   an undefined `this`; its return value is used as the sort result.
 * @param {*} sortOrder - Extra argument handed through to sortFn unchanged.
 * @returns {Array} The sorted array of pairs.
 */
function mapToSortedTuples(map, sortFn, sortOrder) {
    var tuples = mapToTuples(map);
    var compare = function(left, right) {
        return sortFn.call(undefined, left, right, sortOrder);
    };
    return tuples.sort(compare);
}

/**
 * Counts the words that getWordsByWordBoundaries finds in a text.
 *
 * @param {string} str - Text to examine.
 * @returns {number} Number of words found.
 */
function countWords(str) {
    var words = getWordsByWordBoundaries(str);
    return words.length;
}

/**
 * Lists the words of a text with their counts, most frequent first; words with
 * equal counts are ordered alphabetically.
 *
 * @param {string} str - Text to analyse.
 * @returns {Array} Sorted array of [word, count] pairs.
 */
function wordFrequency(str) {
    // Compares two [word, count] pairs. order[1] scales the count comparison and
    // order[0] scales the alphabetical tie-break.
    function compareTuples(a, b, order) {
        var countA = a[1];
        var countB = b[1];
        if (countB > countA) {
            return order[1] * -1;
        }
        if (countA > countB) {
            return order[1] * 1;
        }
        var alphabetical = 0;
        if (a[0] < b[0]) {
            alphabetical = -1;
        } else if (a[0] > b[0]) {
            alphabetical = 1;
        }
        return order[0] * alphabetical;
    }
    return mapToSortedTuples(wordMap(str), compareTuples, [1, -1]);
}

/**
 * Formats [word, count] pairs as text, one "word -> count" line per pair.
 *
 * @param {Array} tuples - Pairs to format.
 * @returns {string} The lines joined with "\n"; the word column is passed
 *   through padStr with a width of 12.
 */
function printTuples(tuples) {
    function formatTuple(tuple) {
        var wordColumn = padStr(tuple[0], ' ', 12, 1);
        return wordColumn + ' -> ' + tuple[1];
    }
    return tuples.map(formatTuple).join('\n');
}

/**
 * Pads a string with a filler until it is `width` characters long, then cuts
 * the result to exactly `width` characters (longer inputs are truncated).
 *
 * @param {string} str - Text to pad.
 * @param {string} ch - Filler prepended or appended repeatedly.
 * @param {number} width - Target length of the result.
 * @param {number} dir - Negative pads on the left; anything else pads on the right.
 * @returns {string} The padded or truncated text.
 */
function padStr(str, ch, width, dir) {
    var padded = str;
    // An empty filler can never lengthen the text, so skip padding instead of looping forever.
    if (ch.length > 0) {
        // Iterative rather than recursive so large widths cannot exhaust the call stack.
        while (padded.length < width) {
            padded = dir < 0 ? ch + padded : padded + ch;
        }
    }
    // substring clamps a negative width to 0, like the deprecated substr(0, width) did.
    return padded.substring(0, width);
}

/**
 * Builds a jQuery <table> with a header row and one body row per data row.
 *
 * Values are inserted as text rather than markup, so characters such as "<"
 * in a cell are displayed literally instead of being parsed as HTML.
 *
 * @param {Array} data - Array of rows; each row is an array of cell values.
 * @param {Array} headers - Column captions for the header row.
 * @returns {jQuery} The detached table element.
 */
function toTable(data, headers) {
    var headerRow = $('<tr>').append(headers.map(function(header) {
        return $('<th>').text(header);
    }));
    var bodyRows = data.map(function(row) {
        return $('<tr>').append(row.map(function(cell) {
            return $('<td>').text(cell);
        }));
    });
    return $('<table>')
        .append($('<thead>').append(headerRow))
        .append($('<tbody>').append(bodyRows));
}

/**
 * Inserts rows at the top of a table's <tbody>.
 *
 * Values are inserted as text rather than markup, so characters such as "<"
 * in a cell are displayed literally instead of being parsed as HTML.
 *
 * @param {jQuery} table - Table whose tbody receives the rows.
 * @param {Array} data - Array of rows; each row is an array of cell values.
 * @returns {jQuery} The same table, to allow chaining.
 */
function addRowsBefore(table, data) {
    var rows = data.map(function(row) {
        return $('<tr>').append(row.map(function(cell) {
            return $('<td>').text(cell);
        }));
    });
    table.find('tbody').prepend(rows);
    return table;
}

// Once the DOM is ready, wire the "Count Words" button: each click reads the
// textarea, computes the word statistics and replaces the contents of #wordFreq
// with a table of the results.
$(function() {
    function renderWordFrequency() {
        var text = $('#wordsTxtAra').val();
        var frequencies = wordFrequency(text);
        var total = countWords(text);
        // Summary rows shown above the per-word rows.
        var summaryRows = [
            [ 'TOTAL', total ],
            [ 'UNIQUE', frequencies.length ]
        ];
        var table = toTable(frequencies, ['Word', 'Frequency']);
        addRowsBefore(table, summaryRows);
        $('#wordFreq').html(table);
    }

    $('#countWordsBtn').on('click', renderWordFrequency);
});
/* Results table: collapsed borders, fixed layout, 200px wide, monospace text. */
table {
    border-collapse: collapse;
    table-layout: fixed;
    width: 200px;
    font-family: monospace;
}
/* Double rule under the header section. */
thead {
    border-bottom: #000 3px double;
}
/* Thin solid border around the table and every cell. */
table, td, th {
    border: #000 1px solid;
}
/* Cells are 100px wide; content that does not fit is clipped. */
td, th {
    padding: 2px;
    width: 100px;
    overflow: hidden;
}

/* Uniform spacing for the form controls and the table. */
textarea, input[type="button"], table {
    margin: 4px;
    padding: 2px;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>

<h1>Word Frequency</h1>
<textarea id="wordsTxtAra" cols="60" rows="8">Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal.

Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.

But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the people, for the people, shall not perish from the earth.</textarea><br />
<input type="button" id="countWordsBtn" value="Count Words" />
<div id="wordFreq"></div>

这是一个很棒且全面的答案。感谢所有示例,它们真的很有用!
2021-03-15 00:00:05

我认为这种方法比你想要的多

/**
 * Counts the maximal runs of non-whitespace characters in a string.
 *
 * @param {string} v - Text to examine.
 * @returns {number} Number of runs found; 0 when there are none.
 */
var getWordCount = function(v){
    var words = v.match(/\S+/g);
    if (!words) {
        return 0;
    }
    return words.length;
};