我正在尝试CountVectorizer()循环使用,但我得到了意想不到的结果。另一方面,如果我在循环之外使用它,那么它工作正常。我认为逻辑存在一些小问题。
from sklearn.feature_extraction.text import CountVectorizer
keys_1 = ['funny', 'amusing', 'humorous', 'hilarious', 'jolly']
keys_2 = ['horror', 'fear', 'shock', 'panic', 'scream']
keys_3 = ['romantic', 'intimate', 'passionate', 'love', 'fond']
text = ('funny amusing fear passionate')
for i in range(3):
keys = 'keys_' + str(i+1)
cv = CountVectorizer(vocabulary = keys)
data = cv.fit_transform([text]).toarray()
print(data)
cv1 = CountVectorizer(vocabulary = keys_1)
data = cv1.fit_transform([text]).toarray()
print(data)
cv2 = CountVectorizer(vocabulary = keys_2)
data = cv2.fit_transform([text]).toarray()
print(data)
cv3 = CountVectorizer(vocabulary = keys_3)
data = cv3.fit_transform([text]).toarray()
print(data)
输出
[[0 0 0 0 0 0]]
[[0 0 0 0 0 0]]
[[0 0 0 0 0 0]]
[[1 1 0 0 0]]
[[0 1 0 0 0]]
[[0 0 1 0 0]]