# Authored by openaiops at commit 8183bb6e
# bow_extractor.py (619 B)
import public_function as pf
from sklearn.feature_extraction.text import CountVectorizer
def extract_bow(all_texts, store_path):
    """Compute bag-of-words features for ``all_texts`` and save them.

    A ``CountVectorizer`` is fitted on ``all_texts`` and its output is
    converted to a dense array. That array is passed through
    ``pf.min_max_normalized`` (presumably min-max scaling; see
    ``public_function`` for the exact behavior). Both the raw and the
    normalized arrays are then handed to ``pf.save``.

    Args:
        all_texts: Iterable of text documents accepted by
            ``CountVectorizer.fit_transform``.
        store_path: Directory prefix; the outputs are written to
            ``store_path + '/BOW.pkl'`` and
            ``store_path + '/nor_BOW_X.pkl'``.
    """
    vectorizer = CountVectorizer()
    counts = vectorizer.fit_transform(all_texts).toarray()
    normalized_counts = pf.min_max_normalized(counts)

    # Both matrices are computed before anything is written; the raw
    # counts are saved first, then the normalized copy.
    outputs = (
        ('/BOW.pkl', counts),
        ('/nor_BOW_X.pkl', normalized_counts),
    )
    for suffix, matrix in outputs:
        pf.save(store_path + suffix, matrix)
if __name__ == '__main__':
    # Script entry point: read the project configuration, load the stored
    # texts, and write the bag-of-words features to the feature directory.
    settings = pf.get_config()
    # Load the texts
    corpus = pf.load(settings['text_path'] + '/all_texts.pkl')
    extract_bow(corpus, settings['feature_path'])