# tfidf_extractor.py (646 B)
# Authored by openaiops at commit 8183bb6e
import public_function as pf
from sklearn.feature_extraction.text import TfidfVectorizer
def extract_tfidf(all_texts, store_path):
    """Compute TF-IDF features for a corpus and persist them.

    A fresh ``TfidfVectorizer`` is fitted on ``all_texts`` and the resulting
    matrix is converted to a dense array. That array is passed through
    ``pf.min_max_normalized`` and both results are written with ``pf.save``:

    * ``<store_path>/TFIDF.pkl``       -- the dense TF-IDF array
    * ``<store_path>/nor_TFIDF_X.pkl`` -- the output of the normalisation

    Args:
        all_texts: Documents handed directly to
            ``TfidfVectorizer.fit_transform``.
        store_path: Directory prefix for the two output files; it is joined
            to the file names by plain string concatenation.

    Returns:
        None. The results are only written to disk.
    """
    vectorizer = TfidfVectorizer()
    # Densify the sparse result so the helpers receive a plain array.
    dense_features = vectorizer.fit_transform(all_texts).toarray()
    # NOTE(review): presumably rescales values to [0, 1]; confirm in
    # public_function. Kept ahead of any save, as in the original ordering.
    scaled_features = pf.min_max_normalized(dense_features)

    outputs = (
        ('/TFIDF.pkl', dense_features),
        ('/nor_TFIDF_X.pkl', scaled_features),
    )
    for suffix, matrix in outputs:
        pf.save(store_path + suffix, matrix)
if __name__ == '__main__':
    # Script entry point: read the project configuration, load the stored
    # texts, and write the TF-IDF features into the configured directory.
    settings = pf.get_config()

    # Load the texts
    corpus = pf.load(settings['text_path'] + '/all_texts.pkl')

    extract_tfidf(corpus, settings['feature_path'])