# Skip to content
# Snippets / Groups / Projects
# tfidf_extractor.py 646 B
import public_function as pf
from sklearn.feature_extraction.text import TfidfVectorizer


def extract_tfidf(all_texts, store_path):
    """Compute TF-IDF features for a corpus and persist them to disk.

    A default-configured ``TfidfVectorizer`` is fitted on ``all_texts`` and
    the resulting matrix is converted to a dense array. That array is passed
    through ``pf.min_max_normalized`` (presumably a min-max rescaling helper;
    confirm against ``public_function``). Both the raw and the normalized
    arrays are handed to ``pf.save``.

    Args:
        all_texts: Iterable of documents accepted by
            ``TfidfVectorizer.fit_transform``.
        store_path: Directory prefix (string) for the output files. The
            raw features go to ``<store_path>/TFIDF.pkl`` and the normalized
            ones to ``<store_path>/nor_TFIDF_X.pkl``.

    Returns:
        None. The results are only written through ``pf.save``.
    """
    vectorizer = TfidfVectorizer()
    # fit_transform yields a sparse matrix; it is densified before
    # normalization and saving.
    dense_features = vectorizer.fit_transform(all_texts).toarray()
    scaled_features = pf.min_max_normalized(dense_features)

    # Build both destinations first, then write raw followed by normalized.
    raw_target = store_path + '/TFIDF.pkl'
    scaled_target = store_path + '/nor_TFIDF_X.pkl'
    for target, features in ((raw_target, dense_features),
                             (scaled_target, scaled_features)):
        pf.save(target, features)


if __name__ == '__main__':
    # Script entry point: read the project configuration via pf.get_config().
    settings = pf.get_config()
    # Load the texts from '<text_path>/all_texts.pkl'.
    corpus_file = settings['text_path'] + '/all_texts.pkl'
    # Extract TF-IDF features and store them under the configured
    # feature directory.
    extract_tfidf(pf.load(corpus_file), settings['feature_path'])