연관어 분석
-
word2vec 기반 연관어 분석 | Python | 2020. 3. 4. 12:46
# 패키지 로딩하기 import pandas as pd import numpy as np import glob from nltk.corpus import stopwords from nltk.stem.porter import PorterStemmer from nltk.tokenize import RegexpTokenizer from gensim.models.word2vec import Word2Vec # 데이터 읽어오기/벡터로 만들기 pos_review = glob.glob("d:/deeplearning/textmining/pos/*.txt")[0:100] pos_lines = [] for i in pos_review: try: f = open(i, "r") temp = f.readlines()[0] po..
-
통계적 기반의 연관어 분석 | Python | 2020. 3. 4. 12:28
# 패키지 로딩하기 import pandas as pd import numpy as np import glob from scipy import sparse from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity from afinn import Afinn from nltk.corpus import stopwords from nltk.stem.porter import PorterStemmer from nltk.tokenize import RegexpTokenizer # 100개의 데이터 읽어오기 pos_review = glob.glob("d:/deeplearn..