ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • LDA(Latent Dirichlet Allocation)
    Python 2020. 3. 2. 18:05

    # Install the gensim package.
    # NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line
    # only works inside a notebook cell; it is a syntax error in a plain .py script.
    !pip install gensim

     

    # 패키지 로딩하기
    from nltk.corpus import stopwords
    from nltk.stem.porter import PorterStemmer
    from nltk.tokenize import RegexpTokenizer
    from gensim import corpora, models
    from gensim.models import CoherenceModel
    import gensim
    import matplotlib.pyplot as plt

     

    # Text-preprocessing helpers shared by the tokenization step further down.

    # Tokenizer that keeps runs of word characters and drops everything else
    # (punctuation, whitespace). The pattern is a raw string so that `\w` reaches
    # the regex engine untouched instead of being parsed as an invalid string
    # escape, which newer Python versions warn about.
    tokenizer = RegexpTokenizer(r"[\w]+")

    # English stop-word list from the NLTK stopwords corpus.
    # NOTE: requires that corpus to be available locally
    # (e.g. via nltk.download("stopwords")).
    stop_words = stopwords.words("english")

    # Porter stemmer used to reduce each token to its stem.
    porter_stemmer = PorterStemmer()

     

    # Chapter 1, verse 1 of each Old Testament book; each string is one document.
    # NOTE: the identifiers `Gensis` and `Exos` are misspelled (Genesis, Exodus) but
    # are referenced under these exact names by the `old_testament` list below,
    # so they are left unchanged here.
    Gensis = "In the beginning God created the heavens and the earth."
    Exos = "These are the names of the sons of Israel who went to Egypt with Jacob, each with his family"
    Leviticus = "The LORD called to Moses and spoke to him from the Tent of Meeting. He said,"
    Numbers = "The LORD spoke to Moses in the Tent of Meeting in the Desert of Sinai on the first day of the second month of the second year after the Israelites came out of Egypt. He said"
    Deuteronomy = "These are the words Moses spoke to all Israel in the desert east of the Jordan--that is, in the Arabah--opposite Suph, between Paran and Tophel, Laban, Hazeroth and Dizahab"
    Joshua = "After the death of Moses the servant of the LORD, the LORD said to Joshua son of Nun, Moses' aide"
    Judges = "After the death of Joshua, the Israelites asked the LORD, Who will be the first to go up and fight for us against the Canaanites?"
    Ruth = "In the days when the judges ruled, there was a famine in the land, and a man from Bethlehem in Judah, together with his wife and two sons, went to live for a while in the country of Moab"
    Samuel1 = "There was a certain man from Ramathaim, a Zuphite from the hill country of Ephraim, whose name was Elkanah son of Jeroham, the son of Elihu, the son of Tohu, the son of Zuph, an Ephraimite."
    Samuel2 = "After the death of Saul, David returned from defeating the Amalekites and stayed in Ziklag two days."
    Kings1 = "When King David was old and well advanced in years, he could not keep warm even when they put covers over him."
    Kings2 = "After Ahab's death, Moab rebelled against Israel."
    Chronicles1 = "Adam, Seth, Enosh,"
    Chronicles2 = "Solomon son of David established himself firmly over his kingdom, for the LORD his God was with him and made him exceedingly great."
    Ezra = "In the first year of Cyrus king of Persia, in order to fulfill the word of the LORD spoken by Jeremiah, the LORD moved the heart of Cyrus king of Persia to make a proclamation throughout his realm and to put it in writing"
    Nehemiah = "The words of Nehemiah son of Hacaliah: In the month of Kislev in the twentieth year, while I was in the citadel of Susa,"
    Esther = "This is what happened during the time of Xerxes, the Xerxes who ruled over 127 provinces stretching from India to Cush"
    Job = "In the land of Uz there lived a man whose name was Job. This man was blameless and upright; he feared God and shunned evil."
    Psalms = "Blessed is the man who does not walk in the counsel of the wicked or stand in the way of sinners or sit in the seat of mockers."
    Proverbs = "The proverbs of Solomon son of David, king of Israel"
    Ecclesiastes = "The words of the Teacher, son of David, king in Jerusalem"
    SongofSongs = "Solomon's Song of Songs."
    Isaiah = "The vision concerning Judah and Jerusalem that Isaiah son of Amoz saw during the reigns of Uzziah, Jotham, Ahaz and Hezekiah, kings of Judah."
    Jeremiah = "The words of Jeremiah son of Hilkiah, one of the priests at Anathoth in the territory of Benjamin."
    Lamentations = "How deserted lies the city, once so full of people! How like a widow is she, who once was great among the nations! She who was queen among the provinces has now become a slave."
    Ezekiel = "In the thirtieth year, in the fourth month on the fifth day, while I was among the exiles by the Kebar River, the heavens were opened and I saw visions of God."
    Daniel = "In the third year of the reign of Jehoiakim king of Judah, Nebuchadnezzar king of Babylon came to Jerusalem and besieged it."
    Hosea = "The word of the LORD that came to Hosea son of Beeri during the reigns of Uzziah, Jotham, Ahaz and Hezekiah, kings of Judah, and during the reign of Jeroboam son of Jehoash king of Israel"
    Joel = "The word of the LORD that came to Joel son of Pethuel."
    Amos = "The words of Amos, one of the shepherds of Tekoa--what he saw concerning Israel two years before the earthquake, when Uzziah was king of Judah and Jeroboam son of Jehoash was king of Israel."
    Obadiah = "The vision of Obadiah. This is what the Sovereign LORD says about Edom-- We have heard a message from the LORD : An envoy was sent to the nations to say, Rise, and let us go against her for battle"
    Jonah = "The word of the LORD came to Jonah son of Amittai"
    Micah = "The word of the LORD that came to Micah of Moresheth during the reigns of Jotham, Ahaz and Hezekiah, kings of Judah--the vision he saw concerning Samaria and Jerusalem."
    Nahum = "An oracle concerning Nineveh. The book of the vision of Nahum the Elkoshite."
    Habakkuk = "The oracle that Habakkuk the prophet received."
    Zephaniah = "The word of the LORD that came to Zephaniah son of Cushi, the son of Gedaliah, the son of Amariah, the son of Hezekiah, during the reign of Josiah son of Amon king of Judah"
    Haggai = "In the second year of King Darius, on the first day of the sixth month, the word of the LORD came through the prophet Haggai to Zerubbabel son of Shealtiel, governor of Judah, and to Joshua son of Jehozadak, the high priest"
    Zechariah = "In the eighth month of the second year of Darius, the word of the LORD came to the prophet Zechariah son of Berekiah, the son of Iddo"
    Malachi = "An oracle: The word of the LORD to Israel through Malachi."

     

    # The document collection: every verse string, in the order it was defined.
    old_testament = [
        Gensis,
        Exos,
        Leviticus,
        Numbers,
        Deuteronomy,
        Joshua,
        Judges,
        Ruth,
        Samuel1,
        Samuel2,
        Kings1,
        Kings2,
        Chronicles1,
        Chronicles2,
        Ezra,
        Nehemiah,
        Esther,
        Job,
        Psalms,
        Proverbs,
        Ecclesiastes,
        SongofSongs,
        Isaiah,
        Jeremiah,
        Lamentations,
        Ezekiel,
        Daniel,
        Hosea,
        Joel,
        Amos,
        Obadiah,
        Jonah,
        Micah,
        Nahum,
        Habakkuk,
        Zephaniah,
        Haggai,
        Zechariah,
        Malachi,
    ]

     

    # Preprocess every verse: lower-case -> regex tokenize -> drop stop words ->
    # Porter-stem. (No part-of-speech / noun filtering happens here.)
    stop_word_set = set(stop_words)  # set gives O(1) membership tests in the loop
    texts = []
    for verse in old_testament:
        tokens = tokenizer.tokenize(verse.lower())
        kept_tokens = [token for token in tokens if token not in stop_word_set]
        texts.append([porter_stemmer.stem(token) for token in kept_tokens])

    # Build the vocabulary and the bag-of-words corpus that LdaModel consumes.
    # Both names were used below without ever being defined, which made the
    # script fail with a NameError before any model was trained.
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # Candidate topic counts; also the x-axis of both plots (previously undefined).
    x = list(range(2, 39))

    # Train one LDA model per topic count and score that same model both ways,
    # so the two curves describe the same models and each count is trained once.
    perplexity_values = []
    coherence_values = []
    for i in x:
        ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics=i, id2word=dictionary)

        # NOTE: log_perplexity() returns the per-word likelihood bound (log scale),
        # not the perplexity itself; gensim derives perplexity as 2 ** (-bound).
        perplexity_values.append(ldamodel.log_perplexity(corpus))

        coherence_model_lda = CoherenceModel(
            model=ldamodel,
            texts=texts,
            dictionary=dictionary,
            topn=10,
        )
        coherence_lda = coherence_model_lda.get_coherence()
        coherence_values.append(coherence_lda)

    # Plot the perplexity-based score against the number of topics.
    plt.plot(x, perplexity_values)
    plt.xlabel("Number of Topics")
    plt.ylabel("Perplexity Score")
    plt.show()

    # Plot the coherence score against the number of topics.
    plt.plot(x, coherence_values)
    plt.xlabel("Number of Topics")
    plt.ylabel("Coherence Score")
    plt.show()

     

     

    [참고] 잡아라! 텍스트마이닝 with 파이썬, 서대호 지음, BJ, p97~100

    'Python' 카테고리의 다른 글

    통계적 기반의 연관어 분석  (0) 2020.03.04
    사전 기반의 감성분석(Sentiment Analysis)  (0) 2020.03.03
    텍스트 구조적 군집분석  (0) 2020.03.02
    텍스트 군집분석  (0) 2020.03.02
    word cloud  (0) 2020.03.02
Designed by Tistory.