🍺 [Talk] AI sentiment analysis of beer review data | MODUPOP | MODULABS
- This project analyzes the sentiment of review data and extracts key phrases from it.
- It covers data collection, labeling, modeling, data augmentation, and keyphrase extraction end to end.
- The sentiment model achieves a precision of 0.9 or higher.
- Provides key phrases extracted for the beer a user selects.

## Sentiment Classification & Keyphrase Extraction Demo (English only)
!git clone https://github.com/sgr1118/Bert_beer_sentiment_anlysis.git
pip install -r requirements.txt

## Using the Pre-Trained Model [Colab]
import torch
from transformers import BertTokenizerFast, BertForSequenceClassification
from torch.nn.functional import softmax
import matplotlib.pyplot as plt

# Load the fine-tuned sentiment classifier from the Hugging Face Hub.
model = BertForSequenceClassification.from_pretrained('GiRak/beer-sentiment-bert')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
# Inference only: switch off dropout / use running batch-norm stats.
model.eval()

# The tokenizer must match the checkpoint the model was trained with.
tokenizer = BertTokenizerFast.from_pretrained('GiRak/beer-sentiment-bert')
def analyze_sentiment(sentence):
    """Classify one sentence and return its sentiment probabilities.

    Args:
        sentence: Raw review text (English).

    Returns:
        dict mapping 'Negative' and 'Positive' to float probabilities
        that sum to 1.0.
    """
    # Tokenize and move the input tensors to the model's device.
    inputs = tokenizer(sentence, return_tensors='pt')
    inputs = inputs.to(device)

    # Pure inference: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probabilities = softmax(logits, dim=1)

    # Index 0 = Negative, 1 = Positive (label order from the original code;
    # confirm against the model's config if it changes).
    sentiment_labels = ['Negative', 'Positive']
    return {
        label: probability.item()
        for label, probability in zip(sentiment_labels, probabilities[0])
    }
# Example input; add more review sentences to analyze a batch.
sentences = ['I took a sip and immediately discarded it. How could a beer have such a strong cinnamon flavor?']

# Per-sentence probabilities, in percent.
positive_probs = []
negative_probs = []

for sentence in sentences:
    sentiment_probabilities = analyze_sentiment(sentence)
    positive_prob = sentiment_probabilities['Positive'] * 100
    negative_prob = sentiment_probabilities['Negative'] * 100
    positive_probs.append(positive_prob)
    negative_probs.append(negative_prob)
    print("Sentence:", sentence)
    print("Positive Probability:", int(positive_prob), "%")
    print("Negative Probability:", int(negative_prob), "%")

# Plot the result for the first sentence only.
x = ['Positive', 'Negative']
plt.bar(x, [positive_probs[0], negative_probs[0]], color=['green', 'red'])
plt.xlabel('Sentiment')
plt.ylabel('Probability (%)')
plt.title('Sentiment Analysis Result')
plt.tight_layout()
plt.show()

# Load the model
# These imports were missing from the original snippet: KeyBERT and
# KeyphraseCountVectorizer are used below but were never brought into scope.
from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseCountVectorizer
from tqdm import tqdm

# Load the keyphrase-extraction model.
kw_model = KeyBERT('all-mpnet-base-v2')


def apply_keybert(sentence):
    """Extract up to 3 keyphrases from *sentence*.

    Returns the keyphrases joined by ', ', ordered by descending score.
    """
    keywords = kw_model.extract_keywords(sentence, vectorizer=KeyphraseCountVectorizer(), stop_words='english', top_n=3)
    return ', '.join([keyword for keyword, score in keywords])


# Store the extracted keyphrases in a new column.
# NOTE(review): this runs KeyBERT once per review, so it is slow on large
# frames — consider batching if the dataset grows.
df['keywords'] = df['Review'].apply(apply_keybert)
# Counting Keywords and Keyphrases
from collections import Counter
from itertools import chain


def count_all_keywords(dataframe):
    """Count every keyword in the 'keywords' column across all rows.

    Args:
        dataframe: DataFrame with a 'keywords' column of ', '-joined strings.

    Returns:
        List of (keyword, count) tuples sorted by count, descending.
    """
    # chain.from_iterable flattens the per-row lists in O(n).  The original
    # `Series.sum()` concatenated lists in O(n^2), crashed on NaN rows, and
    # returned 0 (not a list) for an empty frame, which broke Counter.
    split_keywords = dataframe['keywords'].dropna().str.split(', ')
    keyword_counts = Counter(chain.from_iterable(split_keywords))
    return keyword_counts.most_common()
# Count the keywords in the 'keywords' column across every row
sorted_all_keyword_counts = count_all_keywords(beer_Wired_iStout_pre)

| No | Description | GitHub |
|---|---|---|
| 1 | Data Collection | Link |
| 2 | Data Labeling | Link |
| 3 | Fine-tuning | Link |
- Planned: establish and apply neutral-labeling criteria to support more fine-grained sentiment classification.
- Limitation: KeyBERT keyphrase extraction time grows with the amount of data, so it is hard to return extraction results in real time.
This project was supported by the K-Digital Platform of MODULABS.
