|
| 1 | +import pandas as pd |
| 2 | +import numpy as np |
| 3 | +from sklearn.feature_extraction.text import TfidfVectorizer |
| 4 | +from sklearn.model_selection import train_test_split |
| 5 | +from sklearn.svm import SVC |
| 6 | +from sklearn.metrics import accuracy_score, classification_report |
| 7 | + |
| 8 | + |
def sentiment_analysis(data_path='data.csv'):
    """
    Train and evaluate a linear-kernel SVM sentiment classifier on a CSV file.

    The CSV file is loaded with pandas. Its 'text' column is cleaned with
    ``preprocess_text`` and used as the model input; its 'label' column is
    the target. The rows are split 80/20 into training and test sets with a
    fixed random seed, a TF-IDF vectorizer is fitted on the training texts
    only, and an SVM classifier is trained on the resulting features.

    Prints the accuracy and classification report on the test data.

    Args:
        data_path: Path of the CSV file to read. Defaults to 'data.csv'.
    """
    data = pd.read_csv(data_path)

    # Clean the raw text (separator characters -> spaces, lowercase).
    texts = data['text'].apply(preprocess_text)
    labels = data['label']

    # Split BEFORE vectorizing: fitting TF-IDF on the full corpus would let
    # the test documents influence the vocabulary and IDF weights, which
    # leaks test information into training and biases the reported scores.
    texts_train, texts_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42)

    # Learn the vocabulary/IDF from the training texts, then reuse the same
    # fitted vectorizer to project the test texts into that feature space.
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(texts_train)
    X_test = vectorizer.transform(texts_test)

    svm_classifier = SVC(kernel='linear')
    svm_classifier.fit(X_train, y_train)

    y_pred = svm_classifier.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:")
    # zero_division=1 is the value reported for metrics whose denominator
    # is zero (e.g. a class that is never predicted).
    print(classification_report(y_test, y_pred, zero_division=1))
| 48 | + |
| 49 | + |
# Maps newline, tab and hyphen to a single space each; built once at import.
_SEPARATORS_TO_SPACE = str.maketrans({'\n': ' ', '\t': ' ', '-': ' '})


def preprocess_text(text):
    """
    Normalize a raw text value before it is vectorized.

    Newlines, tabs and hyphens are each replaced by one space and the result
    is lowercased. ``None`` and float NaN (what pandas uses for an empty CSV
    cell) become an empty string; any other non-string value is converted
    with ``str`` first, so the function never fails on a non-text cell.

    Args:
        text: The raw value to clean, normally a string.

    Returns:
        The cleaned, lowercased string.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    # One pass over the string instead of three chained replace() calls.
    return text.translate(_SEPARATORS_TO_SPACE).lower()
| 60 | + |
| 61 | + |
# Run the train-and-evaluate pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    sentiment_analysis()
0 commit comments