add script

srujana-16 · web-flow · commit 5ec853715314 · 2023-08-03T12:51:40.000+05:30
diff --git a/Sentiment Analysis/README.md b/Sentiment Analysis/README.md
@@ -0,0 +1,30 @@
+# Sentiment Analysis using Support Vector Machine (SVM)
+
+This Python script performs sentiment analysis on text data using a Support Vector Machine (SVM) classifier. It reads data from a CSV file, preprocesses the text, and trains an SVM model to classify each text into one of the sentiment labels found in the data (for example positive, negative, or neutral).
+
+## Requirements
+
+- Python 3.x
+- scikit-learn
+- numpy
+- pandas
+
+Install the required libraries using the following command:
+`pip install scikit-learn numpy pandas` 
+
+## Usage
+
+1. Prepare your data: Create a CSV file named `data.csv` in the directory you will run the script from, with two columns: 'text' containing the text data (sentences, reviews, etc.), and 'label' containing the corresponding sentiment labels (e.g., positive, negative, or neutral).
+
+2. Run the script: From the directory containing `data.csv`, run `python Sentiment_Analysis.py` to perform sentiment analysis on the data.
+
+
+## Output
+
+The script prints the accuracy and the classification report of the SVM model on the test set, which is a 20% hold-out split of the rows in the CSV file.
+
+## Author(s)
+
+Srujana
+
+
diff --git a/Sentiment Analysis/Sentiment_Analysis.py b/Sentiment Analysis/Sentiment_Analysis.py
@@ -0,0 +1,63 @@
+import pandas as pd
+import numpy as np
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC
+from sklearn.metrics import accuracy_score, classification_report
+
+
+def sentiment_analysis(data_path='data.csv'):
+    """
+    Train and evaluate a linear SVM sentiment classifier on a CSV dataset.
+
+    Reads the CSV at `data_path`, which must have a 'text' column and a 'label'
+    column, cleans the text with `preprocess_text`, holds out 20% of the rows
+    as a test set, and prints the accuracy and classification report for it.
+    Rows with a missing text or label are skipped.
+    """
+    # Drop incomplete rows: a missing text is read as a float NaN, which has no
+    # string methods, and a missing label cannot be used as a target.
+    data = pd.read_csv(data_path).dropna(subset=['text', 'label'])
+
+    # Normalise the text; astype(str) guards against all-numeric text cells.
+    texts = data['text'].astype(str).apply(preprocess_text)
+    labels = data['label']
+
+    # Split the raw text BEFORE vectorising so the test rows stay unseen.
+    text_train, text_test, y_train, y_test = train_test_split(
+        texts, labels, test_size=0.2, random_state=42)
+
+    # Fit TF-IDF on the training text only; fitting on the full dataset would
+    # leak test-set vocabulary and document frequencies into the features.
+    vectorizer = TfidfVectorizer()
+    X_train = vectorizer.fit_transform(text_train)
+    X_test = vectorizer.transform(text_test)
+
+    # Train an SVM classifier
+    svm_classifier = SVC(kernel='linear')
+    svm_classifier.fit(X_train, y_train)
+
+    # Make predictions on the test set
+    y_pred = svm_classifier.predict(X_test)
+
+    # Report accuracy and per-class metrics; zero_division=1 scores a metric
+    # whose denominator is zero as 1.0 instead of emitting a warning.
+    print("Accuracy:", accuracy_score(y_test, y_pred))
+    print("Classification Report:")
+    print(classification_report(y_test, y_pred, zero_division=1))
+
+
+def preprocess_text(text):
+    """Return `text` lowercased, with newlines, tabs and hyphens as spaces."""
+    # One translation pass swaps all three separator characters at once.
+    separators_to_space = str.maketrans({
+        '\n': ' ',
+        '\t': ' ',
+        '-': ' ',
+    })
+    cleaned = text.translate(separators_to_space)
+    return cleaned.lower()
+
+
+if __name__ == '__main__':  # run the analysis only when executed as a script
+    sentiment_analysis()
diff --git a/Sentiment Analysis/data.csv b/Sentiment Analysis/data.csv
@@ -0,0 +1,11 @@
+text,label
+"I love this product!",positive
+"This is amazing!",positive
+"Terrible experience. Would not recommend.",negative
+"Not bad, but not great either.",neutral
+"The best purchase I've made!",positive
+"I regret buying this.",negative
+"This is fantastic!",positive
+"Not the best, but okay.",neutral
+"Great value for money.",positive
+"Awful quality. Do not buy.",negative