import pandas as pd
import numpy as np
import json
import os
import sys

import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('wordnet')
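# Note: newer NLTK releases split the Punkt tokenizer data into a 'punkt_tab'
# package; if word_tokenize raises a LookupError, also run nltk.download('punkt_tab').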

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Load the mental-health statements dataset
try:
    print("Files in current directory:", os.listdir())
    csv_files = [f for f in os.listdir() if f.endswith('.csv') and 'Combined' in f and 'Data' in f]
    if csv_files:
        df = pd.read_csv(csv_files[0])
        print(f"Loaded file: {csv_files[0]}")
        print("DataFrame columns:", df.columns.tolist())
        print("DataFrame shape:", df.shape)
        print(df.head())
    else:
        raise FileNotFoundError("No matching CSV file found")
except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Ensure 'Combined_Data.csv' is in the same directory.")
    print("Current working directory:", os.getcwd())
    sys.exit(1)

# Load intents.json
try:
    with open('intents.json', 'r') as f:
        intents = json.load(f)
except FileNotFoundError as e:
    print(f"Error: {e}. Ensure 'intents.json' is available.")
    sys.exit(1)

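# Expected intents.json structure (inferred from the fields this script reads):
# {"intents": [{"tag": "...", "patterns": ["...", ...], "responses": ["...", ...]}, ...]}
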
# Preprocess text: lowercase, tokenize, and lemmatize each token
def preprocess_text(text):
    tokens = nltk.word_tokenize(str(text).lower())
    return ' '.join(lemmatizer.lemmatize(token) for token in tokens)
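# e.g. preprocess_text("Feeling anxious about exams") -> "feeling anxious about exam"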

# Determine the column holding the text data, falling back to the first column
text_column = 'statement' if 'statement' in df.columns else df.columns[0]

df['processed_text'] = df[text_column].apply(preprocess_text)

# Fit a TF-IDF vectorizer on the dataset statements
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df['processed_text'])
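# The same fitted vectorizer is reused below for the intent patterns and for
# user input, so every vector lives in the same feature space as tfidf_matrix.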

# Prepare training data from the intent patterns
X, y = [], []

for intent in intents['intents']:
    for pattern in intent['patterns']:
        X.append(preprocess_text(pattern))
        y.append(intent['tag'])

X = vectorizer.transform(X).toarray()
# Keep the one-hot column order so predictions can be decoded back to tags
y_dummies = pd.get_dummies(y)
intent_labels = y_dummies.columns.tolist()
y = y_dummies.values.astype('float32')

# Ensure X and y have the same number of rows
assert X.shape[0] == y.shape[0], f"Shape mismatch: X={X.shape}, y={y.shape}"

# Build the intent classifier
model = Sequential([
    Input(shape=(X.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(y.shape[1], activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with error handling
try:
    model.fit(X, y, epochs=200, batch_size=32, verbose=1)
except Exception as e:
    print(f"Error during model training: {e}")
    print(f"X shape: {X.shape}, y shape: {y.shape}")
    raise

# Chatbot function: combine TF-IDF retrieval with intent classification
def chatbot_response(user_input):
    processed_input = preprocess_text(user_input)
    input_vector = vectorizer.transform([processed_input]).toarray()

    # Retrieve the most similar statement from the dataset
    similarities = cosine_similarity(input_vector, tfidf_matrix)
    most_similar_idx = similarities.argmax()
    response = df.iloc[most_similar_idx][text_column]

    # Classify intent, decoding with the label order captured at training time
    intent_probs = model.predict(input_vector, verbose=0)[0]
    intent_tag = intent_labels[intent_probs.argmax()]

    # Get a response from intents, with a fallback in case no tag matches
    intent_response = "I'm not sure I understand. Could you tell me more?"
    for intent in intents['intents']:
        if intent['tag'] == intent_tag:
            intent_response = np.random.choice(intent['responses'])
            break

    return f"Dataset Response: {response}\n\nIntent Response: {intent_response}"

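# Note: as written, the "Dataset Response" echoes the closest matching statement
# from the corpus itself; a curated reply would require a separate response column.
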
# Main chat loop
print("Mental Health Chatbot: Hello! I'm here to provide support and resources for mental health. How can I help you today?")
while True:
    user_input = input("You: ").strip()
    if user_input.lower() in ['quit', 'exit', 'bye']:
        print("Mental Health Chatbot: Take care! Remember, it's okay to seek help when you need it.")
        break
    response = chatbot_response(user_input)
    print("Mental Health Chatbot:", response)