diff --git a/README.md b/README.md
deleted file mode 100644
index 3a77d8b3c..000000000
--- a/README.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# FER May Hackathon
-
-Facial Emotion Detection Hackathon Project: create a model and test it using 5-to-10-second videos to detect emotions.
-
-# Facial Emotion Recognition
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-

-
-# Decoding Data Science in partnership with Falcons.ai
-
-
-Objective: Develop an efficient facial emotion classification system employing OpenCV/Tensorflow to identify facial emotions within video streams. The goal is to achieve a high level of accuracy, low latency, and minimal computational overhead.
-
-Similar to:
-
-[](https://www.youtube.com/watch?v=Msk1drgWEdY)
-
-Data Source: A video dataset or a combination of image datasets featuring the target objects in states of emotion.
-
-Kaggle : https://www.kaggle.com/datasets/msambare/fer2013
-
-Preprocessing (if needed): Standardize or augment the images/video frames to improve model generalization, if necessary, while preserving the aspect ratio and critical features.
-
-Model Selection & Training:
-1. Using the FER dataset(partial).
-2. Train a custom model using the prepared dataset and analyze the performance.
-3. Deploy Streamlit and OpenCV to allow users a web ui in which to upload a video and have the video frames analyzed by your model.
-
-Expectations
-
-The expectations are for the following:
-1) The code used to train the model.
-2) The model you trained.
-3) The Code used to run the UI and upload the video for inference.
-
-This problem set provides a clear path to address image analysis issues using OpenCV, with a focus on Facial Emotion Classification in video streams. It allows researchers or students to hone in on critical aspects such as data preprocessing, model selection, hyperparameter tuning, performance evaluation, and results interpretation.
-
-
--------------- Fully functional Jupyternotebook will be added upon hack-a-thon challenge completion --------------
-
-
-
-(back to top)
-
-
-
-## Usage
-
-
- To use the notebook with relative ease please follow the steps below:
-
-
-
-1. Ensure all of the required libraries are installed.
-
-2. Load the libraries.
-
-3. Run the cells and the cloud images will be generated and saved in the "clouds" directory.
-
-
-
-(back to top)
-
-
-
-
-
-
-## Contributing
-
-Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
-
-If you want, feel free to fork this repository. You can also simply open an issue with the tag "enhancement".
-Don't forget to give the project a star! Thanks again!
-
-1. Fork the Project
-2. Create your Feature Branch (`git checkout -b feature/YourFeature`)
-3. Commit your Changes (`git commit -m 'Add some YourFeature'`)
-4. Push to the Branch (`git push origin feature/YourFeature`)
-5. Open a Pull Request
-
-
-
-(back to top)
-
-
-
-
-## License
-
-
-
-(back to top)
-
-
-
-
-## Contact
-
-Project Link: [https://github.com/Falcons-ai/fer_dds_challenge]
-
-
-(back to top)
-
-
-
-Contributing
-Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are greatly appreciated.
-
-If you want, feel free to fork this repository. You can also simply open an issue with the tag "enhancement". Don't forget to give the project a star! Thanks again!
-
-Fork the Project
-Create your Feature Branch (git checkout -b feature/YourFeature)
-Commit your Changes (git commit -m 'Add some YourFeature')
-Push to the Branch (git push origin feature/YourFeature)
-Open a Pull Request
diff --git a/emotion_quantized.tflite b/emotion_quantized.tflite
new file mode 100644
index 000000000..d14de7ee4
Binary files /dev/null and b/emotion_quantized.tflite differ
diff --git a/model_training.ipynb b/model_training.ipynb
new file mode 100644
index 000000000..b7e0bbcd8
--- /dev/null
+++ b/model_training.ipynb
@@ -0,0 +1,332 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "17d08e51-cdd4-452b-ab39-e5299e543107",
+ "metadata": {},
+ "source": [
+ "The following is the training sequence of a light weight model used for emotion detection on video frames. The model is small to increase the inference speed while maintaining a decent accuracy. It was further quantised from 8mb to 2 mb post training. Here is the demo video link: https://drive.google.com/file/d/1atuPCgSzteMwL30KOVUDsYgNZSkelzgI/view?usp=drive_link"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a847c7a-fac0-4530-b49a-c9b414807b42",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "^C\n",
+ "Requirement already satisfied: numpy in c:\\python312\\lib\\site-packages (1.26.4)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q tensorflow\n",
+ "!pip install -q tensorflow-model-optimization\n",
+ "!pip install numpy\n",
+ "!pip install keras seaborn scikit-learn matplotlib"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9aef1b7-c297-4117-a855-f512f5105d8a",
+ "metadata": {},
+ "source": [
+ "The following section is data preparation. To run this sequence, create a folder named 'images' in the root of the project. It should have 2 folders, 'train' and 'validation'. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1786eb7f-6eef-48b2-9195-6be97b127353",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from keras.preprocessing.image import load_img, img_to_array\n",
+ "\n",
+ "def extract_dataset(main_folder, image_size=(48, 48)):\n",
+ " train_dataset = []\n",
+ " train_labels = []\n",
+ " val_dataset = []\n",
+ " val_labels = []\n",
+ "\n",
+ " # Define a dictionary to map each emotion to a unique label\n",
+ " emotion_labels = {'neutral': 0, 'happy': 1,'angry': 2 , 'surprise':3,'sad':4}\n",
+ "\n",
+ " # Loop through emotions in the main folder\n",
+ " for emotion in emotion_labels:\n",
+ " train_folder = os.path.join(main_folder, 'train', emotion)\n",
+ " val_folder = os.path.join(main_folder, 'validation', emotion)\n",
+ " label = emotion_labels[emotion]\n",
+ "\n",
+ " # Training set\n",
+ " for filename in os.listdir(train_folder):\n",
+ " if filename.endswith(('.jpg', '.jpeg', '.png', '.JPG')): \n",
+ " image_path = os.path.join(train_folder, filename)\n",
+ "\n",
+ " # Loading the image using TensorFlow and convert to grayscale\n",
+ " img = load_img(image_path, color_mode='grayscale', target_size=image_size)\n",
+ " img_array = img_to_array(img)\n",
+ "\n",
+ " # Normalize the pixel values to the range [0, 1]\n",
+ " img_array /= 255.0\n",
+ "\n",
+ " # Append the image data and label to the training dataset\n",
+ " train_dataset.append(img_array)\n",
+ " train_labels.append(label)\n",
+ "\n",
+ " # Validation set\n",
+ " for filename in os.listdir(val_folder):\n",
+ " if filename.endswith(('.jpg', '.jpeg', '.png', '.JPG')): \n",
+ " image_path = os.path.join(val_folder, filename)\n",
+ "\n",
+ " # Loading the image using TensorFlow and converting to grayscale\n",
+ " img = load_img(image_path, color_mode='grayscale', target_size=image_size)\n",
+ " img_array = img_to_array(img)\n",
+ "\n",
+ " # Normalize the pixel values to the range [0, 1]\n",
+ " img_array /= 255.0\n",
+ "\n",
+ " # Append the image data to the validation dataset\n",
+ " val_dataset.append(img_array)\n",
+ " val_labels.append(label)\n",
+ "\n",
+ " return train_dataset, train_labels, val_dataset, val_labels\n",
+ " \n",
+ "main_folder_path = '/images' \n",
+ "image_size = (48, 48)\n",
+ "\n",
+ "train_dataset, train_labels, val_dataset, val_labels = extract_dataset(main_folder_path, image_size)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed0dd97e-4d17-4f77-89de-d7501cb8a4e0",
+ "metadata": {},
+ "source": [
+ "The following code defines the model. We are using vggnet16 architecture with the fer2013 dataset. The inputs are grey scale 48 x 48 images."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0ea1673-3e38-45da-80ac-b86463f5f7c3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "from keras.models import Sequential\n",
+ "from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization\n",
+ "from keras.optimizers import Adam\n",
+ "from keras.callbacks import EarlyStopping\n",
+ "from keras.regularizers import l2\n",
+ "from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay\n",
+ "from sklearn.metrics import classification_report, roc_curve, auc\n",
+ "from sklearn.utils import class_weight\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import numpy as np\n",
+ "import tensorflow_model_optimization as tfmot\n",
+ "import tf_keras as keras\n",
+ "import tempfile\n",
+ "\n",
+ "def create_vggnet16_model(input_shape=(48, 48, 1)):\n",
+ " model = Sequential()\n",
+ "\n",
+ " model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48,48,1)))\n",
+ " model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))\n",
+ " model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+ " model.add(Dropout(0.25))\n",
+ "\n",
+ " model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))\n",
+ " model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+ " model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))\n",
+ " model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+ " model.add(Dropout(0.25))\n",
+ "\n",
+ " model.add(Flatten())\n",
+ " model.add(Dense(1024, activation='relu'))\n",
+ " model.add(Dropout(0.5))\n",
+ " model.add(Dense(5, activation='softmax'))\n",
+ " model.summary()\n",
+ " return model\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "24d2c686-1aa3-47ad-a9c4-7a0ab6376e3a",
+ "metadata": {},
+ "source": [
+ "These are some graph plotting functions. They have been taken as a sample from the internet."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "203cf059-cb5e-49ae-bc93-a36411ebb432",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def plot_confusion_matrix(y_true, y_pred, classes):\n",
+ " cm = confusion_matrix(y_true, y_pred)\n",
+ " \n",
+ " # Get unique classes from the data\n",
+ " unique_classes = np.unique(np.concatenate([y_true, y_pred]))\n",
+ "\n",
+ " # Display confusion matrix\n",
+ " disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)\n",
+ " disp.plot(cmap='Blues', values_format='d')\n",
+ "\n",
+ " # Update tick locations based on unique classes\n",
+ " plt.xticks(np.arange(len(unique_classes)), unique_classes, rotation=45)\n",
+ " plt.yticks(np.arange(len(unique_classes)), unique_classes)\n",
+ " \n",
+ " plt.show()\n",
+ "def plot_accuracy_curves(history):\n",
+ " # Plot train accuracy and validation accuracy over epochs\n",
+ " plt.plot(history.history['accuracy'], label='Train Accuracy')\n",
+ " plt.plot(history.history['val_accuracy'], label='Validation Accuracy')\n",
+ " plt.xlabel('Epochs')\n",
+ " plt.ylabel('Accuracy')\n",
+ " plt.legend()\n",
+ " plt.title('Training Vs Validation Accuracy')\n",
+ " plt.show() \n",
+ "\n",
+ "def plot_loss_curves(history):\n",
+ " # Plot train loss and validation loss over epochs\n",
+ " plt.plot(history.history['loss'], label='Train Loss')\n",
+ " plt.plot(history.history['val_loss'], label='Validation Loss')\n",
+ " plt.xlabel('Epochs')\n",
+ " plt.ylabel('Loss')\n",
+ " plt.legend()\n",
+ " plt.title('Training Vs Validation Loss')\n",
+ " plt.show()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07966a86-e87a-4300-8828-2ab7529b1abf",
+ "metadata": {},
+ "source": [
+ "This is the main training sequence. We have used Adams optimizer and sparse categorical crossentropy as our loss function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6fabee30-2034-49ba-a2c3-f959132147e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def train_vgg(dataset, labels, learning_rate=0.001, batch_size=32, test_size=0.2, epochs=10, patience=3):\n",
+ " # Split the dataset into training and testing sets\n",
+ " X_train, X_test, y_train, y_test = train_test_split(dataset, labels, test_size=test_size, random_state=42)\n",
+ "\n",
+ " # Compute class weights\n",
+ " class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(labels), y=labels)\n",
+ " class_weight_dict = dict(enumerate(class_weights))\n",
+ " print(class_weight_dict)\n",
+ " \n",
+ " # Create and compile the VGGNet16 model\n",
+ " model = create_vggnet16_model()\n",
+ " optimizer = Adam(learning_rate=learning_rate)\n",
+ " model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
+ "\n",
+ " # Define early stopping\n",
+ " early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)\n",
+ "\n",
+ " # Train the model with class weights\n",
+ " history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test),\n",
+ " callbacks=[early_stopping], class_weight=class_weight_dict)\n",
+ "\n",
+ " # Quantize the model\n",
+ " \n",
+ "\n",
+ " # Evaluate the quantized model\n",
+ " test_loss, test_accuracy = model.evaluate(X_test, y_test)\n",
+ " print(f\"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}\")\n",
+ "\n",
+ " # Optionally, you can also evaluate the quantized model on the training set\n",
+ " train_loss, train_accuracy = model.evaluate(X_train, y_train)\n",
+ " print(f\"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}\")\n",
+ "\n",
+ " return model\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a785e396-4ebc-4d6e-ad6b-e066db4c0f15",
+ "metadata": {},
+ "source": [
+ "In the following section, the model is trained on a total of 40 epochs. After the model is trained, it is converted into a tflite model and quantised. The quantised model is then saved as emotion_quantized.tflite in the root of the project."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d3e395a8-3e74-4356-b62e-dbf8b5f3924b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import psutil\n",
+ "print(\"Memory Usage Before:\", psutil.virtual_memory())\n",
+ "\n",
+ "# for five emotions {neutral , happy ,angry, surprise, sad}\n",
+ "\n",
+ "dataset = np.array(train_dataset)\n",
+ "labels = np.array(train_labels)\n",
+ "\n",
+ "simple_model = train_vgg(dataset, labels, learning_rate=0.0001, batch_size=128, test_size=0.2, epochs=40, patience=7)\n",
+ "# Convert the Keras model to TensorFlow Lite\n",
+ "converter = tf.lite.TFLiteConverter.from_keras_model(simple_model)\n",
+ "converter.optimizations = [tf.lite.Optimize.DEFAULT] # Enable optimization\n",
+ "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] # Target INT8 for better performance\n",
+ "converter.allow_flexible_interpreter = True # Allow flexible interpreter for dynamic range support\n",
+ "\n",
+ "\n",
+ "def representative_dataset():\n",
+ " for _ in range(100): \n",
+ " yield [np.random.random_sample((1, 48, 48, 1)).astype(np.float32)]\n",
+ "\n",
+ "converter.representative_dataset = representative_dataset\n",
+ "tflite_quant_model = converter.convert()\n",
+ "\n",
+ "\n",
+ "with open(\"emotion_quantized.tflite\", \"wb\") as f:\n",
+ " f.write(tflite_quant_model)\n",
+ "\n",
+ "\n",
+ "model_size_bytes = os.path.getsize(\"emotion_quantized.tflite\")\n",
+ "model_size_kb = model_size_bytes / 1024 # Convert to kilobytes\n",
+ "model_size_mb = model_size_kb / 1024 # Convert to megabytes\n",
+ "print(f\"Size of the quantized model: {model_size_bytes} bytes ({model_size_kb:.2f} KB, {model_size_mb:.2f} MB)\")"
+ ]
+  },
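+  {
+   "cell_type": "markdown",
+   "id": "b7a4c5d6-1e2f-4a3b-9c8d-7e6f5a4b3c2d",
+   "metadata": {},
+   "source": [
+    "Optional sanity check (a minimal sketch): assuming the cells above have just been run, so that emotion_quantized.tflite exists on disk and val_dataset / val_labels are still in memory, load the quantised model with the TFLite interpreter and measure its accuracy on a small slice of the validation set."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8b5d6e7-2f3a-4b4c-8d9e-6f7a5b4c3d2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch only: quick accuracy check of the quantised model on validation samples.\n",
+    "interpreter = tf.lite.Interpreter(model_path=\"emotion_quantized.tflite\")\n",
+    "interpreter.allocate_tensors()\n",
+    "input_index = interpreter.get_input_details()[0]['index']\n",
+    "output_index = interpreter.get_output_details()[0]['index']\n",
+    "\n",
+    "val_array = np.array(val_dataset, dtype=np.float32)\n",
+    "n_samples = min(200, len(val_array))\n",
+    "correct = 0\n",
+    "for i in range(n_samples):\n",
+    "    # inference_input_type was not set above, so the interpreter still takes float32 inputs\n",
+    "    interpreter.set_tensor(input_index, val_array[i].reshape(1, 48, 48, 1))\n",
+    "    interpreter.invoke()\n",
+    "    pred = np.argmax(interpreter.get_tensor(output_index))\n",
+    "    correct += int(pred == val_labels[i])\n",
+    "print(f\"Quantised model accuracy on {n_samples} validation samples: {correct / n_samples:.4f}\")"
+   ]
+  }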
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/readme.md b/readme.md
new file mode 100644
index 000000000..ffe7e6bd0
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,14 @@
+# Cipher Pol
+
+## Team Members
+
+- **Murtaza Mustafa**
+- **Shafaq Mandha**
+
+## Captain
+
+- **Murtaza Mustafa**
+
+## Contact Information
+
+Email ID: murtaza.0903@gmail.com
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..bb4aa47fd
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,26 @@
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyter_server==2.14.1
+jupyter_server_terminals==0.5.3
+jupyterlab==4.2.1
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.2
+jupyterlab_widgets==3.0.11
+keras==3.3.3
+matplotlib-inline==0.1.7
+notebook==7.2.0
+notebook_shim==0.2.4
+numpy==1.26.4
+opencv-python==4.9.0.80
+opencv-python-headless==4.9.0.80
+pandas==2.2.2
+pillow==10.3.0
+streamlit==1.35.0
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.1
+tensorflow-intel==2.16.1
\ No newline at end of file
diff --git a/video.txt b/video.txt
new file mode 100644
index 000000000..c3eb9bcf9
--- /dev/null
+++ b/video.txt
@@ -0,0 +1 @@
+Here is the link to the video demo: https://drive.google.com/file/d/1atuPCgSzteMwL30KOVUDsYgNZSkelzgI/view?usp=drive_link
diff --git a/website.py b/website.py
new file mode 100644
index 000000000..27c478a5b
--- /dev/null
+++ b/website.py
@@ -0,0 +1,88 @@
+# To run this website, install the modules from the requirements file, then run this command in your terminal:
+#     streamlit run website.py
+# We currently process only one face per frame to reduce computational overhead. Latency might
+# drop further when streaming frames directly to the Streamlit app.
+
+
+
+import streamlit as st
+import cv2 as cv
+import tempfile
+import numpy as np
+import tensorflow as tf
+
+# Load the emotion recognition model
+interpreter = tf.lite.Interpreter(model_path='emotion_quantized.tflite')
+interpreter.allocate_tensors()
+
+def get_input_details(interpreter):
+    # The model has a single input tensor, so return its index directly
+    return interpreter.get_input_details()[0]['index']
+
+def get_output_details(interpreter):
+ output_details = interpreter.get_output_details()
+ return output_details[0]['index']
+
+
+st.title("Team Cipher Pol Facial Emotion Detector")
+f = st.file_uploader("Upload file")
+
+
+
+
+
+def preprocess(input_image, target_size=(48, 48)):
+ face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')
+ img_gray = cv.cvtColor(input_image, cv.COLOR_BGR2GRAY)
+ faces = face_cascade.detectMultiScale(img_gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+ if len(faces) > 0:
+        # Only the first detected face is used; see the preprocess_all_faces sketch below for a multi-face variant
+        x, y, w, h = faces[0]
+ face_img = img_gray[y:y+h, x:x+w]
+ cv.rectangle(input_image, (x, y), (x+w, y+h), (0, 255, 0), 2)
+ resized_face = cv.resize(face_img, target_size)
+ normalized_face = resized_face / 255.0
+ normalized_face = np.expand_dims(normalized_face, axis=0)
+ return normalized_face
+ return None
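+
+
+# Illustrative sketch, not wired into the app above: a variant of preprocess() that returns one
+# normalized 48x48 face batch per detected face, for anyone who wants to score every face in a
+# frame instead of only the first one. It reuses the same Haar cascade and grayscale pipeline.
+def preprocess_all_faces(input_image, target_size=(48, 48)):
+    face_cascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')
+    img_gray = cv.cvtColor(input_image, cv.COLOR_BGR2GRAY)
+    faces = face_cascade.detectMultiScale(img_gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+    face_batches = []
+    for (x, y, w, h) in faces:
+        face_img = cv.resize(img_gray[y:y+h, x:x+w], target_size) / 255.0
+        cv.rectangle(input_image, (x, y), (x+w, y+h), (0, 255, 0), 2)
+        face_batches.append(np.expand_dims(face_img, axis=0).astype(np.float32))
+    return face_batches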
+
+stframe = st.empty()
+
+def predict_emotion_from_video_file(interpreter, video_file_path):
+ cap = cv.VideoCapture(video_file_path)
+ while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+
+ preprocessed_frame = preprocess(frame, target_size=(48, 48))
+ if preprocessed_frame is not None:
+            input_index = get_input_details(interpreter)
+ preprocessed_frame_float32 = preprocessed_frame.astype(np.float32).reshape(1, 48, 48, 1)
+ interpreter.set_tensor(input_index, preprocessed_frame_float32) # Use the converted array
+ interpreter.invoke()
+
+ output_index = get_output_details(interpreter)
+ predictions = interpreter.get_tensor(output_index)
+ top_classes_indices = np.argsort(predictions)[0, -2:][::-1]
+ top_classes_labels = ['neutral', 'happy', 'angry', 'surprise', 'sad']
+ top1_class_index = top_classes_indices[0]
+ top1_class_label = top_classes_labels[top1_class_index]
+ top1_class_percentage = predictions[0, top1_class_index] * 100
+ top2_class_index = top_classes_indices[1]
+ top2_class_label = top_classes_labels[top2_class_index]
+ top2_class_percentage = predictions[0, top2_class_index] * 100
+ cv.putText(frame, f"{top1_class_label}: {top1_class_percentage:.2f}% | {top2_class_label}: {top2_class_percentage:.2f}%", (10, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+            # Frames are displayed through Streamlit; cv.imshow is avoided because it needs a
+            # cv.waitKey loop and a display, which a deployed Streamlit app does not have.
+            frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
+            stframe.image(frame_rgb)
+
+    cap.release()
+
+
+if f is not None:
+ tfile = tempfile.NamedTemporaryFile(delete=False)
+    tfile.write(f.read())
+    tfile.close()  # close so cv.VideoCapture can open the file (required on Windows)
+ predict_emotion_from_video_file(interpreter, tfile.name)
\ No newline at end of file