"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import shap\n",
+ "import torch\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from transformers import RobertaTokenizer\n",
+ "from torch.utils.data import DataLoader\n",
+ "\n",
+ "# Load tokenizer\n",
+ "tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\")\n",
+ "\n",
+ "# Ensure sample_texts is a List[str]\n",
+ "sample_texts = [str(text) for text in dev_texts[:10]] # Convert to a list of strings\n",
+ "\n",
+ "# **SHAP Wrapper: Tokenization & Model Prediction**\n",
+ "class ModelWrapper:\n",
+ " def __init__(self, model, tokenizer):\n",
+ " self.model = model\n",
+ " self.tokenizer = tokenizer\n",
+ "\n",
+ " def __call__(self, input_texts):\n",
+ " \"\"\" Tokenizes text and runs through the model for SHAP \"\"\"\n",
+ " if isinstance(input_texts, np.ndarray): # Convert NumPy arrays to list\n",
+ " input_texts = input_texts.tolist()\n",
+ "\n",
+ " # Ensure input_texts is a List[str]\n",
+ " input_texts = [str(text) for text in input_texts]\n",
+ "\n",
+ " encodings = self.tokenizer(\n",
+ " input_texts, padding=True, truncation=True, max_length=64, return_tensors=\"pt\"\n",
+ " ).to(device)\n",
+ "\n",
+ " with torch.no_grad():\n",
+ " outputs = self.model(encodings[\"input_ids\"], encodings[\"attention_mask\"])\n",
+ "\n",
+ " return torch.sigmoid(outputs).cpu().numpy() # Convert logits to probabilities\n",
+ "\n",
+ "# **Initialize SHAP Explainer with Text Masker**\n",
+ "wrapped_model = ModelWrapper(model, tokenizer)\n",
+ "masker = shap.maskers.Text(tokenizer)\n",
+ "explainer = shap.Explainer(wrapped_model, masker)\n",
+ "\n",
+ "# **Compute SHAP values for selected samples**\n",
+ "shap_values = explainer(sample_texts)\n",
+ "\n",
+ "# **Visualize the SHAP explanation for the first sample**\n",
+ "shap.text_plot(shap_values)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Bwrv04VMmoDd"
+ },
+ "source": [
+ "So I'm trying to figure out a way to remove the G dot symbol that appears at the beginning of every word. You can also run the code, and it saves the explanation diagram in a HTML5 which can be downloaded in opened separately.\n"
+ ]
+ },
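+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A minimal sketch of the two fixes described above, assuming `shap_values` was\n",
+ "# computed in the previous cell. It strips the \"Ġ\" word-start marker that\n",
+ "# RoBERTa's BPE tokenizer prepends to tokens, then writes the interactive text\n",
+ "# plot to an HTML file that can be downloaded and opened in a browser.\n",
+ "for sv in shap_values:\n",
+ "    sv.feature_names = [tok.replace(\"Ġ\", \"\") for tok in sv.feature_names]\n",
+ "\n",
+ "html = shap.plots.text(shap_values, display=False)  # returns HTML when display=False\n",
+ "with open(\"shap_text_plot.html\", \"w\", encoding=\"utf-8\") as f:\n",
+ "    f.write(html)\n"
+ ]
+ },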
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "81szZ2E0f3Gl",
+ "outputId": "e9fc8298-d0cf-4d7d-9c60-fe0a07159d93"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Using device: cuda\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ ":35: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+ " model.load_state_dict(torch.load(\"/content/best_model.pt\", map_location=device))\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import torch\n",
+ "import shap\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "from transformers import RobertaTokenizer, RobertaModel\n",
+ "from torch.utils.data import DataLoader\n",
+ "\n",
+ "# =============================================================================\n",
+ "# 1. Set Up Device and Define Your Custom Model\n",
+ "# =============================================================================\n",
+ "\n",
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "print(\"Using device:\", device)\n",
+ "\n",
+ "# Define your custom model (architecture used during training)\n",
+ "class LightweightRobertaClass(torch.nn.Module):\n",
+ " def __init__(self, num_labels=5):\n",
+ " super(LightweightRobertaClass, self).__init__()\n",
+ " # Load the base model; note that the pooler weights may be newly initialized.\n",
+ " self.roberta = RobertaModel.from_pretrained(\"roberta-base\")\n",
+ " self.dropout = torch.nn.Dropout(0.3)\n",
+ " self.classifier = torch.nn.Linear(768, num_labels)\n",
+ "\n",
+ " def forward(self, input_ids, attention_mask):\n",
+ " output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)\n",
+ " # Use the representation of the [CLS] token (first token)\n",
+ " cls_output = output.last_hidden_state[:, 0, :]\n",
+ " cls_output = self.dropout(cls_output)\n",
+ " logits = self.classifier(cls_output)\n",
+ " return logits\n",
+ "\n",
+ "# Load your saved model weights from /content/best_model.pt.\n",
+ "model = LightweightRobertaClass(num_labels=5)\n",
+ "model.load_state_dict(torch.load(\"/content/best_model.pt\", map_location=device))\n",
+ "model.to(device)\n",
+ "model.eval() # set the model to evaluation mode\n",
+ "\n",
+ "# =============================================================================\n",
+ "# 2. Load Tokenizer and Define Emotion Labels\n",
+ "# =============================================================================\n",
+ "\n",
+ "tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\")\n",
+ "# Ensure the order here matches what was used during training.\n",
+ "emotion_labels = [\"anger\", \"fear\", \"joy\", \"sadness\", \"surprise\"]\n",
+ "\n",
+ "# =============================================================================\n",
+ "# 3. Define a Custom Prediction Function for SHAP\n",
+ "# =============================================================================\n",
+ "\n",
+ "def predict(texts):\n",
+ " \"\"\"\n",
+ " Tokenizes input texts and returns model predictions as probabilities.\n",
+ " Ensures that texts is a list of strings.\n",
+ " \"\"\"\n",
+ " # Convert input to a list of strings if needed.\n",
+ " if isinstance(texts, np.ndarray):\n",
+ " texts = texts.tolist()\n",
+ " if not isinstance(texts, list):\n",
+ " texts = [texts]\n",
+ " texts = [str(t) for t in texts]\n",
+ "\n",
+ " encodings = tokenizer(\n",
+ " texts,\n",
+ " padding=True,\n",
+ " truncation=True,\n",
+ " max_length=64,\n",
+ " return_tensors=\"pt\"\n",
+ " ).to(device)\n",
+ "\n",
+ " with torch.no_grad():\n",
+ " outputs = model(encodings[\"input_ids\"], encodings[\"attention_mask\"])\n",
+ "\n",
+ " # Convert logits to probabilities using sigmoid (assuming BCEWithLogitsLoss was used during training)\n",
+ " probs = torch.sigmoid(outputs)\n",
+ " return probs.cpu().numpy()\n",
+ "\n",
+ "# =============================================================================\n",
+ "# 4. Initialize the SHAP Explainer with a Text Masker\n",
+ "# =============================================================================\n",
+ "\n",
+ "# The text masker uses the tokenizer to split input texts into tokens.\n",
+ "explainer = shap.Explainer(predict, shap.maskers.Text(tokenizer), algorithm=\"partition\")\n",
+ "\n",
+ "# =============================================================================\n",
+ "# 5. Load Sample Data for SHAP Analysis\n",
+ "# =============================================================================\n",
+ "\n",
+ "# Load your development CSV file.\n",
+ "dev_file = \"/content/sample_data/eng_dev.csv\"\n",
+ "df_dev = pd.read_csv(dev_file)\n",
+ "\n",
+ "# Ensure all entries are strings and select a sample (first 10 examples here).\n",
+ "sample_texts = [str(txt) for txt in df_dev[\"text\"].tolist()][:10]\n",
+ "\n",
+ "# =============================================================================\n",
+ "# 6. Compute SHAP Values and Clean Token Names\n",
+ "# =============================================================================\n",
+ "\n",
+ "# Compute the SHAP values for the selected texts.\n",
+ "shap_values = explainer(sample_texts)\n",
+ "\n",
+ "# Define a function to clean out RoBERTa's BPE artifacts (e.g. remove \"Ġ\" or \"▁\").\n",
+ "def clean_token(token):\n",
+ " return token.replace(\"Ġ\", \"\").replace(\"▁\", \"\")\n",
+ "\n",
+ "# Clean the token names in each explanation.\n",
+ "for sv in shap_values:\n",
+ " sv.feature_names = [clean_token(tok) for tok in sv.feature_names]\n",
+ "\n",
+ "# =============================================================================\n",
+ "# 7. Visualize the SHAP Explanations\n",
+ "# =============================================================================\n",
+ "\n",
+ "# Option 1: Interactive Text Plot\n",
+ "# This will display an interactive text plot in the notebook.\n",
+ "shap.plots.text(shap_values)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import shap\n",
+ "import torch\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "from transformers import RobertaTokenizer, RobertaForSequenceClassification, pipeline\n",
+ "from IPython.core.display import display, HTML\n",
+ "\n",
+ "# **Ensure SHAP JS visualization is loaded**\n",
+ "shap.initjs()\n",
+ "\n",
+ "# **Set up device**\n",
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "print(f\"Using device: {device}\")\n",
+ "\n",
+ "# **Load tokenizer and model**\n",
+ "tokenizer = RobertaTokenizer.from_pretrained(\"roberta-base\")\n",
+ "model = RobertaForSequenceClassification.from_pretrained(\"roberta-base\", num_labels=5).to(device)\n",
+ "\n",
+ "# **Build a Transformers Pipeline for SHAP Compatibility**\n",
+ "pred = pipeline(\n",
+ " \"text-classification\",\n",
+ " model=model,\n",
+ " tokenizer=tokenizer,\n",
+ " device=0,\n",
+ " top_k=None, # Replaces deprecated `return_all_scores=True`\n",
+ ")\n",
+ "\n",
+ "# **Load and select sample texts from development dataset**\n",
+ "dev_file = \"/content/sample_data/eng_dev.csv\"\n",
+ "df_dev = pd.read_csv(dev_file)\n",
+ "sample_texts = df_dev[\"text\"].tolist()[:10] # Select 10 samples for SHAP analysis\n",
+ "\n",
+ "# **Initialize SHAP Explainer for the Transformers Pipeline**\n",
+ "explainer = shap.Explainer(pred)\n",
+ "\n",
+ "# **Compute SHAP values**\n",
+ "shap_values = explainer(sample_texts)\n",
+ "\n",
+ "# **🔹 Fix Token Names Before Displaying SHAP Values**\n",
+ "def clean_token(token):\n",
+ " \"\"\"Removes RoBERTa's special Ġ character from tokens.\"\"\"\n",
+ " return token.replace(\"Ġ\", \"\")\n",
+ "\n",
+ "# Apply cleaning function to SHAP output\n",
+ "for sv in shap_values:\n",
+ " sv.feature_names = [clean_token(tok) for tok in sv.feature_names]\n",
+ "\n",
+ "# **🔹 Fix SHAP Labels to Show Emotion Names Instead of `LABEL_0, LABEL_1`**\n",
+ "emotion_labels = [\"anger\", \"fear\", \"joy\", \"sadness\", \"surprise\"]\n",
+ "\n",
+ "# Convert SHAP values into a mean-per-class representation\n",
+ "shap_mean_per_class = [np.mean(sv.values, axis=0) for sv in shap_values]\n",
+ "\n",
+ "# Convert to a SHAP Explanation object with correct labels\n",
+ "shap_explanation = shap.Explanation(\n",
+ " values=np.array(shap_mean_per_class), \n",
+ " feature_names=emotion_labels\n",
+ ")\n",
+ "\n",
+ "# **📌 Option 1: Interactive SHAP Text Plot (Best for Google Colab)**\n",
+ "shap_html = shap.plots.text(shap_values, display=False) # Generate HTML content\n",
+ "display(HTML(shap_html)) # Render HTML in Colab\n",
+ "\n",
+ "# **📌 Option 2: Save SHAP Outputs as an HTML File**\n",
+ "with open(\"shap_explanation.html\", \"w\", encoding=\"utf-8\") as f:\n",
+ " f.write(shap_html)\n",
+ "print(\"SHAP Explanation saved as 'shap_explanation.html'. You can open this file in a browser.\")\n",
+ "\n",
+ "# **📌 Option 3: SHAP Bar Chart for Emotion Contribution**\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "shap.plots.bar(shap_explanation)\n",
+ "plt.show()\n",
+ "\n",
+ "# **📌 Option 4: Waterfall Plot for a Single Emotion**\n",
+ "emotion_index = 0 # Change this to visualize a different emotion (0-4)\n",
+ "\n",
+ "shap_single_label = shap.Explanation(\n",
+ " values=shap_values[0].values[:, emotion_index], # Only one column\n",
+ " base_values=shap_values[0].base_values[emotion_index], # Base value for this emotion\n",
+ " data=shap_values[0].data, # Original tokenized text\n",
+ " feature_names=shap_values[0].feature_names # Token names\n",
+ ")\n",
+ "\n",
+ "# **Waterfall Plot for a Single Emotion**\n",
+ "shap.waterfall_plot(shap_single_label)\n",
+ "\n",
+ "# **📌 Option 5: Corrected Force Plot for a Single Prediction**\n",
+ "expected_value = explainer.expected_value\n",
+ "if isinstance(expected_value, list) or isinstance(expected_value, np.ndarray):\n",
+ " expected_value = expected_value[0] # Extract first value if it's a list\n",
+ "\n",
+ "shap.force_plot(\n",
+ " expected_value,\n",
+ " shap_values[0].values,\n",
+ " shap_values[0].feature_names,\n",
+ " matplotlib=True\n",
+ ")\n",
+ "plt.show()\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "authorship_tag": "ABX9TyOaCmby931KMHlOSy5WRR3v",
+ "gpuType": "T4",
+ "include_colab_link": true,
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/SemEval.ipynb b/SemEval.ipynb
new file mode 100644
index 0000000..8898815
--- /dev/null
+++ b/SemEval.ipynb
@@ -0,0 +1,13518 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "from torch.utils.data import Dataset\n",
+ "from torch.utils.data import DataLoader\n",
+ "from transformers import RobertaModel, RobertaTokenizer, get_linear_schedule_with_warmup, AdamW\n",
+ "from sklearn.metrics import f1_score\n",
+ "import shap\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dynamic max length: 110\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load the CSV file\n",
+ "def load_data(file_path):\n",
+ " df = pd.read_csv(file_path)\n",
+ " texts = df[\"text\"].tolist()\n",
+ " labels = df[[\"anger\", \"fear\", \"joy\", \"sadness\", \"surprise\"]].values\n",
+ " return texts, labels\n",
+ "\n",
+ "# Load train and test data\n",
+ "train_file = \"Emotion_data/public_data_test/track_a/train/eng.csv\"\n",
+ "dev_file = \"Emotion_data/public_data_test/track_a/dev/eng.csv\"\n",
+ "test_file = \"Emotion_data/public_data_test/track_a/test/eng.csv\"\n",
+ "human_pred = \"Emotion_data/eng_test_50 labels.csv\"\n",
+ "\n",
+ "train_texts, train_labels = load_data(train_file)\n",
+ "dev_texts, dev_labels = load_data(dev_file)\n",
+ "test_texts, test_labels = load_data(test_file)\n",
+ "h_texts, h_labels = load_data(human_pred)\n",
+ "\n",
+ "# Initialize tokenizer\n",
+ "tokenizer = RobertaTokenizer.from_pretrained(\"roberta-large\")\n",
+ "\n",
+ "# Find the max token length dynamically\n",
+ "def find_max_length(texts, tokenizer):\n",
+ " tokenized_texts = [tokenizer.tokenize(text) for text in texts]\n",
+ " return max(len(tokens) for tokens in tokenized_texts)\n",
+ "\n",
+ "max_length = find_max_length(train_texts + test_texts, tokenizer) # Find max length from both train & test\n",
+ "print(f\"Dynamic max length: {max_length}\")\n",
+ "\n",
+ "# Tokenize with dynamic max_length\n",
+ "def tokenize_texts(texts, tokenizer, max_length):\n",
+ " return tokenizer(\n",
+ " texts,\n",
+ " max_length=max_length,\n",
+ " truncation=True,\n",
+ " padding=\"max_length\",\n",
+ " return_tensors=\"pt\",\n",
+ " )\n",
+ "\n",
+ "train_encodings = tokenize_texts(train_texts, tokenizer, max_length)\n",
+ "dev_encodings = tokenize_texts(dev_texts, tokenizer, max_length)\n",
+ "test_encodings = tokenize_texts(test_texts, tokenizer, max_length)\n",
+ "h_encodings = tokenize_texts(h_texts, tokenizer, max_length)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dataset class\n",
+ "class EmotionDataset(Dataset):\n",
+ " def __init__(self, encodings, labels):\n",
+ " self.encodings = encodings\n",
+ " self.labels = labels\n",
+ "\n",
+ " def __len__(self):\n",
+ " return len(self.labels)\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " return {\n",
+ " \"input_ids\": self.encodings[\"input_ids\"][idx],\n",
+ " \"attention_mask\": self.encodings[\"attention_mask\"][idx],\n",
+ " \"labels\": torch.tensor(self.labels[idx], dtype=torch.float),\n",
+ " }\n",
+ "\n",
+ "# Create datasets\n",
+ "train_dataset = EmotionDataset(train_encodings, train_labels)\n",
+ "dev_dataset = EmotionDataset(dev_encodings, dev_labels)\n",
+ "test_dataset = EmotionDataset(test_encodings, test_labels)\n",
+ "h_dataset = EmotionDataset(h_encodings, h_labels)\n",
+ "\n",
+ "\n",
+ "# Create data loaders\n",
+ "train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)\n",
+ "dev_loader = DataLoader(dev_dataset, batch_size=8, shuffle=False)\n",
+ "test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class RobertaClass(torch.nn.Module):\n",
+ " def __init__(self, num_labels=5):\n",
+ " super(RobertaClass, self).__init__()\n",
+ " self.roberta = RobertaModel.from_pretrained(\"roberta-large\")\n",
+ " self.dropout = torch.nn.Dropout(0.3)\n",
+ "\n",
+ " # Additional fully connected layers\n",
+ " self.fc1 = torch.nn.Linear(1024, 512) \n",
+ " self.fc2 = torch.nn.Linear(512, num_labels)\n",
+ "\n",
+ " def forward(self, input_ids, attention_mask):\n",
+ " # Get the output from Roberta\n",
+ " output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)\n",
+ " \n",
+ " cls_output = output[0][:, 0, :] # (batch_size, hidden_size) - [CLS] token representation\n",
+ " \n",
+ " # Apply dropout for regularization\n",
+ " cls_output = self.dropout(cls_output)\n",
+ "\n",
+ " # Pass through the first fully connected layer\n",
+ " x = torch.nn.ReLU()(self.fc1(cls_output))\n",
+ " \n",
+ " # Apply dropout after the fully connected layer\n",
+ " x = self.dropout(x)\n",
+ " \n",
+ " # Final output layer for classification\n",
+ " logits = self.fc2(x)\n",
+ " \n",
+ " return logits\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "/home/bhra451f/.conda/envs/SemEval/lib/python3.13/site-packages/transformers/optimization.py:591: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 1/20, Loss: 0.5785464017377423\n",
+ "Epoch 2/20, Loss: 0.4053634816856054\n",
+ "Epoch 3/20, Loss: 0.3126654745957066\n",
+ "Epoch 4/20, Loss: 0.22486786189020713\n",
+ "Epoch 5/20, Loss: 0.16152142268212544\n",
+ "Epoch 6/20, Loss: 0.10660705753639771\n",
+ "Epoch 7/20, Loss: 0.07348340036175241\n",
+ "Epoch 8/20, Loss: 0.05557230722449849\n",
+ "Epoch 9/20, Loss: 0.040043652660584864\n",
+ "Epoch 10/20, Loss: 0.025424654331768867\n",
+ "Epoch 11/20, Loss: 0.024165811880513206\n",
+ "Epoch 12/20, Loss: 0.013389948796550878\n",
+ "Epoch 13/20, Loss: 0.008618564357391606\n",
+ "Epoch 14/20, Loss: 0.007726972375934502\n",
+ "Epoch 15/20, Loss: 0.0075498565570841615\n",
+ "Epoch 16/20, Loss: 0.004976064788387561\n",
+ "Epoch 17/20, Loss: 0.005387922700641225\n",
+ "Epoch 18/20, Loss: 0.003472040692005156\n",
+ "Epoch 19/20, Loss: 0.0035766657482902654\n",
+ "Epoch 20/20, Loss: 0.0031993960542133285\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_933215/3682864175.py:58: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+ " model.load_state_dict(torch.load(\"best_model.pt\"))\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Training the Roberta model\n",
+ "device = torch.device(\"cuda:7\" if torch.cuda.is_available() else \"cpu\")\n",
+ "\n",
+ "epochs = 20\n",
+ "\n",
+ "model = RobertaClass(num_labels=5)\n",
+ "model.to(device)\n",
+ "\n",
+ "optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8, weight_decay=1e-3) # Adjusted learning rate\n",
+ "criterion = torch.nn.BCEWithLogitsLoss()\n",
+ "\n",
+ "# Scheduler for learning rate decay\n",
+ "warmup_ratio = 0.1 # 10% of total training as warmup\n",
+ "total_steps = len(train_loader) * epochs\n",
+ "warmup_steps = int(total_steps * warmup_ratio)\n",
+ "\n",
+ "scheduler = get_linear_schedule_with_warmup(\n",
+ " optimizer,\n",
+ " num_warmup_steps=warmup_steps,\n",
+ " num_training_steps=total_steps\n",
+ ")\n",
+ "\n",
+ "best_loss = float('inf')\n",
+ "\n",
+ "for epoch in range(epochs):\n",
+ " model.train()\n",
+ " total_loss = 0\n",
+ " for batch in train_loader:\n",
+ " input_ids = batch[\"input_ids\"].to(device)\n",
+ " attention_mask = batch[\"attention_mask\"].to(device)\n",
+ " labels = batch[\"labels\"].to(device)\n",
+ " \n",
+ " optimizer.zero_grad()\n",
+ " \n",
+ " # Forward pass\n",
+ " outputs = model(input_ids, attention_mask)\n",
+ " \n",
+ " # Compute loss\n",
+ " loss = criterion(outputs, labels)\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " scheduler.step() # Update the learning rate based on the scheduler\n",
+ "\n",
+ " total_loss += loss.item()\n",
+ "\n",
+ "\n",
+ " # Print training loss\n",
+ " print(f\"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader)}\")\n",
+ "\n",
+ " # Save the best model based on loss\n",
+ " if total_loss < best_loss:\n",
+ " best_loss = total_loss\n",
+ " torch.save(model.state_dict(), \"best_model.pt\")\n",
+ "\n",
+ "# Load the best model for evaluation\n",
+ "model.load_state_dict(torch.load(\"best_model.pt\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Validation F1 Score per label (with dev data):\n",
+ "Label 0 F1 Score: 0.7742\n",
+ "Label 1 F1 Score: 0.8244\n",
+ "Label 2 F1 Score: 0.7451\n",
+ "Label 3 F1 Score: 0.8169\n",
+ "Label 4 F1 Score: 0.7692\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Evaluation\n",
+ "model.eval()\n",
+ "all_preds = []\n",
+ "all_labels = []\n",
+ "test_loss = 0.0\n",
+ "correct = 0\n",
+ "total = 0\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " for batch in dev_loader:\n",
+ " input_ids = batch[\"input_ids\"].to(device)\n",
+ " attention_mask = batch[\"attention_mask\"].to(device)\n",
+ " labels = batch[\"labels\"].to(device)\n",
+ " \n",
+ " # Forward pass\n",
+ " outputs = model(input_ids, attention_mask)\n",
+ " \n",
+ " # Apply sigmoid and threshold to get binary predictions\n",
+ " preds = (torch.sigmoid(outputs) > 0.5).float() # Threshold at 0.5 for multi-label\n",
+ " \n",
+ " # Collect predictions and labels for F1 score\n",
+ " all_preds.append(preds.cpu().numpy())\n",
+ " all_labels.append(labels.cpu().numpy())\n",
+ "\n",
+ "# Flatten lists and calculate F1 score\n",
+ "all_preds = np.vstack(all_preds) \n",
+ "all_labels = np.vstack(all_labels) \n",
+ "\n",
+ "# Calculate F1 score for each label individually\n",
+ "f1_per_label = f1_score(all_labels, all_preds, average=None) # F1 score for each label\n",
+ "print(\"Validation F1 Score per label (with dev data):\")\n",
+ "for idx, score in enumerate(f1_per_label):\n",
+ " print(f\"Label {idx} F1 Score: {score:.4f}\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "F1 Score per label (with human prediction):\n",
+ "Label 0 F1 Score: 0.4000\n",
+ "Label 1 F1 Score: 0.6383\n",
+ "Label 2 F1 Score: 0.6667\n",
+ "Label 3 F1 Score: 0.6400\n",
+ "Label 4 F1 Score: 0.5185\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Evaluation with human predictions\n",
+ "model.eval()\n",
+ "all_preds = []\n",
+ "all_labels = []\n",
+ "test_loss = 0.0\n",
+ "correct = 0\n",
+ "total = 0\n",
+ "\n",
+ "h_loader = DataLoader(h_dataset, batch_size=8, shuffle=False)\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " for batch in h_loader:\n",
+ " input_ids = batch[\"input_ids\"].to(device)\n",
+ " attention_mask = batch[\"attention_mask\"].to(device)\n",
+ " labels = batch[\"labels\"].to(device)\n",
+ " \n",
+ " # Forward pass\n",
+ " outputs = model(input_ids, attention_mask)\n",
+ " \n",
+ " # Apply sigmoid and threshold to get binary predictions\n",
+ " preds = (torch.sigmoid(outputs) > 0.5).float() # Threshold at 0.5 for multi-label\n",
+ " \n",
+ " # Collect predictions and labels for F1 score\n",
+ " all_preds.append(preds.cpu().numpy())\n",
+ " all_labels.append(labels.cpu().numpy())\n",
+ "\n",
+ "# Flatten lists and calculate F1 score\n",
+ "all_preds = np.vstack(all_preds) \n",
+ "all_labels = np.vstack(all_labels) \n",
+ "\n",
+ "# Calculate F1 score for each label individually\n",
+ "f1_per_label = f1_score(all_labels, all_preds, average=None) # F1 score for each label\n",
+ "print(\"F1 Score per label (with human prediction):\")\n",
+ "for idx, score in enumerate(f1_per_label):\n",
+ " print(f\"Label {idx} F1 Score: {score:.4f}\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Binary predictions saved to test_predictions.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Predict the labels for test data and Store \n",
+ "predictions = []\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " for batch in test_loader:\n",
+ " input_ids = batch[\"input_ids\"].to(device)\n",
+ " attention_mask = batch[\"attention_mask\"].to(device)\n",
+ " \n",
+ " outputs = model(input_ids, attention_mask)\n",
+ " preds = torch.sigmoid(outputs).cpu().numpy()\n",
+ " binary_preds = (preds >= 0.5).astype(int) # Apply thresholding\n",
+ " predictions.extend(binary_preds)\n",
+ "\n",
+ "# Convert predictions to DataFrame\n",
+ "test_df = pd.read_csv(test_file) # Load the original test file to get the text\n",
+ "preds_df = pd.DataFrame(predictions, columns=[\"anger\", \"fear\", \"joy\", \"sadness\", \"surprise\"])\n",
+ "output_df = pd.concat([test_df[\"text\"], preds_df], axis=1)\n",
+ "\n",
+ "# Save to CSV\n",
+ "output_df.to_csv(\"test_predictions.csv\", index=False)\n",
+ "print(\"Binary predictions saved to test_predictions.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "/tmp/ipykernel_933215/1227555123.py:4: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+ " model.load_state_dict(torch.load(\"best_model.pt\"), strict=False) # Load trained weights\n",
+ "PartitionExplainer explainer: 4it [01:18, 26.10s/it] \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sample 1: / o \\ So today I went in for a new exam with Dr. Polvi today, I had to file new paperwork for the automobile accident case which is being done differently then the scoliosis stuff.So he comes in and starts talking about insurance stuff and how this look bad since I was getting treatment on my neck and stuff already blah blah.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
outputs
\n",
+ "
anger
\n",
+ "
fear
\n",
+ "
joy
\n",
+ "
sadness
\n",
+ "
surprise
0-0.2-0.40.20.40.00174290.0017429base value0.0004373160.000437316fanger(inputs)0.013accident0.009case0.004done0.004automobile0.003which0.002new0.001is0.001Pol0.001vi0.0exam0.0with0.0to0.0the0.0for0.0I was getting treatment on my neck and0.0had0.00.0stuff already blah blah .-0.021differently-0.004today-0.004being-0.002file-0.002I-0.002paperwork-0.001Dr-0.001,-0.001.-0.0then-0.0iosis-0.0the-0.0stuff-0.0sc-0.0ol-0.0.-0.0I-0.0o-0.0\\-0.0/-0.0went-0.0-0.0So-0.0today-0.0for-0.0new-0.0in-0.0a-0.0So he comes in and starts talking about insurance stuff and how this look bad since
inputs
-0.0
-0.0
/
-0.0
o
-0.0
\\
-0.0
So
-0.0
today
-0.0
I
-0.0
went
-0.0
in
-0.0
for
-0.0
a
-0.0
new
0.0
exam
0.0
with
-0.001
Dr
-0.0
.
0.001
Pol
0.001
vi
-0.004
today
-0.001
,
-0.002
I
0.0
had
0.0
to
-0.002
file
0.002
new
-0.002
paperwork
0.0
for
0.0
the
0.004
automobile
0.013
accident
0.009
case
0.003
which
0.001
is
-0.004
being
0.004
done
-0.021
differently
-0.0
then
-0.0
the
-0.0
sc
-0.0
ol
-0.0
iosis
-0.0
stuff
-0.001
.
-0.0 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
0.0 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
0-0.01-0.02-0.030.010.020.030.040.00174290.0017429base value0.0004373160.000437316fanger(inputs)0.013accident0.009case0.004done0.004automobile0.003which0.002new0.001is0.001Pol0.001vi0.0exam0.0with0.0to0.0the0.0for0.0I was getting treatment on my neck and0.0had0.00.0stuff already blah blah .-0.021differently-0.004today-0.004being-0.002file-0.002I-0.002paperwork-0.001Dr-0.001,-0.001.-0.0then-0.0iosis-0.0the-0.0stuff-0.0sc-0.0ol-0.0.-0.0I-0.0o-0.0\\-0.0/-0.0went-0.0-0.0So-0.0today-0.0for-0.0new-0.0in-0.0a-0.0So he comes in and starts talking about insurance stuff and how this look bad since
inputs
-0.0
-0.0
/
-0.0
o
-0.0
\\
-0.0
So
-0.0
today
-0.0
I
-0.0
went
-0.0
in
-0.0
for
-0.0
a
-0.0
new
0.0
exam
0.0
with
-0.001
Dr
-0.0
.
0.001
Pol
0.001
vi
-0.004
today
-0.001
,
-0.002
I
0.0
had
0.0
to
-0.002
file
0.002
new
-0.002
paperwork
0.0
for
0.0
the
0.004
automobile
0.013
accident
0.009
case
0.003
which
0.001
is
-0.004
being
0.004
done
-0.021
differently
-0.0
then
-0.0
the
-0.0
sc
-0.0
ol
-0.0
iosis
-0.0
stuff
-0.001
.
-0.0 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
0.0 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
0-0.2-0.40.20.40.01803580.0180358base value5.25106e-055.25106e-05ffear(inputs)0.368accident0.042exam0.014new0.012with0.009new0.009a0.009\\0.008o0.007So0.007today0.006.0.004the0.002differently0.002being0.002done0.002is0.001/0.00.0stuff already blah blah .-0.067Dr-0.063case-0.063paperwork-0.046,-0.044which-0.035Pol-0.03today-0.024I-0.023I-0.023had-0.023went-0.02vi-0.013in-0.012for-0.011for-0.01automobile-0.007So he comes in and starts talking about insurance stuff and how this look bad since-0.002file-0.002I was getting treatment on my neck and-0.002to-0.001.-0.0ol-0.0then-0.0the-0.0sc-0.0stuff-0.0-0.0iosis
inputs
-0.0
0.001
/
0.008
o
0.009
\\
0.007
So
0.007
today
-0.023
I
-0.023
went
-0.013
in
-0.012
for
0.009
a
0.009
new
0.042
exam
0.012
with
-0.067
Dr
0.006
.
-0.035
Pol
-0.02
vi
-0.03
today
-0.046
,
-0.024
I
-0.023
had
-0.002
to
-0.002
file
0.014
new
-0.063
paperwork
-0.011
for
0.004
the
-0.01
automobile
0.368
accident
-0.063
case
-0.044
which
0.002
is
0.002
being
0.002
done
0.002
differently
-0.0
then
-0.0
the
-0.0
sc
-0.0
ol
-0.0
iosis
-0.0
stuff
-0.001
.
-0.007 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
-0.002 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
0-0.2-0.40.20.40.01803580.0180358base value5.25106e-055.25106e-05ffear(inputs)0.368accident0.042exam0.014new0.012with0.009new0.009a0.009\\0.008o0.007So0.007today0.006.0.004the0.002differently0.002being0.002done0.002is0.001/0.00.0stuff already blah blah .-0.067Dr-0.063case-0.063paperwork-0.046,-0.044which-0.035Pol-0.03today-0.024I-0.023I-0.023had-0.023went-0.02vi-0.013in-0.012for-0.011for-0.01automobile-0.007So he comes in and starts talking about insurance stuff and how this look bad since-0.002file-0.002I was getting treatment on my neck and-0.002to-0.001.-0.0ol-0.0then-0.0the-0.0sc-0.0stuff-0.0-0.0iosis
inputs
-0.0
0.001
/
0.008
o
0.009
\\
0.007
So
0.007
today
-0.023
I
-0.023
went
-0.013
in
-0.012
for
0.009
a
0.009
new
0.042
exam
0.012
with
-0.067
Dr
0.006
.
-0.035
Pol
-0.02
vi
-0.03
today
-0.046
,
-0.024
I
-0.023
had
-0.002
to
-0.002
file
0.014
new
-0.063
paperwork
-0.011
for
0.004
the
-0.01
automobile
0.368
accident
-0.063
case
-0.044
which
0.002
is
0.002
being
0.002
done
0.002
differently
-0.0
then
-0.0
the
-0.0
sc
-0.0
ol
-0.0
iosis
-0.0
stuff
-0.001
.
-0.007 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
-0.002 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
0-0.2-0.40.20.40.000364440.00036444base value0.006739140.00673914fjoy(inputs)0.001Dr0.001.0.0,0.0I was getting treatment on my neck and0.0done0.0with0.0being0.0a0.0I0.0for0.0exam0.0is0.0in0.0new0.0went0.0today0.0.0.0differently0.0for0.0ol0.0then0.0Pol0.0vi0.0stuff0.0the0.0sc0.0So0.0I0.0iosis0.0had0.0today0.0new0.0automobile0.0paperwork0.0the0.0file0.0to-0.001accident-0.001So he comes in and starts talking about insurance stuff and how this look bad since-0.0which-0.0\\-0.0o-0.0case-0.0/-0.0-0.0-0.0stuff already blah blah .
inputs
-0.0
-0.0
/
-0.0
o
-0.0
\\
0.0
So
0.0
today
0.0
I
0.0
went
0.0
in
0.0
for
0.0
a
0.0
new
0.0
exam
0.0
with
0.001
Dr
0.001
.
0.0
Pol
0.0
vi
0.0
today
0.0
,
0.0
I
0.0
had
0.0
to
0.0
file
0.0
new
0.0
paperwork
0.0
for
0.0
the
0.0
automobile
-0.001
accident
-0.0
case
-0.0
which
0.0
is
0.0
being
0.0
done
0.0
differently
0.0
then
0.0
the
0.0
sc
0.0
ol
0.0
iosis
0.0
stuff
0.0
.
-0.001 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
0.0 / 8
I was getting treatment on my neck and
-0.0 / 5
stuff already blah blah .
-0.0
0.0040.00200.0060.0080.000364440.00036444base value0.006739140.00673914fjoy(inputs)0.001Dr0.001.0.0,0.0I was getting treatment on my neck and0.0done0.0with0.0being0.0a0.0I0.0for0.0exam0.0is0.0in0.0new0.0went0.0today0.0.0.0differently0.0for0.0ol0.0then0.0Pol0.0vi0.0stuff0.0the0.0sc0.0So0.0I0.0iosis0.0had0.0today0.0new0.0automobile0.0paperwork0.0the0.0file0.0to-0.001accident-0.001So he comes in and starts talking about insurance stuff and how this look bad since-0.0which-0.0\\-0.0o-0.0case-0.0/-0.0-0.0-0.0stuff already blah blah .
inputs
-0.0
-0.0
/
-0.0
o
-0.0
\\
0.0
So
0.0
today
0.0
I
0.0
went
0.0
in
0.0
for
0.0
a
0.0
new
0.0
exam
0.0
with
0.001
Dr
0.001
.
0.0
Pol
0.0
vi
0.0
today
0.0
,
0.0
I
0.0
had
0.0
to
0.0
file
0.0
new
0.0
paperwork
0.0
for
0.0
the
0.0
automobile
-0.001
accident
-0.0
case
-0.0
which
0.0
is
0.0
being
0.0
done
0.0
differently
0.0
then
0.0
the
0.0
sc
0.0
ol
0.0
iosis
0.0
stuff
0.0
.
-0.001 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
0.0 / 8
I was getting treatment on my neck and
-0.0 / 5
stuff already blah blah .
-0.0
0-0.2-0.40.20.40.0001122890.000112289base value0.01125860.0112586fsadness(inputs)0.001,0.001done0.001accident0.001which0.001case0.001today0.001.0.0automobile0.0So0.0today0.0I0.0went0.0o0.0\\0.0/0.00.0had0.0new0.0in0.0to0.0the0.0for0.0a0.0new0.0paperwork0.0file0.0vi0.0Pol0.0I0.0with0.0exam0.0for0.0So he comes in and starts talking about insurance stuff and how this look bad since0.0I was getting treatment on my neck and0.0Dr0.00.0stuff already blah blah .-0.003differently-0.001is-0.001being-0.0stuff-0.0iosis-0.0.-0.0sc-0.0ol-0.0the-0.0then
inputs
0.0
0.0
/
0.0
o
0.0
\\
0.0
So
0.0
today
0.0
I
0.0
went
0.0
in
0.0
for
0.0
a
0.0
new
0.0
exam
0.0
with
0.0
Dr
0.001
.
0.0
Pol
0.0
vi
0.001
today
0.001
,
0.0
I
0.0
had
0.0
to
0.0
file
0.0
new
0.0
paperwork
0.0
for
0.0
the
0.0
automobile
0.001
accident
0.001
case
0.001
which
-0.001
is
-0.001
being
0.001
done
-0.003
differently
-0.0
then
-0.0
the
-0.0
sc
-0.0
ol
-0.0
iosis
-0.0
stuff
-0.0
.
0.0 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
0.0 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
0.0060.0030-0.0030.0090.0120.0150.0001122890.000112289base value0.01125860.0112586fsadness(inputs)0.001,0.001done0.001accident0.001which0.001case0.001today0.001.0.0automobile0.0So0.0today0.0I0.0went0.0o0.0\\0.0/0.00.0had0.0new0.0in0.0to0.0the0.0for0.0a0.0new0.0paperwork0.0file0.0vi0.0Pol0.0I0.0with0.0exam0.0for0.0So he comes in and starts talking about insurance stuff and how this look bad since0.0I was getting treatment on my neck and0.0Dr0.00.0stuff already blah blah .-0.003differently-0.001is-0.001being-0.0stuff-0.0iosis-0.0.-0.0sc-0.0ol-0.0the-0.0then
inputs
0.0
0.0
/
0.0
o
0.0
\\
0.0
So
0.0
today
0.0
I
0.0
went
0.0
in
0.0
for
0.0
a
0.0
new
0.0
exam
0.0
with
0.0
Dr
0.001
.
0.0
Pol
0.0
vi
0.001
today
0.001
,
0.0
I
0.0
had
0.0
to
0.0
file
0.0
new
0.0
paperwork
0.0
for
0.0
the
0.0
automobile
0.001
accident
0.001
case
0.001
which
-0.001
is
-0.001
being
0.001
done
-0.003
differently
-0.0
then
-0.0
the
-0.0
sc
-0.0
ol
-0.0
iosis
-0.0
stuff
-0.0
.
0.0 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
0.0 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
0-0.2-0.40.20.40.02316980.0231698base value0.000126060.00012606fsurprise(inputs)0.113accident0.065automobile0.059differently0.041which0.03exam0.018the0.013.0.013/0.012with0.01being0.006stuff0.005iosis0.005sc0.004\\0.003new0.003the0.003o0.003a0.002then0.001So he comes in and starts talking about insurance stuff and how this look bad since0.001is0.0010.001.0.0ol0.00.0stuff already blah blah .-0.086paperwork-0.075Dr-0.031,-0.029case-0.025file-0.024I-0.023went-0.02I-0.019in-0.019for-0.019to-0.014had-0.014for-0.011vi-0.006Pol-0.006I was getting treatment on my neck and-0.006today-0.004So-0.002done-0.002new-0.001today
inputs
0.001
0.013
/
0.003
o
0.004
\\
-0.004
So
-0.006
today
-0.024
I
-0.023
went
-0.019
in
-0.019
for
0.003
a
0.003
new
0.03
exam
0.012
with
-0.075
Dr
0.001
.
-0.006
Pol
-0.011
vi
-0.001
today
-0.031
,
-0.02
I
-0.014
had
-0.019
to
-0.025
file
-0.002
new
-0.086
paperwork
-0.014
for
0.018
the
0.065
automobile
0.113
accident
-0.029
case
0.041
which
0.001
is
0.01
being
-0.002
done
0.059
differently
0.002
then
0.003
the
0.005
sc
0.0
ol
0.005
iosis
0.006
stuff
0.013
.
0.001 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
-0.006 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
0-0.1-0.2-0.3-0.40.10.20.30.40.02316980.0231698base value0.000126060.00012606fsurprise(inputs)0.113accident0.065automobile0.059differently0.041which0.03exam0.018the0.013.0.013/0.012with0.01being0.006stuff0.005iosis0.005sc0.004\\0.003new0.003the0.003o0.003a0.002then0.001So he comes in and starts talking about insurance stuff and how this look bad since0.001is0.0010.001.0.0ol0.00.0stuff already blah blah .-0.086paperwork-0.075Dr-0.031,-0.029case-0.025file-0.024I-0.023went-0.02I-0.019in-0.019for-0.019to-0.014had-0.014for-0.011vi-0.006Pol-0.006I was getting treatment on my neck and-0.006today-0.004So-0.002done-0.002new-0.001today
inputs
0.001
0.013
/
0.003
o
0.004
\\
-0.004
So
-0.006
today
-0.024
I
-0.023
went
-0.019
in
-0.019
for
0.003
a
0.003
new
0.03
exam
0.012
with
-0.075
Dr
0.001
.
-0.006
Pol
-0.011
vi
-0.001
today
-0.031
,
-0.02
I
-0.014
had
-0.019
to
-0.025
file
-0.002
new
-0.086
paperwork
-0.014
for
0.018
the
0.065
automobile
0.113
accident
-0.029
case
0.041
which
0.001
is
0.01
being
-0.002
done
0.059
differently
0.002
then
0.003
the
0.005
sc
0.0
ol
0.005
iosis
0.006
stuff
0.013
.
0.001 / 16
So he comes in and starts talking about insurance stuff and how this look bad since
-0.006 / 8
I was getting treatment on my neck and
0.0 / 5
stuff already blah blah .
0.0
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sample 2: The image I have in my mind is this: a group of well-meaning vaguely sadistic folks digging a deep pit in my heart and mind that can only be filled with fear.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
0-0.2-0.40.20.40.01790460.0179046base value4.37732e-054.37732e-05ffear(inputs)0.052fear0.033that0.032filled0.028mind0.017with0.01pit0.009in0.001.0.0-0.06be-0.051can-0.04only-0.017heart-0.017my-0.002and-0.002is this : a-0.002The image I-0.002group of well --0.002folks digging a deep-0.001my mind-0.001have in-0.001sad-0.0meaning-0.0vaguely-0.0istic
inputs
-0.002 / 4
The image I
-0.001 / 2
have in
-0.001 / 2
my mind
-0.002 / 4
is this : a
-0.002 / 4
group of well -
-0.0
meaning
-0.0
vaguely
-0.001
sad
-0.0
istic
-0.002 / 4
folks digging a deep
0.01
pit
0.009
in
-0.017
my
-0.017
heart
-0.002
and
0.028
mind
0.033
that
-0.051
can
-0.04
only
-0.06
be
0.032
filled
0.017
with
0.052
fear
0.001
.
0.0
0-0.10.10.20.01790460.0179046base value4.37732e-054.37732e-05ffear(inputs)0.052fear0.033that0.032filled0.028mind0.017with0.01pit0.009in0.001.0.0-0.06be-0.051can-0.04only-0.017heart-0.017my-0.002and-0.002is this : a-0.002The image I-0.002group of well --0.002folks digging a deep-0.001my mind-0.001have in-0.001sad-0.0meaning-0.0vaguely-0.0istic
0-0.2-0.40.20.40.01790420.0179042base value4.3773e-054.3773e-05ffear(inputs)0.052fear0.033that0.032filled0.028mind0.017with0.01pit0.009in0.001.-0.06be-0.051can-0.04only-0.017heart-0.017my-0.002and-0.002is this : a-0.002The image I-0.002group of well --0.002folks digging a deep-0.001my mind-0.001have in-0.001sad-0.0meaning-0.0vaguely-0.0istic-0.0
inputs
-0.002 / 4
The image I
-0.001 / 2
have in
-0.001 / 2
my mind
-0.002 / 4
is this : a
-0.002 / 4
group of well -
-0.0
meaning
-0.0
vaguely
-0.001
sad
-0.0
istic
-0.002 / 4
folks digging a deep
0.01
pit
0.009
in
-0.017
my
-0.017
heart
-0.002
and
0.028
mind
0.033
that
-0.051
can
-0.04
only
-0.06
be
0.032
filled
0.017
with
0.052
fear
0.001
.
-0.0
0-0.10.10.20.01790420.0179042base value4.3773e-054.3773e-05ffear(inputs)0.052fear0.033that0.032filled0.028mind0.017with0.01pit0.009in0.001.-0.06be-0.051can-0.04only-0.017heart-0.017my-0.002and-0.002is this : a-0.002The image I-0.002group of well --0.002folks digging a deep-0.001my mind-0.001have in-0.001sad-0.0meaning-0.0vaguely-0.0istic-0.0
"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ " Sample 2: The image I have in my mind is this: a group of well-meaning vaguely sadistic folks digging a deep pit in my heart and mind that can only be filled with fear.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "