
Commit 319b300

performance improvements for spot location refinement code (#96)
* performance improvements for spot location refinement code; pinning some packages in requirements.txt
* updating lookup dict to use collections' defaultdict (sketched below)
1 parent 1a4c9df commit 319b300
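The second bullet describes swapping a manually initialized lookup dict for collections.defaultdict in the refinement code. That diff is not shown on this page, so the snippet below is only a hedged sketch of the general pattern; spots_by_cell and the sample detections are hypothetical names, not taken from the commit.

from collections import defaultdict

# Hypothetical (cell_id, coordinate) pairs standing in for detected spots.
detections = [(3, (10.2, 55.1)), (3, (11.0, 54.8)), (7, (102.4, 9.9))]

# Before: membership check and list creation on every insert.
spots_by_cell = {}
for cell_id, coord in detections:
    if cell_id not in spots_by_cell:
        spots_by_cell[cell_id] = []
    spots_by_cell[cell_id].append(coord)

# After: defaultdict(list) creates the bucket on first access,
# so the per-iteration membership check disappears.
spots_by_cell = defaultdict(list)
for cell_id, coord in detections:
    spots_by_cell[cell_id].append(coord)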

3 files changed: +374 −21 lines changed

Lines changed: 349 additions & 0 deletions
@@ -0,0 +1,349 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2ec63918-562a-4e7f-8098-a9c83b6d81d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eemerson/venvs/deepcell-spots/lib/python3.10/site-packages/keras/optimizer_v2/gradient_descent.py:102: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.\n",
      "  super(SGD, self).__init__(name, **kwargs)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import tifffile as tiff\n",
    "from deepcell_spots.applications import Polaris\n",
    "from pathlib import Path\n",
    "import tensorflow as tf\n",
    "import os\n",
    "\n",
    "\n",
    "SG_MPP_60X = 0.10727\n",
    "\n",
    "data_dir = Path('/mnt/deepcell_data/users/ellen/macrophages/signaling')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ee5a3782-2308-4070-a769-0127ece89a46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# tensorflow/GPU setup\n",
    "device_indices = '1'\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"{}\".format(device_indices)\n",
    "\n",
    "physical_devices = tf.config.experimental.list_physical_devices('GPU')\n",
    "assert len(physical_devices) >= 1 , \"GPU Configuration failed\"\n",
    "\n",
    "# IMPORTANT: without this config, the GPU will run out of memory trying to run Polaris\n",
    "for device in physical_devices:\n",
    "    tf.config.experimental.set_memory_growth(device, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8698200a-31f1-4afe-a877-dc76b67cc321",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-09-08 14:59:00.876518: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2025-09-08 14:59:01.401743: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29909 MB memory: -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:23:00.0, compute capability: 8.6\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:root:Checking for cached data\n",
      "INFO:root:Checking SpotDetection-8.tar.gz against provided file_hash...\n",
      "INFO:root:SpotDetection-8.tar.gz with hash a6164e48ef8872a9524b4ec6726859d7 already available.\n",
      "INFO:root:Extracting /home/eemerson/.deepcell/models/SpotDetection-8.tar.gz\n",
      "INFO:root:Successfully extracted /home/eemerson/.deepcell/models/SpotDetection-8.tar.gz into /home/eemerson/.deepcell/models\n"
     ]
    }
   ],
   "source": [
    "c0_codebook = pd.read_csv(data_dir / 'spatial_genomics_barcodes/extended_panel/df_barcodes_c0.csv', index_col=0)\n",
    "\n",
    "rounds = 20 # number of hybridizations\n",
    "\n",
    "# run this once to ensure model is downloaded\n",
    "# nuc_app = NuclearSegmentation.from_version('1.1')\n",
    "\n",
    "# but then we want the model itself\n",
    "model_dir = Path.home() / \".deepcell\" / \"models\"\n",
    "model_path = model_dir / 'NuclearSegmentation'\n",
    "nuc_model = tf.keras.models.load_model(model_path)\n",
    "\n",
    "polaris_app_c0 = Polaris(image_type='multiplex',\n",
    "                         segmentation_type='nucleus',\n",
    "                         segmentation_model=nuc_model,\n",
    "                         decoding_kwargs={'rounds': rounds,\n",
    "                                          'channels': 1,\n",
    "                                          'df_barcodes': c0_codebook})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "dd1977ca-bb0a-41cb-99ee-c6a069fb168f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4, 8400, 8400, 20)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "img = tiff.imread(data_dir / '20250820-EE_prim_mac_JNK-p65_LPS_paired/100_ng_mL/spatial_genomics/full_scale/cropped_regions/fov_0.tiff')\n",
    "img = img.astype('float32')\n",
    "img.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bfac0f09-90a2-44ac-b246-53c53e6d497d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 8400, 8400, 20)\n",
      "(1, 8400, 8400, 1)\n"
     ]
    }
   ],
   "source": [
    "single_channel_image = np.expand_dims(img[0], axis=0)\n",
    "nuc_img = np.expand_dims(np.expand_dims(img[-1,...,0], axis=-1), axis=0)\n",
    "\n",
    "print(single_channel_image.shape)\n",
    "print(nuc_img.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "521ce244-98d4-4d44-b4ae-151174e9584a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Validating inputs.\n",
      "Predicting spot locations.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 0%| | 0/20 [00:00<?, ?it/s]2025-09-08 15:04:03.907666: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8204\n",
      "2025-09-08 15:04:06.134474: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n",
      "100%|███████████████████████████████████████████████████████████████████| 20/20 [50:06<00:00, 150.33s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Segmenting cells.\n",
      "Decoding gene identities.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eemerson/venvs/deepcell-spots/lib/python3.10/site-packages/torch/__init__.py:749: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:431.)\n",
      "  _C._set_default_tensor_type(t)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████| 500/500 [02:41<00:00, 3.09it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimating barcode probabilities...\n",
      "Refining spot locations.\n",
      "Refining spot locations.\n"
     ]
    }
   ],
   "source": [
    "old_results, new_results, seg = polaris_app_c0.predict(single_channel_image,\n",
    "                                                        segmentation_image=nuc_img,\n",
    "                                                        image_mpp=SG_MPP_60X,\n",
    "                                                        decoding_training_kwargs={'rescue_errors': False,\n",
    "                                                                                  'rescue_mixed': False,\n",
    "                                                                                  'pred_prob_thresh': 0.95})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cc9b13be-bd1b-4084-9413-688acfea4953",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compare_dfs(df1, df2, verbose=True):\n",
    "    identical = True\n",
    "\n",
    "    # 1. Shape\n",
    "    if df1.shape != df2.shape:\n",
    "        if verbose:\n",
    "            print(f\"Shape differs: {df1.shape} vs {df2.shape}\")\n",
    "        identical = False\n",
    "\n",
    "    # 2. Columns\n",
    "    if not df1.columns.equals(df2.columns):\n",
    "        if verbose:\n",
    "            print(f\"Columns differ:\")\n",
    "            print(\"Only in df1:\", df1.columns.difference(df2.columns).tolist())\n",
    "            print(\"Only in df2:\", df2.columns.difference(df1.columns).tolist())\n",
    "        identical = False\n",
    "\n",
    "    # 3. Index\n",
    "    if not df1.index.equals(df2.index):\n",
    "        if verbose:\n",
    "            print(\"Index differs\")\n",
    "        identical = False\n",
    "\n",
    "    # 4. Dtypes\n",
    "    if not (df1.dtypes == df2.dtypes).all():\n",
    "        if verbose:\n",
    "            print(\"Dtypes differ:\")\n",
    "            print(\"df1 dtypes:\\n\", df1.dtypes)\n",
    "            print(\"df2 dtypes:\\n\", df2.dtypes)\n",
    "        identical = False\n",
    "\n",
    "    # 5. Values (per column)\n",
    "    for col in df1.columns:\n",
    "        s1 = df1[col]\n",
    "        s2 = df2[col]\n",
    "\n",
    "        # Align indices to avoid ValueError\n",
    "        s1_aligned, s2_aligned = s1.align(s2)\n",
    "\n",
    "        if pd.api.types.is_numeric_dtype(s1):\n",
    "            mask_diff = ~((s1_aligned.fillna(np.nan) == s2_aligned.fillna(np.nan)))\n",
    "        else:\n",
    "            mask_diff = ~((s1_aligned == s2_aligned) | (s1_aligned.isna() & s2_aligned.isna()))\n",
    "\n",
    "        if mask_diff.any():\n",
    "            if verbose:\n",
    "                print(f\"Differences found in column '{col}':\")\n",
    "                for idx in s1_aligned.index[mask_diff]:\n",
    "                    print(f\"  Row {idx}: {s1_aligned.loc[idx]} != {s2_aligned.loc[idx]}\")\n",
    "            identical = False\n",
    "\n",
    "    if identical and verbose:\n",
    "        print(\"DataFrames are identical!\")\n",
    "\n",
    "    return identical"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1c10a7fd-e039-491e-b160-f887a219db76",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataFrames are identical!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compare_dfs(old_results, new_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4210d68-aad7-41dd-9a61-2fc688000c3e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
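The notebook above runs Polaris.predict and diffs the two returned results tables (old_results vs new_results) with the compare_dfs helper. One caveat with that helper's numeric branch: NaN == NaN evaluates to False, so a pair of aligned NaNs would be reported as a difference. As a cross-check in similar regression tests, pandas' built-in comparison treats aligned NaNs as equal; a minimal sketch (the toy frames below are hypothetical, not the commit's data):

import pandas as pd
from pandas.testing import assert_frame_equal

# Hypothetical stand-ins for old_results / new_results.
old = pd.DataFrame({'x': [0.5, 1.25], 'gene': ['A', 'B']})
new = pd.DataFrame({'x': [0.5, 1.25], 'gene': ['A', 'B']})

# Raises AssertionError with a labeled report if the frames differ;
# check_exact=True disallows tolerance-based float matches.
assert_frame_equal(old, new, check_exact=True)
print('DataFrames are identical!')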
