
Commit 319b300

performance improvements for spot location refinement code (#96)
* performance improvements for spot location refinement code; pinning some packages in requirements.txt
* updating lookup dict to use collections' defaultdict (sketched below)
1 parent 1a4c9df commit 319b300
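The second bullet describes swapping a manually initialized lookup dict for collections.defaultdict in the refinement code. That diff is not shown on this page, so the snippet below is only a hedged sketch of the general pattern; spots_by_cell and the sample detections are hypothetical names, not taken from the commit.

from collections import defaultdict

# Hypothetical (cell_id, coordinate) pairs standing in for detected spots.
detections = [(3, (10.2, 55.1)), (3, (11.0, 54.8)), (7, (102.4, 9.9))]

# Before: membership check and list creation on every insert.
spots_by_cell = {}
for cell_id, coord in detections:
    if cell_id not in spots_by_cell:
        spots_by_cell[cell_id] = []
    spots_by_cell[cell_id].append(coord)

# After: defaultdict(list) creates the bucket on first access,
# so the per-iteration membership check disappears.
spots_by_cell = defaultdict(list)
for cell_id, coord in detections:
    spots_by_cell[cell_id].append(coord)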

3 files changed: +374 −21 lines changed

Lines changed: 349 additions & 0 deletions
@@ -0,0 +1,349 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2ec63918-562a-4e7f-8098-a9c83b6d81d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eemerson/venvs/deepcell-spots/lib/python3.10/site-packages/keras/optimizer_v2/gradient_descent.py:102: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.\n",
      "  super(SGD, self).__init__(name, **kwargs)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import tifffile as tiff\n",
    "from deepcell_spots.applications import Polaris\n",
    "from pathlib import Path\n",
    "import tensorflow as tf\n",
    "import os\n",
    "\n",
    "\n",
    "SG_MPP_60X = 0.10727\n",
    "\n",
    "data_dir = Path('/mnt/deepcell_data/users/ellen/macrophages/signaling')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ee5a3782-2308-4070-a769-0127ece89a46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# tensorflow/GPU setup\n",
    "device_indices = '1'\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"{}\".format(device_indices)\n",
    "\n",
    "physical_devices = tf.config.experimental.list_physical_devices('GPU')\n",
    "assert len(physical_devices) >= 1 , \"GPU Configuration failed\"\n",
    "\n",
    "# IMPORTANT: without this config, the GPU will run out of memory trying to run Polaris\n",
    "for device in physical_devices:\n",
    "    tf.config.experimental.set_memory_growth(device, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8698200a-31f1-4afe-a877-dc76b67cc321",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-09-08 14:59:00.876518: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2025-09-08 14:59:01.401743: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29909 MB memory: -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:23:00.0, compute capability: 8.6\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:root:Checking for cached data\n",
      "INFO:root:Checking SpotDetection-8.tar.gz against provided file_hash...\n",
      "INFO:root:SpotDetection-8.tar.gz with hash a6164e48ef8872a9524b4ec6726859d7 already available.\n",
      "INFO:root:Extracting /home/eemerson/.deepcell/models/SpotDetection-8.tar.gz\n",
      "INFO:root:Successfully extracted /home/eemerson/.deepcell/models/SpotDetection-8.tar.gz into /home/eemerson/.deepcell/models\n"
     ]
    }
   ],
   "source": [
    "c0_codebook = pd.read_csv(data_dir / 'spatial_genomics_barcodes/extended_panel/df_barcodes_c0.csv', index_col=0)\n",
    "\n",
    "rounds = 20 # number of hybridizations\n",
    "\n",
    "# run this once to ensure model is downloaded\n",
    "# nuc_app = NuclearSegmentation.from_version('1.1')\n",
    "\n",
    "# but then we want the model itself\n",
    "model_dir = Path.home() / \".deepcell\" / \"models\"\n",
    "model_path = model_dir / 'NuclearSegmentation'\n",
    "nuc_model = tf.keras.models.load_model(model_path)\n",
    "\n",
    "polaris_app_c0 = Polaris(image_type='multiplex',\n",
    "                         segmentation_type='nucleus',\n",
    "                         segmentation_model=nuc_model,\n",
    "                         decoding_kwargs={'rounds': rounds,\n",
    "                                          'channels': 1,\n",
    "                                          'df_barcodes': c0_codebook})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "dd1977ca-bb0a-41cb-99ee-c6a069fb168f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4, 8400, 8400, 20)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "img = tiff.imread(data_dir / '20250820-EE_prim_mac_JNK-p65_LPS_paired/100_ng_mL/spatial_genomics/full_scale/cropped_regions/fov_0.tiff')\n",
    "img = img.astype('float32')\n",
    "img.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bfac0f09-90a2-44ac-b246-53c53e6d497d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 8400, 8400, 20)\n",
      "(1, 8400, 8400, 1)\n"
     ]
    }
   ],
   "source": [
    "single_channel_image = np.expand_dims(img[0], axis=0)\n",
    "nuc_img = np.expand_dims(np.expand_dims(img[-1,...,0], axis=-1), axis=0)\n",
    "\n",
    "print(single_channel_image.shape)\n",
    "print(nuc_img.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "521ce244-98d4-4d44-b4ae-151174e9584a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Validating inputs.\n",
      "Predicting spot locations.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 0%| | 0/20 [00:00<?, ?it/s]2025-09-08 15:04:03.907666: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8204\n",
      "2025-09-08 15:04:06.134474: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n",
      "100%|███████████████████████████████████████████████████████████████████| 20/20 [50:06<00:00, 150.33s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Segmenting cells.\n",
      "Decoding gene identities.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eemerson/venvs/deepcell-spots/lib/python3.10/site-packages/torch/__init__.py:749: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:431.)\n",
      "  _C._set_default_tensor_type(t)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████| 500/500 [02:41<00:00, 3.09it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimating barcode probabilities...\n",
      "Refining spot locations.\n",
      "Refining spot locations.\n"
     ]
    }
   ],
   "source": [
    "old_results, new_results, seg = polaris_app_c0.predict(single_channel_image,\n",
    "                                                        segmentation_image=nuc_img,\n",
    "                                                        image_mpp=SG_MPP_60X,\n",
    "                                                        decoding_training_kwargs={'rescue_errors': False,\n",
    "                                                                                  'rescue_mixed': False,\n",
    "                                                                                  'pred_prob_thresh': 0.95})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cc9b13be-bd1b-4084-9413-688acfea4953",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compare_dfs(df1, df2, verbose=True):\n",
    "    identical = True\n",
    "\n",
    "    # 1. Shape\n",
    "    if df1.shape != df2.shape:\n",
    "        if verbose:\n",
    "            print(f\"Shape differs: {df1.shape} vs {df2.shape}\")\n",
    "        identical = False\n",
    "\n",
    "    # 2. Columns\n",
    "    if not df1.columns.equals(df2.columns):\n",
    "        if verbose:\n",
    "            print(f\"Columns differ:\")\n",
    "            print(\"Only in df1:\", df1.columns.difference(df2.columns).tolist())\n",
    "            print(\"Only in df2:\", df2.columns.difference(df1.columns).tolist())\n",
    "        identical = False\n",
    "\n",
    "    # 3. Index\n",
    "    if not df1.index.equals(df2.index):\n",
    "        if verbose:\n",
    "            print(\"Index differs\")\n",
    "        identical = False\n",
    "\n",
    "    # 4. Dtypes\n",
    "    if not (df1.dtypes == df2.dtypes).all():\n",
    "        if verbose:\n",
    "            print(\"Dtypes differ:\")\n",
    "            print(\"df1 dtypes:\\n\", df1.dtypes)\n",
    "            print(\"df2 dtypes:\\n\", df2.dtypes)\n",
    "        identical = False\n",
    "\n",
    "    # 5. Values (per column)\n",
    "    for col in df1.columns:\n",
    "        s1 = df1[col]\n",
    "        s2 = df2[col]\n",
    "\n",
    "        # Align indices to avoid ValueError\n",
    "        s1_aligned, s2_aligned = s1.align(s2)\n",
    "\n",
    "        if pd.api.types.is_numeric_dtype(s1):\n",
    "            mask_diff = ~((s1_aligned.fillna(np.nan) == s2_aligned.fillna(np.nan)))\n",
    "        else:\n",
    "            mask_diff = ~((s1_aligned == s2_aligned) | (s1_aligned.isna() & s2_aligned.isna()))\n",
    "\n",
    "        if mask_diff.any():\n",
    "            if verbose:\n",
    "                print(f\"Differences found in column '{col}':\")\n",
    "                for idx in s1_aligned.index[mask_diff]:\n",
    "                    print(f\"  Row {idx}: {s1_aligned.loc[idx]} != {s2_aligned.loc[idx]}\")\n",
    "            identical = False\n",
    "\n",
    "    if identical and verbose:\n",
    "        print(\"DataFrames are identical!\")\n",
    "\n",
    "    return identical"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1c10a7fd-e039-491e-b160-f887a219db76",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataFrames are identical!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compare_dfs(old_results, new_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4210d68-aad7-41dd-9a61-2fc688000c3e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
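The notebook above runs Polaris.predict and diffs the two returned results tables (old_results vs new_results) with the compare_dfs helper. One caveat with that helper's numeric branch: NaN == NaN evaluates to False, so a pair of aligned NaNs would be reported as a difference. As a cross-check in similar regression tests, pandas' built-in comparison treats aligned NaNs as equal; a minimal sketch (the toy frames below are hypothetical, not the commit's data):

import pandas as pd
from pandas.testing import assert_frame_equal

# Hypothetical stand-ins for old_results / new_results.
old = pd.DataFrame({'x': [0.5, 1.25], 'gene': ['A', 'B']})
new = pd.DataFrame({'x': [0.5, 1.25], 'gene': ['A', 'B']})

# Raises AssertionError with a labeled report if the frames differ;
# check_exact=True disallows tolerance-based float matches.
assert_frame_equal(old, new, check_exact=True)
print('DataFrames are identical!')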
