Skip to content

Commit d448461

Browse files
committed
Punctate analysis methods
1 parent 1068809 commit d448461

File tree

119 files changed

+10795
-3440
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

119 files changed

+10795
-3440
lines changed

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.9

__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# -*- coding: utf-8 -*-
21

32
"""Top-level package for cellpack_analysis."""
43

cellpack_analysis/analysis/occupancy_analysis/calculate_available_space.py

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,63 +9,65 @@
99
distances are saved in a grid directory for each cellid.
1010
Distances are normalized by the cell diameter and saved in the grid directory.
1111
"""
12+
import logging
1213
from concurrent.futures import ProcessPoolExecutor, as_completed
13-
from pathlib import Path
1414

1515
import pandas as pd
1616
from tqdm import tqdm
1717

18+
from cellpack_analysis.lib.file_io import get_project_root
1819
from cellpack_analysis.lib.mesh_tools import calculate_grid_distances
1920

21+
log = logging.getLogger(__name__)
2022
# %% set structure id
21-
STRUCTURE_ID = "RAB5A" # SLC25A17: peroxisomes, RAB5A: early endosomes
23+
STRUCTURE_ID = "mean" # SLC25A17: peroxisomes, RAB5A: early endosomes
2224
# %% set up parameters for grid
2325
SPACING = 2
2426
# %% set file paths and setup parameters
25-
base_datadir = Path(__file__).parents[3] / "data"
26-
print(f"Data directory: {base_datadir}")
27+
base_datadir = get_project_root() / "data"
28+
log.info(f"Data directory: {base_datadir}")
2729

2830
# %% select cellids to use
2931
use_mean_shape = True
3032
if use_mean_shape:
31-
mesh_folder = base_datadir / "average_shape_meshes"
3233
cellids_to_use = ["mean"]
3334
else:
34-
mesh_folder = base_datadir / f"structure_data/{STRUCTURE_ID}/meshes/"
3535
df_cellid = pd.read_csv("s3://cellpack-analysis-data/all_cellids.csv")
3636
df_struct = df_cellid.loc[df_cellid["structure_name"] == STRUCTURE_ID]
37-
cellids_to_use = df_struct.loc[df_struct["8dsphere"], "CellId"]
38-
print(f"Using {len(cellids_to_use)} cellids")
37+
cellids_to_use = df_struct.loc[df_struct["8dsphere"], "CellId"].tolist()
38+
mesh_folder = base_datadir / f"structure_data/{STRUCTURE_ID}/meshes/"
39+
log.info(f"Using {len(cellids_to_use)} cellids")
3940
# %% get meshes for cellids used
41+
# cellids_to_use = [cellids_to_use[0]]
4042
cellid_list = []
4143
nuc_meshes_to_use = []
4244
mem_meshes_to_use = []
4345
for cellid in cellids_to_use:
44-
nuc_mesh = mesh_folder / f"nuc_mesh_{cellid}.obj"
45-
mem_mesh = mesh_folder / f"mem_mesh_{cellid}.obj"
46-
if nuc_mesh.exists() and mem_mesh.exists():
46+
nuc_mesh_path = mesh_folder / f"nuc_mesh_{cellid}.obj"
47+
mem_mesh_path = mesh_folder / f"mem_mesh_{cellid}.obj"
48+
if nuc_mesh_path.exists() and mem_mesh_path.exists():
4749
cellid_list.append(cellid)
48-
nuc_meshes_to_use.append(nuc_mesh)
49-
mem_meshes_to_use.append(mem_mesh)
50-
print(f"Found {len(nuc_meshes_to_use)} meshes")
50+
nuc_meshes_to_use.append(nuc_mesh_path)
51+
mem_meshes_to_use.append(mem_mesh_path)
52+
log.info(f"Found {len(nuc_meshes_to_use)} meshes")
5153
# %% set up grid results directory
52-
grid_dir = base_datadir / f"structure_data/{STRUCTURE_ID}/grid_distances"
54+
grid_dir = base_datadir / f"structure_data/{STRUCTURE_ID}/grid_distances/"
5355
grid_dir.mkdir(exist_ok=True, parents=True)
54-
# %% run in parallel
55-
PARALLEL = True
56-
recalculate = True
56+
# %% run the workflow
57+
save_dir = None
58+
PARALLEL = False
59+
recalculate = False
5760
calc_nuc_distances = True
5861
calc_mem_distances = True
5962
calc_z_distances = True
60-
calc_inside_mem = True
61-
chunk_size = 50000
63+
calc_scaled_nuc_distances = True
64+
chunk_size = 20000
6265
if PARALLEL:
63-
num_cores = 4
66+
num_cores = 8
6467
results = []
6568
with ProcessPoolExecutor(max_workers=num_cores) as executor:
66-
futures = []
6769
for i in range(len(nuc_meshes_to_use)):
68-
futures.append(
70+
results.append(
6971
executor.submit(
7072
calculate_grid_distances,
7173
nuc_meshes_to_use[i],
@@ -77,13 +79,15 @@
7779
calc_nuc_distances,
7880
calc_mem_distances,
7981
calc_z_distances,
82+
calc_scaled_nuc_distances,
8083
chunk_size,
8184
)
8285
)
8386

84-
with tqdm(total=len(futures), desc="CellIDs done") as pbar:
85-
for future in as_completed(futures):
86-
pbar.update(1)
87+
with tqdm(total=len(results), desc="CellIDs done") as pbar:
88+
for result in as_completed(results):
89+
if result.result:
90+
pbar.update(1)
8791
else:
8892
results = []
8993
for i in tqdm(range(len(nuc_meshes_to_use)), desc="CellIDs done"):
@@ -98,6 +102,9 @@
98102
calc_nuc_distances=calc_nuc_distances,
99103
calc_mem_distances=calc_mem_distances,
100104
calc_z_distances=calc_z_distances,
105+
calc_scaled_nuc_distances=calc_scaled_nuc_distances,
101106
chunk_size=chunk_size,
102107
)
103108
)
109+
110+
# %%
Lines changed: 96 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
11
# %% [markdown]
2-
"""
3-
## Analyze and visualize available space distribution
4-
"""
2+
# # Analyze and visualize available space distribution
3+
import logging
54
import pickle
65
from pathlib import Path
76

87
import matplotlib.pyplot as plt
98
import numpy as np
109
import pandas as pd
1110
import seaborn as sns
12-
13-
# %%
1411
import trimesh
1512
from tqdm import tqdm
1613

@@ -19,16 +16,23 @@
1916
round_away_from_zero,
2017
)
2118

22-
# %% set pixel size
19+
log = logging.getLogger(__name__)
20+
21+
# %% [markdown]
22+
# ## Set parameters and file paths
23+
MEAN_SHAPE = False
24+
2325
PIX_SIZE = 0.108 # um per pixel
2426

25-
# %% set structure id
26-
STRUCTURE_ID = "SLC25A17" # SLC25A17: peroxisomes, RAB5A: early endosomes
27-
# %% set file paths and setup parameters
27+
STRUCTURE_ID = "SEC61B" # SLC25A17: peroxisomes, RAB5A: early endosomes
28+
STRUCTURE_NAME = "ER_peroxisome"
29+
2830
base_datadir = Path(__file__).parents[3] / "data"
2931
base_results_dir = Path(__file__).parents[3] / "results"
3032

31-
results_dir = base_results_dir / f"stochastic_variation_analysis/{STRUCTURE_ID}/rules/"
33+
results_dir = (
34+
base_results_dir / f"stochastic_variation_analysis/{STRUCTURE_NAME}/rules/"
35+
)
3236
results_dir.mkdir(exist_ok=True, parents=True)
3337

3438
figures_dir = results_dir / "figures"
@@ -37,24 +41,23 @@
3741
grid_dir = base_datadir / f"structure_data/{STRUCTURE_ID}/grid_distances"
3842
grid_dir.mkdir(exist_ok=True, parents=True)
3943

40-
print(f"Results directory: {results_dir}")
41-
print(f"Figures directory: {figures_dir}")
42-
print(f"Grid directory: {grid_dir}")
44+
log.info(f"Results directory: {results_dir}")
45+
log.info(f"Figures directory: {figures_dir}")
46+
log.info(f"Grid directory: {grid_dir}")
4347

44-
# %%
45-
use_mean_shape = False
46-
47-
# %% select cellids to use
48-
if use_mean_shape:
48+
# %% [markdown]
49+
# ## Select cellids to use
50+
if MEAN_SHAPE:
4951
mesh_folder = base_datadir / "average_shape_meshes"
5052
cellids_to_use = ["mean"]
5153
else:
5254
mesh_folder = base_datadir / f"structure_data/{STRUCTURE_ID}/meshes/"
5355
df_cellid = pd.read_csv("s3://cellpack-analysis-data/all_cellids.csv")
5456
df_struct = df_cellid.loc[df_cellid["structure_name"] == STRUCTURE_ID]
55-
cellids_to_use = df_struct.loc[df_struct["8dsphere"], "CellId"]
56-
print(f"Using {len(cellids_to_use)} cellids")
57-
# %% get meshes for cellids used
57+
cellids_to_use = df_struct.loc[df_struct["8dsphere"], "CellId"].tolist()
58+
log.info(f"Using {len(cellids_to_use)} cellids")
59+
# %% [markdown]
60+
# ## Get meshes for cellids used
5861
cellid_list = []
5962
nuc_meshes_to_use = []
6063
mem_meshes_to_use = []
@@ -65,10 +68,12 @@
6568
cellid_list.append(cellid)
6669
nuc_meshes_to_use.append(nuc_mesh)
6770
mem_meshes_to_use.append(mem_mesh)
68-
print(f"Found {len(nuc_meshes_to_use)} meshes")
71+
log.info(f"Found {len(nuc_meshes_to_use)} meshes")
6972

70-
# %% load meshes
71-
if use_mean_shape:
73+
# %% [markdown]
74+
# # Grid spacing illustration
75+
# ## Load meshes
76+
if MEAN_SHAPE:
7277
nuc_mesh = trimesh.load_mesh(
7378
base_datadir / "average_shape_meshes/nuc_mesh_mean.obj"
7479
)
@@ -79,86 +84,119 @@
7984
nuc_mesh = trimesh.load_mesh(nuc_meshes_to_use[0])
8085
mem_mesh = trimesh.load_mesh(mem_meshes_to_use[0])
8186

82-
# %% set up grid
83-
SPACING = 2
87+
# %% [markdown]
88+
# ## Get grid points
89+
SPACING = 5
8490
bounds = mem_mesh.bounds
8591
bounding_box = round_away_from_zero(bounds)
8692
all_points = get_list_of_grid_points(bounding_box, SPACING)
87-
# %% explicit inside-outside check
88-
print("Calculating nuc inside check")
93+
94+
# %% [markdown]
95+
# ## Run inside-outside check
96+
log.info("Calculating nuc inside check")
8997
inside_nuc = nuc_mesh.contains(all_points)
90-
print("Calculating mem inside check")
98+
log.info("Calculating mem inside check")
9199
inside_mem = mem_mesh.contains(all_points)
92-
93-
# %% find points inside mem but outside nuc
100+
# %% [markdown]
101+
# ## Plot grid points
94102
inside_mem_outside_nuc = inside_mem & ~inside_nuc
95-
# %% plot grid point scatter plot
103+
96104
fig, ax = plt.subplots(dpi=300)
97105
all_points_scaled = all_points * PIX_SIZE
106+
centroid = np.mean(all_points_scaled, axis=0)
98107
ax.scatter(
99-
all_points_scaled[inside_mem_outside_nuc, 0],
100-
all_points_scaled[inside_mem_outside_nuc, 1],
108+
all_points_scaled[inside_mem_outside_nuc, 0] - centroid[0],
109+
all_points_scaled[inside_mem_outside_nuc, 1] - centroid[1],
101110
c="magenta",
102-
label="inside mem outside nuc",
103-
s=0.1,
104-
alpha=0.7,
111+
label="Available points",
112+
s=0.5,
113+
alpha=1,
105114
)
106115
ax.scatter(
107-
all_points_scaled[inside_nuc, 0],
108-
all_points_scaled[inside_nuc, 1],
116+
all_points_scaled[inside_nuc, 0] - centroid[0],
117+
all_points_scaled[inside_nuc, 1] - centroid[1],
109118
c="cyan",
110119
label="inside nuc",
111-
s=0.1,
112-
alpha=0.7,
120+
s=0.5,
121+
alpha=1,
113122
)
114-
ax.set_xlabel("x (\u03BCm)")
115-
ax.set_ylabel("y (\u03BCm)")
116-
ax.legend(loc="lower center", bbox_to_anchor=(0.5, 1))
123+
ax.set_xlabel("x (\u03bcm)")
124+
ax.set_ylabel("y (\u03bcm)")
125+
# ax.legend(loc="lower center", bbox_to_anchor=(0.5, 1))
117126
ax.set_aspect("equal")
127+
# ax.set_aspect(1.3)
118128
plt.show()
119-
fig.savefig(figures_dir / "grid_points.png", bbox_inches="tight")
129+
file_name = "grid_points"
130+
if MEAN_SHAPE:
131+
file_name += "_mean"
132+
fig.savefig(figures_dir / f"{file_name}.png", bbox_inches="tight")
120133

121-
# %% load mesh information
134+
# %% [markdown]
135+
# # Plot distance from nucleus for all shapes
136+
# ## Load mesh information
122137
file_path = grid_dir.parent / "mesh_information.dat"
123138
with open(file_path, "rb") as f:
124139
mesh_information_dict = pickle.load(f)
125-
# %% load saved distances
140+
# %% [markdown]
141+
# ## Load distances
126142
# normalization = "cell_diameter"
127143
normalization = None
128144
nuc_distances = []
129145
mem_distances = []
130146
for cellid in tqdm(cellids_to_use):
131-
normalization_factor = mesh_information_dict[str(cellid)].get(normalization, 1)
147+
if normalization is not None:
148+
normalization_factor = mesh_information_dict[str(cellid)].get(normalization, 1)
149+
else:
150+
normalization_factor = 1
132151
nuc_distances.append(
133152
np.load(grid_dir / f"nuc_distances_{cellid}.npy") / normalization_factor
134153
)
135154
mem_distances.append(
136155
np.load(grid_dir / f"mem_distances_{cellid}.npy") / normalization_factor
137156
)
138157

139-
# %% plot distance distribution kdeplot
158+
# %% [markdown]
159+
# ## Plot distance distribution as kde
140160
fig, ax = plt.subplots(dpi=300)
141-
cmap = plt.get_cmap("jet", len(nuc_distances))
161+
cmap = plt.get_cmap("viridis", len(nuc_distances))
162+
color_inds = np.random.permutation(len(nuc_distances))
142163
all_nuc_distances = []
143164
for i in tqdm(range(len(nuc_distances))):
144165
distances_to_plot = nuc_distances[i] * PIX_SIZE
145166
distances_to_plot = distances_to_plot[distances_to_plot > 0]
146-
sns.kdeplot(distances_to_plot, ax=ax, color=cmap(i + 1), alpha=0.3)
167+
sns.kdeplot(
168+
distances_to_plot, ax=ax, color=cmap(color_inds[i]), alpha=0.4, linewidth=1
169+
)
147170
all_nuc_distances.append(distances_to_plot)
171+
# break
148172
all_nuc_distances = np.concatenate(all_nuc_distances)
149-
sns.kdeplot(all_nuc_distances, ax=ax, color=cmap(0), linewidth=2)
150-
mean_distance = np.mean(all_nuc_distances)
173+
sns.kdeplot(all_nuc_distances, ax=ax, color="k", linewidth=3)
174+
mean_distance = np.mean(all_nuc_distances).item()
151175
ax.axvline(mean_distance, color="black", linestyle="--")
152-
ax.set_title(f"Distance to nucleus\nMean: {mean_distance:.2f}\u03BCm")
153-
ax.set_xlabel("Distance (\u03BCm)")
176+
# %% [markdown]
177+
# Adjust plot
178+
# xlim = ax.get_xlim()
179+
plt.rcdefaults()
180+
ax.set_xlim((-1, 10))
181+
ax.set_title(f"Distance to nucleus\nMean: {mean_distance:.2f}\u03bcm")
182+
ax.set_xlabel("Distance (\u03bcm)")
154183
ax.set_ylabel("Probability density")
155-
plt.show()
156-
# %% plot distance distribution histogram
184+
file_name = "nuc_distance_kde"
185+
fig.savefig(figures_dir / f"{file_name}.png", bbox_inches="tight")
186+
fig.savefig(figures_dir / f"{file_name}.svg")
187+
# plt.show()
188+
fig
189+
# %% [markdown]
190+
# ## Plot distance distribution histogram
157191
fig, ax = plt.subplots(dpi=300)
158192
nuc_distances = np.array(nuc_distances)
159193
distances_to_plot = nuc_distances[0][nuc_distances[0] > 0] * PIX_SIZE
160194
ax.hist(distances_to_plot, bins=100)
161-
ax.set_xlabel("Distance (\u03BCm)")
195+
ax.set_xlabel("Distance (\u03bcm)")
162196
ax.set_ylabel("Number of points")
163197
ax.set_title("Distance to nucleus")
198+
file_name = "nuc_distance_hist"
199+
fig.savefig(figures_dir / f"{file_name}.png", bbox_inches="tight")
164200
plt.show()
201+
202+
# %%

0 commit comments

Comments
 (0)