|
1 | 1 | # %% [markdown] |
2 | | -""" |
3 | | -## Analyze and visualize available space distribution |
4 | | -""" |
| 2 | +# # Analyze and visualize available space distribution |
| 3 | +import logging |
5 | 4 | import pickle |
6 | 5 | from pathlib import Path |
7 | 6 |
|
8 | 7 | import matplotlib.pyplot as plt |
9 | 8 | import numpy as np |
10 | 9 | import pandas as pd |
11 | 10 | import seaborn as sns |
12 | | - |
13 | | -# %% |
14 | 11 | import trimesh |
15 | 12 | from tqdm import tqdm |
16 | 13 |
|
|
19 | 16 | round_away_from_zero, |
20 | 17 | ) |
21 | 18 |
|
22 | | -# %% set pixel size |
| 19 | +log = logging.getLogger(__name__) |
| 20 | + |
| 21 | +# %% [markdown] |
| 22 | +# ## Set parameters and file paths |
| 23 | +MEAN_SHAPE = False |
| 24 | + |
23 | 25 | PIX_SIZE = 0.108 # um per pixel |
24 | 26 |
|
25 | | -# %% set structure id |
26 | | -STRUCTURE_ID = "SLC25A17" # SLC25A17: peroxisomes, RAB5A: early endosomes |
27 | | -# %% set file paths and setup parameters |
| 27 | +STRUCTURE_ID = "SEC61B" # SLC25A17: peroxisomes, RAB5A: early endosomes |
| 28 | +STRUCTURE_NAME = "ER_peroxisome" |
| 29 | + |
28 | 30 | base_datadir = Path(__file__).parents[3] / "data" |
29 | 31 | base_results_dir = Path(__file__).parents[3] / "results" |
30 | 32 |
|
31 | | -results_dir = base_results_dir / f"stochastic_variation_analysis/{STRUCTURE_ID}/rules/" |
| 33 | +results_dir = ( |
| 34 | + base_results_dir / f"stochastic_variation_analysis/{STRUCTURE_NAME}/rules/" |
| 35 | +) |
32 | 36 | results_dir.mkdir(exist_ok=True, parents=True) |
33 | 37 |
|
34 | 38 | figures_dir = results_dir / "figures" |
|
37 | 41 | grid_dir = base_datadir / f"structure_data/{STRUCTURE_ID}/grid_distances" |
38 | 42 | grid_dir.mkdir(exist_ok=True, parents=True) |
39 | 43 |
|
40 | | -print(f"Results directory: {results_dir}") |
41 | | -print(f"Figures directory: {figures_dir}") |
42 | | -print(f"Grid directory: {grid_dir}") |
| 44 | +log.info(f"Results directory: {results_dir}") |
| 45 | +log.info(f"Figures directory: {figures_dir}") |
| 46 | +log.info(f"Grid directory: {grid_dir}") |
43 | 47 |
|
44 | | -# %% |
45 | | -use_mean_shape = False |
46 | | - |
47 | | -# %% select cellids to use |
48 | | -if use_mean_shape: |
| 48 | +# %% [markdown] |
| 49 | +# ## Select cellids to use |
| 50 | +if MEAN_SHAPE: |
49 | 51 | mesh_folder = base_datadir / "average_shape_meshes" |
50 | 52 | cellids_to_use = ["mean"] |
51 | 53 | else: |
52 | 54 | mesh_folder = base_datadir / f"structure_data/{STRUCTURE_ID}/meshes/" |
53 | 55 | df_cellid = pd.read_csv("s3://cellpack-analysis-data/all_cellids.csv") |
54 | 56 | df_struct = df_cellid.loc[df_cellid["structure_name"] == STRUCTURE_ID] |
55 | | - cellids_to_use = df_struct.loc[df_struct["8dsphere"], "CellId"] |
56 | | -print(f"Using {len(cellids_to_use)} cellids") |
57 | | -# %% get meshes for cellids used |
| 57 | + cellids_to_use = df_struct.loc[df_struct["8dsphere"], "CellId"].tolist() |
| 58 | +log.info(f"Using {len(cellids_to_use)} cellids") |
| 59 | +# %% [markdown] |
| 60 | +# ## Get meshes for cellids used |
58 | 61 | cellid_list = [] |
59 | 62 | nuc_meshes_to_use = [] |
60 | 63 | mem_meshes_to_use = [] |
|
65 | 68 | cellid_list.append(cellid) |
66 | 69 | nuc_meshes_to_use.append(nuc_mesh) |
67 | 70 | mem_meshes_to_use.append(mem_mesh) |
68 | | -print(f"Found {len(nuc_meshes_to_use)} meshes") |
| 71 | +log.info(f"Found {len(nuc_meshes_to_use)} meshes") |
69 | 72 |
|
70 | | -# %% load meshes |
71 | | -if use_mean_shape: |
| 73 | +# %% [markdown] |
| 74 | +# # Grid spacing illustration |
| 75 | +# ## Load meshes |
| 76 | +if MEAN_SHAPE: |
72 | 77 | nuc_mesh = trimesh.load_mesh( |
73 | 78 | base_datadir / "average_shape_meshes/nuc_mesh_mean.obj" |
74 | 79 | ) |
|
79 | 84 | nuc_mesh = trimesh.load_mesh(nuc_meshes_to_use[0]) |
80 | 85 | mem_mesh = trimesh.load_mesh(mem_meshes_to_use[0]) |
81 | 86 |
|
82 | | -# %% set up grid |
83 | | -SPACING = 2 |
| 87 | +# %% [markdown] |
| 88 | +# ## Get grid points |
| 89 | +SPACING = 5 |
84 | 90 | bounds = mem_mesh.bounds |
85 | 91 | bounding_box = round_away_from_zero(bounds) |
86 | 92 | all_points = get_list_of_grid_points(bounding_box, SPACING) |
87 | | -# %% explicit inside-outside check |
88 | | -print("Calculating nuc inside check") |
| 93 | + |
| 94 | +# %% [markdown] |
| 95 | +# ## Run inside-outside check |
| 96 | +log.info("Calculating nuc inside check") |
89 | 97 | inside_nuc = nuc_mesh.contains(all_points) |
90 | | -print("Calculating mem inside check") |
| 98 | +log.info("Calculating mem inside check") |
91 | 99 | inside_mem = mem_mesh.contains(all_points) |
92 | | - |
93 | | -# %% find points inside mem but outside nuc |
| 100 | +# %% [markdown] |
| 101 | +# ## Plot grid points |
94 | 102 | inside_mem_outside_nuc = inside_mem & ~inside_nuc |
95 | | -# %% plot grid point scatter plot |
| 103 | + |
96 | 104 | fig, ax = plt.subplots(dpi=300) |
97 | 105 | all_points_scaled = all_points * PIX_SIZE |
| 106 | +centroid = np.mean(all_points_scaled, axis=0) |
98 | 107 | ax.scatter( |
99 | | - all_points_scaled[inside_mem_outside_nuc, 0], |
100 | | - all_points_scaled[inside_mem_outside_nuc, 1], |
| 108 | + all_points_scaled[inside_mem_outside_nuc, 0] - centroid[0], |
| 109 | + all_points_scaled[inside_mem_outside_nuc, 1] - centroid[1], |
101 | 110 | c="magenta", |
102 | | - label="inside mem outside nuc", |
103 | | - s=0.1, |
104 | | - alpha=0.7, |
| 111 | + label="Available points", |
| 112 | + s=0.5, |
| 113 | + alpha=1, |
105 | 114 | ) |
106 | 115 | ax.scatter( |
107 | | - all_points_scaled[inside_nuc, 0], |
108 | | - all_points_scaled[inside_nuc, 1], |
| 116 | + all_points_scaled[inside_nuc, 0] - centroid[0], |
| 117 | + all_points_scaled[inside_nuc, 1] - centroid[1], |
109 | 118 | c="cyan", |
110 | 119 | label="inside nuc", |
111 | | - s=0.1, |
112 | | - alpha=0.7, |
| 120 | + s=0.5, |
| 121 | + alpha=1, |
113 | 122 | ) |
114 | | -ax.set_xlabel("x (\u03BCm)") |
115 | | -ax.set_ylabel("y (\u03BCm)") |
116 | | -ax.legend(loc="lower center", bbox_to_anchor=(0.5, 1)) |
| 123 | +ax.set_xlabel("x (\u03bcm)") |
| 124 | +ax.set_ylabel("y (\u03bcm)") |
| 125 | +# ax.legend(loc="lower center", bbox_to_anchor=(0.5, 1)) |
117 | 126 | ax.set_aspect("equal") |
| 127 | +# ax.set_aspect(1.3) |
118 | 128 | plt.show() |
119 | | -fig.savefig(figures_dir / "grid_points.png", bbox_inches="tight") |
| 129 | +file_name = "grid_points" |
| 130 | +if MEAN_SHAPE: |
| 131 | + file_name += "_mean" |
| 132 | +fig.savefig(figures_dir / f"{file_name}.png", bbox_inches="tight") |
120 | 133 |
|
121 | | -# %% load mesh information |
| 134 | +# %% [markdown] |
| 135 | +# # Plot distance from nucleus for all shapes |
| 136 | +# ## Load mesh information |
122 | 137 | file_path = grid_dir.parent / "mesh_information.dat" |
123 | 138 | with open(file_path, "rb") as f: |
124 | 139 | mesh_information_dict = pickle.load(f) |
125 | | -# %% load saved distances |
| 140 | +# %% [markdown] |
| 141 | +# ## Load distances |
126 | 142 | # normalization = "cell_diameter" |
127 | 143 | normalization = None |
128 | 144 | nuc_distances = [] |
129 | 145 | mem_distances = [] |
130 | 146 | for cellid in tqdm(cellids_to_use): |
131 | | - normalization_factor = mesh_information_dict[str(cellid)].get(normalization, 1) |
| 147 | + if normalization is not None: |
| 148 | + normalization_factor = mesh_information_dict[str(cellid)].get(normalization, 1) |
| 149 | + else: |
| 150 | + normalization_factor = 1 |
132 | 151 | nuc_distances.append( |
133 | 152 | np.load(grid_dir / f"nuc_distances_{cellid}.npy") / normalization_factor |
134 | 153 | ) |
135 | 154 | mem_distances.append( |
136 | 155 | np.load(grid_dir / f"mem_distances_{cellid}.npy") / normalization_factor |
137 | 156 | ) |
138 | 157 |
|
139 | | -# %% plot distance distribution kdeplot |
| 158 | +# %% [markdown] |
| 159 | +# ## Plot distance distribution as kde |
140 | 160 | fig, ax = plt.subplots(dpi=300) |
141 | | -cmap = plt.get_cmap("jet", len(nuc_distances)) |
| 161 | +cmap = plt.get_cmap("viridis", len(nuc_distances)) |
| 162 | +color_inds = np.random.permutation(len(nuc_distances)) |
142 | 163 | all_nuc_distances = [] |
143 | 164 | for i in tqdm(range(len(nuc_distances))): |
144 | 165 | distances_to_plot = nuc_distances[i] * PIX_SIZE |
145 | 166 | distances_to_plot = distances_to_plot[distances_to_plot > 0] |
146 | | - sns.kdeplot(distances_to_plot, ax=ax, color=cmap(i + 1), alpha=0.3) |
| 167 | + sns.kdeplot( |
| 168 | + distances_to_plot, ax=ax, color=cmap(color_inds[i]), alpha=0.4, linewidth=1 |
| 169 | + ) |
147 | 170 | all_nuc_distances.append(distances_to_plot) |
| 171 | + # break |
148 | 172 | all_nuc_distances = np.concatenate(all_nuc_distances) |
149 | | -sns.kdeplot(all_nuc_distances, ax=ax, color=cmap(0), linewidth=2) |
150 | | -mean_distance = np.mean(all_nuc_distances) |
| 173 | +sns.kdeplot(all_nuc_distances, ax=ax, color="k", linewidth=3) |
| 174 | +mean_distance = np.mean(all_nuc_distances).item() |
151 | 175 | ax.axvline(mean_distance, color="black", linestyle="--") |
152 | | -ax.set_title(f"Distance to nucleus\nMean: {mean_distance:.2f}\u03BCm") |
153 | | -ax.set_xlabel("Distance (\u03BCm)") |
| 176 | +# %% [markdown] |
| 177 | +# Adjust plot |
| 178 | +# xlim = ax.get_xlim() |
| 179 | +plt.rcdefaults() |
| 180 | +ax.set_xlim((-1, 10)) |
| 181 | +ax.set_title(f"Distance to nucleus\nMean: {mean_distance:.2f}\u03bcm") |
| 182 | +ax.set_xlabel("Distance (\u03bcm)") |
154 | 183 | ax.set_ylabel("Probability density") |
155 | | -plt.show() |
156 | | -# %% plot distance distribution histogram |
| 184 | +file_name = "nuc_distance_kde" |
| 185 | +fig.savefig(figures_dir / f"{file_name}.png", bbox_inches="tight") |
| 186 | +fig.savefig(figures_dir / f"{file_name}.svg") |
| 187 | +# plt.show() |
| 188 | +fig |
| 189 | +# %% [markdown] |
| 190 | +# ## Plot distance distribution histogram |
157 | 191 | fig, ax = plt.subplots(dpi=300) |
158 | 192 | nuc_distances = np.array(nuc_distances) |
159 | 193 | distances_to_plot = nuc_distances[0][nuc_distances[0] > 0] * PIX_SIZE |
160 | 194 | ax.hist(distances_to_plot, bins=100) |
161 | | -ax.set_xlabel("Distance (\u03BCm)") |
| 195 | +ax.set_xlabel("Distance (\u03bcm)") |
162 | 196 | ax.set_ylabel("Number of points") |
163 | 197 | ax.set_title("Distance to nucleus") |
| 198 | +file_name = "nuc_distance_hist" |
| 199 | +fig.savefig(figures_dir / f"{file_name}.png", bbox_inches="tight") |
164 | 200 | plt.show() |
| 201 | + |
| 202 | +# %% |
0 commit comments