Skip to content

Commit cfeb402

Browse files
committed
update README with runnable test data
1 parent 2a3f09a commit cfeb402

File tree

5 files changed

+44
-9
lines changed

5 files changed

+44
-9
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,17 @@ Before running the analysis, you need to:
4040

4141
- Follow the workflow steps, which are described in detail in the manuscript and summarized below in the [Shapespace construction](#shapespace-construction) section.
4242

43-
We also provide a test dataset (single-cell crops and masks) which allows you to quickly test shape parameterization, constructing shapespace and map protein intensity to the average cell shape.
43+
We also provide a test dataset (single-cell crops and masks) that lets you quickly test shape parameterization, shapespace construction, and mapping of protein intensity onto the average cell shape. NOTE: This dataset is intentionally small for testing and may not preserve the true average shape representation of the cell line. The number of samples/organelles is also limited, so it will not recover the true organelle map.
4444

4545
```bash
4646
wget https://ell-vault.stanford.edu/dav/trangle/www/K-562.zip
4747
unzip K-562.zip -d K-562
4848

4949
python -m coefficients.s2_calculate_fft
50+
python -m analysis.cell_nucleus_ratio
5051
python -m shapemodes.s3_calculate_shapemodes
51-
python -m warp.s4_concentric_rings_intensity # check cfg.N_ISOS and cfg.LANDMARKS
52-
python -m warp.s4_protein_image_warp # check cfg.LANDMARKS
52+
python -m warps.s4_concentric_rings_intensity --cell_line K-562 --n_isos 10 20 # check cfg.N_ISOS and cfg.LANDMARKS
53+
python -m warps.s4_tsp --cell_line K-562 # check cfg.LANDMARKS
5354
```
5455
For large datasets or when analyzing multiple cell lines, consider using a workflow manager such as **Snakemake**, or submitting separate jobs to a compute cluster using **SLURM**. Example workflow files and job scripts can be found inside each folder.
5556

configs/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,5 @@
109109
PROJECT_DIR = f"/scratch/groups/emmalu/2Dshapespace/{CELL_LINE.replace(' ','_')}"
110110
META_PATH = "/scratch/groups/emmalu/sl_pHPA_15_0.05_euclidean_100000_rmoutliers_ilsc_3d_bbox_rm_border.csv"
111111
else:
112-
PROJECT_DIR = f"./{CELL_LINE.replace(' ','_')}"
113-
META_PATH = "" # your own metadata to perform downstream analysis
112+
PROJECT_DIR = f"./{CELL_LINE}"
113+
META_PATH = f"./{CELL_LINE}/meta_k562.csv" # your own metadata to perform downstream analysis

setup.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from setuptools import setup, find_packages
2+
3+
setup(
4+
name="2D_shapespace", # Package name
5+
version="0.1.0",
6+
description="Analysis of cell line shapespace and organelle correlations",
7+
author="trangle1302",
8+
packages=find_packages(),
9+
python_requires=">=3.8",
10+
install_requires=[
11+
"scikit-learn==1.4.2",
12+
"scikit-image==0.22.0",
13+
"seaborn==0.13.0",
14+
"opencv-python-headless==4.6.0.66",
15+
"more_itertools==10.7.0",
16+
"tqdm==4.67.1",
17+
"numpy==1.26.4",
18+
# Optional / experimental (not used in final version):
19+
# "pyefd",
20+
# "PyWavelets",
21+
],
22+
)

warps/s4_concentric_rings_intensity.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,14 @@ def main():
111111
mappings = pd.read_csv('/data/HPA-IF-images/IF-image.csv')
112112
mappings = mappings[mappings.atlas_name=='U2OS']
113113
else:
114-
cellline_meta = os.path.join(project_dir, os.path.basename(cfg.META_PATH).replace(".csv", "_splitVesiclesPCP.csv"))
115-
mappings = pd.read_csv(cellline_meta)
114+
cellline_meta_path = os.path.join(project_dir, os.path.basename(cfg.META_PATH).replace(".csv", "_splitVesiclesPCP.csv"))
115+
if os.path.exists(cellline_meta_path):
116+
mappings = pd.read_csv(cellline_meta_path)
117+
else:
118+
mappings = pd.read_csv(cfg.META_PATH)
119+
mappings["cell_idx"] = [idx.split("_", 1)[1] for idx in mappings.id]
120+
from warps.avg_organelle import unmerge_label
121+
mappings = unmerge_label(mappings)
116122
#mappings = mappings[~mappings.sc_target.isin(["Negative","Multi-Location"])]
117123
print(mappings.columns)
118124
#print(mappings.sc_target.value_counts(), mappings.cell_idx)

warps/s4_tsp.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,14 @@ def main():
200200
if not os.path.exists(protein_dir):
201201
os.makedirs(protein_dir)
202202

203-
cellline_meta = os.path.join(project_dir, os.path.basename(cfg.META_PATH).replace(".csv", "_splitVesiclesPCP.csv"))
204-
mappings = pd.read_csv(cellline_meta)
203+
cellline_meta_path = os.path.join(project_dir, os.path.basename(cfg.META_PATH).replace(".csv", "_splitVesiclesPCP.csv"))
204+
if os.path.exists(cellline_meta_path):
205+
mappings = pd.read_csv(cellline_meta_path)
206+
else:
207+
mappings = pd.read_csv(cfg.META_PATH)
208+
mappings["cell_idx"] = [idx.split("_", 1)[1] for idx in mappings.id]
209+
from warps.avg_organelle import unmerge_label
210+
mappings = unmerge_label(mappings)
205211
log_dir = f"{project_dir}/logs"
206212
fft_dir = f"{project_dir}/fftcoefs/{cfg.ALIGNMENT}"
207213
fft_path = os.path.join(fft_dir, f"fftcoefs_{cfg.N_COEFS}.txt")

0 commit comments

Comments
 (0)