Skip to content

Commit 3a4f83c

Browse files
authored
Merge pull request #36 from openproblems-bio/generalise_tenx_xenium_loader
Generalise tenx xenium loader
2 parents 776ba36 + 4bc094d commit 3a4f83c

File tree

4 files changed

+183
-10
lines changed

4 files changed

+183
-10
lines changed
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#!/bin/bash
2+
3+
# abort on the first failing command; enabled before anything else so that a
# failed repository lookup or cd stops the script instead of letting it
# continue from the wrong directory
set -e

# get the root of the repository
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the commands below are run from the root of the repository
cd "$REPO_ROOT"
10+
11+
publish_dir="s3://openproblems-data/resources/datasets"
12+
13+
# Write the workflow parameters to /tmp/params.yaml. The heredoc delimiter is
# unquoted, so the shell expands $publish_dir inside the body, while the
# backslash-escaped \$id is written to the file literally as $id.
cat > /tmp/params.yaml << HERE
14+
param_list:
15+
16+
- id: "10x_xenium/2023_10x_human_lung_xenium"
17+
input: https://cf.10xgenomics.com/samples/xenium/1.3.0/Xenium_Preview_Human_Non_diseased_Lung_With_Add_on_FFPE/Xenium_Preview_Human_Non_diseased_Lung_With_Add_on_FFPE_outs.zip
18+
dataset_name: "Xenium Preview Human Non diseased Lung With Add on FFPE"
19+
dataset_url: "https://www.10xgenomics.com/datasets/xenium-human-lung-preview-data-1-standard"
20+
dataset_summary: "Preview of Xenium In Situ Gene Expression data for adult human lung sections, using a development version of the Human Lung Gene Expression Panel and custom add-on panel for lung cell types."
21+
dataset_description: "Preview of Xenium In Situ Gene Expression data for adult human lung sections, using a development version of the Human Lung Gene Expression Panel and custom add-on panel for lung cell types. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.3.0"
22+
dataset_organism: "homo_sapiens"
23+
segmentation_id: [cell, nucleus]
24+
25+
- id: "10x_xenium/2023_10x_human_lung_cancer_xenium"
26+
input: https://s3-us-west-2.amazonaws.com/10x.files/samples/xenium/1.3.0/Xenium_Preview_Human_Lung_Cancer_With_Add_on_2_FFPE/Xenium_Preview_Human_Lung_Cancer_With_Add_on_2_FFPE_outs.zip
27+
dataset_name: "Xenium Preview Human Lung Cancer With Add on 2 FFPE"
28+
dataset_url: "https://www.10xgenomics.com/datasets/xenium-human-lung-preview-data-1-standard"
29+
dataset_summary: "Preview of Xenium In Situ Gene Expression data for adult human lung sections, using a development version of the Human Lung Gene Expression Panel and custom add-on panel for lung cell types."
30+
dataset_description: "Preview of Xenium In Situ Gene Expression data for adult human lung sections, using a development version of the Human Lung Gene Expression Panel and custom add-on panel for lung cell types. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.3.0"
31+
dataset_organism: "homo_sapiens"
32+
segmentation_id: [cell, nucleus]
33+
34+
- id: "10x_xenium/2023_10x_human_pancreas_cancer_xenium"
35+
input: https://cf.10xgenomics.com/samples/xenium/1.6.0/Xenium_V1_hPancreas_Cancer_Add_on_FFPE/Xenium_V1_hPancreas_Cancer_Add_on_FFPE_outs.zip
36+
dataset_name: "Xenium V1 hPancreas Cancer Add on FFPE"
37+
dataset_url: "https://www.10xgenomics.com/datasets/pancreatic-cancer-with-xenium-human-multi-tissue-and-cancer-panel-1-standard"
38+
dataset_summary: "Xenium In Situ Gene Expression data for human pancreatic cancer sections using the Xenium Human Multi-Tissue and Cancer Panel."
39+
dataset_description: "Xenium In Situ Gene Expression data for human pancreatic cancer sections using the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.6.0"
40+
dataset_organism: "homo_sapiens"
41+
segmentation_id: [cell, nucleus]
42+
43+
- id: "10x_xenium/2023_10x_human_brain_xenium"
44+
input: https://cf.10xgenomics.com/samples/xenium/1.3.0/Xenium_V1_FFPE_Human_Brain_Healthy_With_Addon/Xenium_V1_FFPE_Human_Brain_Healthy_With_Addon_outs.zip
45+
dataset_name: "Xenium V1 FFPE Human Brain Healthy With Addon"
46+
dataset_url: "https://www.10xgenomics.com/datasets/xenium-human-brain-preview-data-1-standard"
47+
dataset_summary: "Adult human brain cortical section, healthy."
48+
dataset_description: "Preview of Xenium In Situ Gene Expression data for adult human brain cortical sections, using a development version of the Human Brain Gene Expression Panel and two custom add-on panels for brain cell types. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.3.0"
49+
dataset_organism: "homo_sapiens"
50+
segmentation_id: [cell, nucleus]
51+
52+
- id: "10x_xenium/2024_10x_human_skin_xenium"
53+
input: https://cf.10xgenomics.com/samples/xenium/1.9.0/Xenium_V1_hSkin_nondiseased_section_1_FFPE/Xenium_V1_hSkin_nondiseased_section_1_FFPE_outs.zip
54+
dataset_name: "Xenium V1 hSkin nondiseased section 1 FFPE"
55+
dataset_url: "https://www.10xgenomics.com/datasets/human-skin-data-xenium-human-multi-tissue-and-cancer-panel-1-standard"
56+
dataset_summary: "Xenium In Situ Gene Expression data for adult human skin sections using the Xenium Human Multi-Tissue and Cancer Panel."
57+
dataset_description: "Xenium In Situ Gene Expression data for adult human skin sections using the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.9.0"
58+
dataset_organism: "homo_sapiens"
59+
segmentation_id: [cell, nucleus]
60+
61+
- id: "10x_xenium/2024_10x_human_liver_xenium"
62+
input: https://cf.10xgenomics.com/samples/xenium/1.9.0/Xenium_V1_hLiver_nondiseased_section_FFPE/Xenium_V1_hLiver_nondiseased_section_FFPE_outs.zip
63+
dataset_name: "Xenium V1 hLiver nondiseased section FFPE"
64+
dataset_url: "https://www.10xgenomics.com/datasets/human-liver-data-xenium-human-multi-tissue-and-cancer-panel-1-standard"
65+
dataset_summary: "Xenium In Situ Gene Expression data for adult human liver sections using the Xenium Human Multi-Tissue and Cancer Panel."
66+
dataset_description: "Xenium In Situ Gene Expression data for adult human liver sections using the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.9.0"
67+
dataset_organism: "homo_sapiens"
68+
segmentation_id: [cell, nucleus]
69+
70+
- id: "10x_xenium/2024_10x_human_liver_cancer_xenium"
71+
input: https://cf.10xgenomics.com/samples/xenium/1.9.0/Xenium_V1_hLiver_cancer_section_FFPE/Xenium_V1_hLiver_cancer_section_FFPE_outs.zip
72+
dataset_name: "Xenium V1 hLiver cancer section FFPE"
73+
dataset_url: "https://www.10xgenomics.com/datasets/human-liver-data-xenium-human-multi-tissue-and-cancer-panel-1-standard"
74+
dataset_summary: "Xenium In Situ Gene Expression data for adult human liver sections using the Xenium Human Multi-Tissue and Cancer Panel."
75+
dataset_description: "Xenium In Situ Gene Expression data for adult human liver sections using the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.9.0"
76+
dataset_organism: "homo_sapiens"
77+
segmentation_id: [cell, nucleus]
78+
79+
- id: "10x_xenium/2024_10x_human_heart_xenium"
80+
input: https://cf.10xgenomics.com/samples/xenium/1.9.0/Xenium_V1_hHeart_nondiseased_section_FFPE/Xenium_V1_hHeart_nondiseased_section_FFPE_outs.zip
81+
dataset_name: "Xenium V1 hHeart nondiseased section FFPE"
82+
dataset_url: "https://www.10xgenomics.com/datasets/human-heart-data-xenium-human-multi-tissue-and-cancer-panel-1-standard"
83+
dataset_summary: "Xenium In Situ Gene Expression data for adult human heart section using the Xenium Human Multi-Tissue and Cancer Panel."
84+
dataset_description: "Xenium In Situ Gene Expression data for adult human heart section using the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.9.0"
85+
dataset_organism: "homo_sapiens"
86+
segmentation_id: [cell, nucleus]
87+
88+
- id: "10x_xenium/2023_10x_human_colon_xenium"
89+
input: https://cf.10xgenomics.com/samples/xenium/1.6.0/Xenium_V1_hColon_Non_diseased_Add_on_FFPE/Xenium_V1_hColon_Non_diseased_Add_on_FFPE_outs.zip
90+
dataset_name: "Xenium V1 hColon Non diseased Add on FFPE"
91+
dataset_url: "https://www.10xgenomics.com/datasets/human-colon-preview-data-xenium-human-colon-gene-expression-panel-1-standard"
92+
dataset_summary: "Preview of Xenium In Situ Gene Expression data for adult human colon sections, using a development version of the Xenium Human Colon Gene Expression Panel."
93+
dataset_description: "Preview of Xenium In Situ Gene Expression data for adult human colon sections, using a development version of the Xenium Human Colon Gene Expression Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.6.0"
94+
dataset_organism: "homo_sapiens"
95+
segmentation_id: [cell, nucleus]
96+
97+
- id: "10x_xenium/2023_10x_human_colon_cancer_xenium"
98+
input: https://s3-us-west-2.amazonaws.com/10x.files/samples/xenium/1.6.0/Xenium_V1_hColon_Cancer_Add_on_FFPE/Xenium_V1_hColon_Cancer_Add_on_FFPE_outs.zip
99+
dataset_name: "Xenium V1 hColon Cancer Add on FFPE"
100+
dataset_url: "https://www.10xgenomics.com/datasets/human-colon-preview-data-xenium-human-colon-gene-expression-panel-1-standard"
101+
dataset_summary: "Preview of Xenium In Situ Gene Expression data for adult human colon sections, using a development version of the Xenium Human Colon Gene Expression Panel."
102+
dataset_description: "Cancer; stage 2A adenocarcinoma. Preview of Xenium In Situ Gene Expression data for adult human colon sections, using a development version of the Xenium Human Colon Gene Expression Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.6.0"
103+
dataset_organism: "homo_sapiens"
104+
segmentation_id: [cell, nucleus]
105+
106+
- id: "10x_xenium/2023_10x_human_kidney_xenium"
107+
input: https://cf.10xgenomics.com/samples/xenium/1.5.0/Xenium_V1_hKidney_nondiseased_section/Xenium_V1_hKidney_nondiseased_section_outs.zip
108+
dataset_name: "Xenium V1 hKidney nondiseased section"
109+
dataset_url: "https://www.10xgenomics.com/datasets/human-kidney-preview-data-xenium-human-multi-tissue-and-cancer-panel-1-standard"
110+
dataset_summary: "Preview of Xenium In Situ Gene Expression data for adult human kidney sections, using a development version of the Xenium Human Multi-Tissue and Cancer Panel."
111+
dataset_description: "Preview of Xenium In Situ Gene Expression data for adult human kidney sections, using a development version of the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.5.0"
112+
dataset_organism: "homo_sapiens"
113+
segmentation_id: [cell, nucleus]
114+
115+
- id: "10x_xenium/2023_10x_human_kidney_cancer_xenium"
116+
input: https://cf.10xgenomics.com/samples/xenium/1.5.0/Xenium_V1_hKidney_cancer_section/Xenium_V1_hKidney_cancer_section_outs.zip
117+
dataset_name: "Xenium V1 hKidney cancer section"
118+
dataset_url: "https://www.10xgenomics.com/datasets/human-kidney-preview-data-xenium-human-multi-tissue-and-cancer-panel-1-standard"
119+
dataset_summary: "Preview of Xenium In Situ Gene Expression data for adult human kidney sections, using a development version of the Xenium Human Multi-Tissue and Cancer Panel."
120+
dataset_description: "Kidney cancer (papillary renal cell carcinoma, or PRCC). Preview of Xenium In Situ Gene Expression data for adult human kidney sections, using a development version of the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.5.0"
121+
dataset_organism: "homo_sapiens"
122+
segmentation_id: [cell, nucleus]
123+
124+
- id: "10x_xenium/2024_10x_mouse_colon_xenium"
125+
input: https://cf.10xgenomics.com/samples/xenium/2.0.0/Xenium_V1_mouse_Colon_FF/Xenium_V1_mouse_Colon_FF_outs.zip
126+
dataset_name: "Xenium V1 mouse Colon FF"
127+
dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-colon-with-xenium-multimodal-cell-segmentation-1-standard"
128+
dataset_summary: "Xenium In Situ Gene Expression with Cell Segmentation Staining data for mouse colon tissue using the Xenium Mouse Tissue Atlassing Panel."
129+
dataset_description: "Xenium In Situ Gene Expression with Cell Segmentation Staining data for mouse colon tissue using the Xenium Mouse Tissue Atlassing Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 2.0.0"
130+
dataset_organism: "mus_musculus"
131+
segmentation_id: [cell, nucleus]
132+
133+
- id: "10x_xenium/2023_10x_human_lymph_node_xenium"
134+
input: https://cf.10xgenomics.com/samples/xenium/1.5.0/Xenium_V1_hLymphNode_nondiseased_section/Xenium_V1_hLymphNode_nondiseased_section_outs.zip
135+
dataset_name: "Xenium V1 hLymphNode nondiseased section"
136+
dataset_url: "https://www.10xgenomics.com/datasets/human-lymph-node-preview-data-xenium-human-multi-tissue-and-cancer-panel-1-standard"
137+
dataset_summary: "Preview of Xenium In Situ Gene Expression data for adult human lymph node, using a development version of the Xenium Human Multi-Tissue and Cancer Panel."
138+
dataset_description: "The selected section represents a non-diseased lymph node tissue. Preview of Xenium In Situ Gene Expression data for adult human lymph node, using a development version of the Xenium Human Multi-Tissue and Cancer Panel. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 1.5.0"
139+
dataset_organism: "homo_sapiens"
140+
segmentation_id: [cell, nucleus]
141+
142+
- id: "10x_xenium/2024_10x_human_ovarian_cancer_xenium"
143+
input: https://cf.10xgenomics.com/samples/xenium/2.0.0/Xenium_V1_Human_Ovarian_Cancer_Addon_FFPE/Xenium_V1_Human_Ovarian_Cancer_Addon_FFPE_outs.zip
144+
dataset_name: "Xenium V1 Human Ovarian Cancer Addon FFPE"
145+
dataset_url: "https://www.10xgenomics.com/datasets/ffpe-human-ovarian-cancer-data-with-human-immuno-oncology-profiling-panel-and-custom-add-on-1-standard"
146+
dataset_summary: "Xenium In Situ Gene Expression data for human ovarian cancer tissue using the Xenium Human Immuno-Oncology Profiling Panel with custom add-on."
147+
dataset_description: "Ovary Serous Carcinoma; Stage II-A; Grade 3. Xenium In Situ Gene Expression data for human ovarian cancer tissue using the Xenium Human Immuno-Oncology Profiling Panel with custom add-on. In Situ Gene Expression dataset analyzed using Xenium Onboard Analysis 2.0.0"
148+
dataset_organism: "homo_sapiens"
149+
segmentation_id: [cell, nucleus]
150+
151+
152+
output_dataset: "\$id/dataset.zarr"
153+
output_state: "\$id/state.yaml"
154+
publish_dir: "$publish_dir"
155+
HERE
156+
157+
# Launch the process_tenx_xenium workflow with `tw launch`, using the
# parameter file written to /tmp/params.yaml above. The --config path is
# relative, so it is resolved from the current directory (the script changes
# to the repository root earlier).
tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
158+
--revision build/main \
159+
--pull-latest \
160+
--main-script target/nextflow/datasets/workflows/process_tenx_xenium/main.nf \
161+
--workspace 53907369739130 \
162+
--params-file /tmp/params.yaml \
163+
--config common/nextflow_helpers/labels_tw.config \
164+
--labels datasets,10x_xenium

src/datasets/loaders/tenx_xenium/config.vsh.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ namespace: datasets/loaders
44
argument_groups:
55
- name: Inputs
66
arguments:
7-
- type: file
7+
- type: string
88
name: --input
99
required: true
10-
description: A 10x xenium directory or zip file
10+
description: A 10x xenium directory or zip file or download url
1111
- type: string
1212
name: --segmentation_id
1313
required: true

src/datasets/loaders/tenx_xenium/script.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,38 @@
88

99
## VIASH START
1010
par = {
11-
"input": "temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs",
11+
"input": "https://cf.10xgenomics.com/samples/xenium/1.9.0/Xenium_V1_hLiver_cancer_section_FFPE/Xenium_V1_hLiver_cancer_section_FFPE_outs.zip",
1212
"segmentation_id": [
1313
"cell",
1414
"nucleus",
1515
],
16-
"output": "output.zarr",
1716
"dataset_id": "value",
1817
"dataset_name": "value",
1918
"dataset_url": "value",
2019
"dataset_reference": "value",
2120
"dataset_summary": "value",
2221
"dataset_description": "value",
2322
"dataset_organism": "value",
24-
"output": "temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium_rep1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zarr"
23+
"output": "temp/datasets/10x_xenium/liver/liver.zarr"
24+
}
25+
meta = {
26+
"cpus": 1,
2527
}
26-
## VIASH END
2728

29+
## VIASH END
2830

29-
# if input is a zip, extract it to a temporary folder
31+
# Download the data if it's a download url, extract the data if it's a zip file
3032
par_input = par["input"]
3133
with tempfile.TemporaryDirectory() as tmpdirname:
34+
if par_input.startswith(("http://", "https://")):
    # Local import: the file's top-level import block is not visible in this
    # hunk, so the dependency is brought into scope right where it is used.
    import subprocess

    print(f"Downloading data to {tmpdirname}", flush=True)
    # Use the last path segment of the URL as the local file name.
    file_name = par_input.split("/")[-1]
    download_path = os.path.join(tmpdirname, file_name)
    # Pass the arguments as a list (no shell) so characters such as '&', '?'
    # or spaces in the URL cannot be interpreted by a shell, and use
    # check=True so a failed download raises instead of silently leaving an
    # empty or partial file that is later mistaken for a non-zip input.
    subprocess.run(["wget", par_input, "-O", download_path], check=True)
    par_input = download_path
40+
3241
if zipfile.is_zipfile(par_input):
33-
print("Extracting input zip", flush=True)
42+
print(f"Extracting input zip to {tmpdirname}", flush=True)
3443
with zipfile.ZipFile(par_input, "r") as zip_ref:
3544
zip_ref.extractall(tmpdirname)
3645
par_input = tmpdirname

src/datasets/workflows/process_tenx_xenium/config.vsh.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ namespace: datasets/workflows
44
argument_groups:
55
- name: Inputs
66
arguments:
7-
- type: file
7+
- type: string
88
name: --input
99
required: true
10-
description: A 10x xenium directory or zip file
10+
description: A 10x xenium directory or zip file or download url
1111
- type: string
1212
name: --segmentation_id
1313
required: true

0 commit comments

Comments
 (0)