Skip to content

Commit c7dd0ee

Browse files
committed
Adjust metadata saving in dataset processor
1 parent 69e6b4d commit c7dd0ee

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

src/data_processors/process_dataset/script.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,14 @@
99
"input_sc": "resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad",
1010
"input_sp": "resources_test/common/2023_10x_mouse_brain_xenium_rep1/dataset.zarr",
1111
"output_scrnaseq": "resources_test/task_ist_preprocessing/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad",
12-
"output_ist": "resources_test/task_ist_preprocessing/2023_10x_mouse_brain_xenium_rep1/dataset.zarr"
12+
"output_ist": "resources_test/task_ist_preprocessing/2023_10x_mouse_brain_xenium_rep1/dataset.zarr",
13+
"dataset_id": "mouse_brain_combined",
14+
"dataset_name": "Test data mouse brain combined 2023 tenx Xenium replicate 1 2023 Yao scRNAseq",
15+
"dataset_url": "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717",
16+
"dataset_reference": "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;10.1038/s41586-023-06812-z",
17+
"dataset_summary": "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1);A high-resolution scRNAseq atlas of cell types in the whole mouse brain",
18+
"dataset_description": "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1). Replicate results demonstrate the high reproducibility of data generated by the platform. 10x Genomics obtained tissue from a C57BL/6 mouse from Charles River Laboratories. Three adjacent 10µm sections were placed on the same slide. Tissues were prepared following the demonstrated protocols Xenium In Situ for Fresh Frozen Tissues - Tissue Preparation Guide (CG000579) and Xenium In Situ for Fresh Frozen Tissues - Fixation & Permeabilization (CG000581).;See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset.",
19+
"dataset_organism": "mus_musculus"
1320
}
1421
### VIASH END
1522

@@ -129,7 +136,7 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
129136
subsample_from_idx = i
130137
break
131138
df["sum"] = tmp
132-
139+
133140
# Get number of samples per group
134141
n_samples_no_sampling = df.iloc[:subsample_from_idx]["n_cells"].sum()
135142
n_samples_to_subsample = n_samples - n_samples_no_sampling
@@ -156,7 +163,7 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
156163
ct_obs = mask_df.loc[mask_df[group_key] == ct].index
157164
ct_obs_subsample = np.random.choice(ct_obs, size=df.iloc[i]["n_samples"], replace=False)
158165
mask_df.loc[ct_obs_subsample, "in_subsample"] = True
159-
166+
160167
return mask_df["in_subsample"]
161168

162169

@@ -190,9 +197,11 @@ def subsample_adata_group_balanced(adata, group_key, n_samples, seed=0):
190197
if orig_col in adata.uns:
191198
adata.uns[orig_col] = adata.uns[col]
192199
adata.uns[col] = par[col]
193-
if orig_col in sdata.table.uns:
194-
sdata.table.uns[orig_col] = sdata.table.uns[col]
195-
sdata.table.uns[col] = par[col]
200+
if not ("table" in sdata.tables):
201+
sdata["table"] = ad.AnnData(uns={})
202+
if orig_col in sdata["table"].uns:
203+
sdata["table"].uns[orig_col] = sdata["table"].uns[col]
204+
sdata["table"].uns[col] = par[col]
196205

197206
# Correct the feature_key attribute in sdata if needed
198207
# NOTE: it would have been better to do this in the loader scripts, but this way the datasets don't need to be re-downloaded

0 commit comments

Comments
 (0)