@@ -19,66 +19,6 @@ DATASET_DIR=resources_test/task_cyto_batch_integration/starter_file
1919
2020mkdir -p $DATASET_DIR
2121
22- # TODO: get original_dataset.h5ad from somewhere
23-
24- # wget https://zenodo.org/records/13928969/files/ID1_Panel1_TP1.fcs?download=1 \
25- # -O $DATASET_DIR/ID1_Panel1_TP1.fcs
26-
27- # python << HERE
28- # import readfcs
29- # ad = readfcs.read("$DATASET_DIR/ID1_Panel1_TP1.fcs")
30- # ad.layers["transformed"] = ad.X
31- # del ad.X
32- # # todo: add other preprocessing steps to make sure the dataset is a common dataset
33- # ad.write_h5ad("$DATASET_DIR/common_dataset.h5ad")
34- # HERE
35-
36- python << HERE
37- import anndata as ad
38-
39- adata = ad.read_h5ad("resources_test/task_cyto_batch_integration/starter_file/common_dataset.h5ad")
40-
41- channelsofinterest = ['UV379-A',
42- 'UV515-A',
43- 'UV610-A',
44- 'UV735-A',
45- 'V431-A',
46- 'V525-A',
47- 'V586-A',
48- 'V605-A',
49- 'V677-A',
50- 'V710-A',
51- 'V750-A',
52- 'V810-A',
53- 'B530-A',
54- 'B710-A',
55- 'YG586-A',
56- 'YG610-A',
57- 'YG670-A',
58- 'YG780-A',
59- 'R670-A',
60- 'R730-A']
61- adata.var.rename(columns={"n":"numeric_id"}, inplace=True)
62- marker_types = ["lineage" if chan in channelsofinterest else 'functional' for chan in adata.var["channel"]]
63- to_correct = [True if chan in channelsofinterest else False for chan in adata.var["channel"]]
64- adata.var["marker_type"] = marker_types
65- adata.var['to_correct'] = to_correct
66- adata.uns['dataset_id'] = 'XXXXX'
67- adata.uns['dataset_name'] = 'Summer School data'
68- adata.uns['dataset_summary'] = 'Draft data for cytometry batch integration benchmark'
69- adata.uns['dataset_description'] = '''
70- This is a draft dataset for the cytometry batch integration benchmark (Summer School).
71- It contains only samples from one batch (Day1).
72- Even though a preprocessed layer is available, it only contains arcsinh transformed data (not cleaned or compensated data).
73- '''
74- adata.uns['dataset_url'] = "https://saeyslab.sites.vib.be"
75- adata.uns['dataset_organism'] = "mus_musculus"
76- adata.uns['dataset_reference'] = "unpublished"
77-
78- out_file = "resources_test/task_cyto_batch_integration/starter_file/common_dataset.h5ad"
79- adata.write_h5ad(out_file, compression="gzip")
80- HERE
81-
8222# process dataset
8323viash run src/data_processors/process_dataset/config.vsh.yaml -- \
8424 --input $RAW_DATA/common_dataset.h5ad \
0 commit comments