Skip to content

Commit 3d2d046

Browse files
committed
clean up processing dataset component and test resource script
1 parent 86d42da commit 3d2d046

File tree

3 files changed

+4
-69
lines changed

3 files changed

+4
-69
lines changed

scripts/create_resources/test_resources.sh

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -19,66 +19,6 @@ DATASET_DIR=resources_test/task_cyto_batch_integration/starter_file
1919

2020
mkdir -p $DATASET_DIR
2121

22-
# TODO: get original_dataset.h5ad from somewhere
23-
24-
# wget https://zenodo.org/records/13928969/files/ID1_Panel1_TP1.fcs?download=1 \
25-
# -O $DATASET_DIR/ID1_Panel1_TP1.fcs
26-
27-
# python << HERE
28-
# import readfcs
29-
# ad = readfcs.read("$DATASET_DIR/ID1_Panel1_TP1.fcs")
30-
# ad.layers["transformed"] = ad.X
31-
# del ad.X
32-
# # todo: add other preprocessing steps to make sure the dataset is a common dataset
33-
# ad.write_h5ad("$DATASET_DIR/common_dataset.h5ad")
34-
# HERE
35-
36-
python << HERE
37-
import anndata as ad
38-
39-
adata = ad.read_h5ad("resources_test/task_cyto_batch_integration/starter_file/common_dataset.h5ad")
40-
41-
channelsofinterest = ['UV379-A',
42-
'UV515-A',
43-
'UV610-A',
44-
'UV735-A',
45-
'V431-A',
46-
'V525-A',
47-
'V586-A',
48-
'V605-A',
49-
'V677-A',
50-
'V710-A',
51-
'V750-A',
52-
'V810-A',
53-
'B530-A',
54-
'B710-A',
55-
'YG586-A',
56-
'YG610-A',
57-
'YG670-A',
58-
'YG780-A',
59-
'R670-A',
60-
'R730-A']
61-
adata.var.rename(columns={"n":"numeric_id"}, inplace=True)
62-
marker_types = ["lineage" if chan in channelsofinterest else 'functional' for chan in adata.var["channel"]]
63-
to_correct = [True if chan in channelsofinterest else False for chan in adata.var["channel"]]
64-
adata.var["marker_type"] = marker_types
65-
adata.var['to_correct'] = to_correct
66-
adata.uns['dataset_id'] = 'XXXXX'
67-
adata.uns['dataset_name'] = 'Summer School data'
68-
adata.uns['dataset_summary'] = 'Draft data for cytometry batch integration benchmark'
69-
adata.uns['dataset_description'] = '''
70-
This is a draft dataset for the cytometry batch integration benchmark (Summer School).
71-
It contains only samples from one batch (Day1).
72-
Even though a preprocessed layer is available, it only contains arcsinh transformed data (not cleaned or compensated data).
73-
'''
74-
adata.uns['dataset_url'] = "https://saeyslab.sites.vib.be"
75-
adata.uns['dataset_organism'] = "mus_musculus"
76-
adata.uns['dataset_reference'] = "unpublished"
77-
78-
out_file = "resources_test/task_cyto_batch_integration/starter_file/common_dataset.h5ad"
79-
adata.write_h5ad(out_file, compression="gzip")
80-
HERE
81-
82 22
# process dataset
83 23
viash run src/data_processors/process_dataset/config.vsh.yaml -- \
84 24
--input $RAW_DATA/common_dataset.h5ad \

src/data_processors/process_dataset/config.vsh.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
__merge__: ../../api/comp_data_processor.yaml
22
name: process_dataset
3-
arguments:
4-
- name: "--validation_sample_names"
5-
type: "string"
6-
description: "The process method to assign train/test."
7-
multiple: true
3+
8 4
resources:
9 5
- type: python_script
10 6
path: script.py

src/data_processors/process_dataset/script.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55
## VIASH START
66
par = {
77
'input': 'resources_test/task_cyto_batch_integration/starter_file/common_dataset.h5ad',
8-
'validation_sample_names': [],
9-
'output_unintegrated': 'unintegrated.h5ad',
10-
'output_unintegrated_censored': 'unintegrated_censored.h5ad',
11-
'output_validation': 'validation.h5ad'
8+
'output_unintegrated': 'resources_test/task_cyto_batch_integration/starter_file/unintegrated.h5ad',
9+
'output_unintegrated_censored': 'resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad',
10+
'output_validation': 'resources_test/task_cyto_batch_integration/starter_file/validation.h5ad'
12 11
}
13 12
meta = {
14 13
'resources_dir': 'target/executable/data_processors/process_dataset',

0 commit comments

Comments
 (0)