forked from openproblems-bio/task_batch_integration
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.vsh.yaml
More file actions
73 lines (66 loc) · 2.46 KB
/
config.vsh.yaml
File metadata and controls
73 lines (66 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Viash component config for the fine-tuned scGPT batch-integration method.
# `__merge__` names the shared base method definition this config builds on.
__merge__: ../../api/base_method.yaml

# Identity and human-readable description of the method.
name: scgpt_finetuned
label: scGPT (fine-tuned)
summary: "A foundation model for single-cell biology (fine-tuned)"
description: |
  scGPT is a foundation model for single-cell biology based on a generative
  pre-trained transformer and trained on a repository of over 33 million cells.
  Here, we fine-tune the pre-trained model for the batch integration task.
references:
  doi:
    - 10.1038/s41592-024-02201-0
links:
  documentation: https://scgpt.readthedocs.io/en/latest/
  repository: https://github.com/bowang-lab/scGPT
info:
  method_types: [embedding]
  preferred_normalization: counts
  variants:
    # One variant is declared; it carries no per-variant overrides (empty value).
    # NOTE(review): nesting of `variants` under `info` is reconstructed from the
    # key order — confirm against the base method schema.
    scgpt_finetuned_default:
# Component-specific arguments: which scGPT model to use, an optional local
# copy of that model, and the number of highly variable genes.
arguments:
  - name: --model_name
    type: string
    description: String giving the name of the scGPT model to use
    choices: ["scGPT_human", "scGPT_CP"]
    default: "scGPT_human"
  # Optional: per the description, the model is downloaded when this is omitted.
  - name: --model
    type: file
    description: |
      Path to the directory containing the scGPT model specified by model_name
      or a .zip/.tar.gz archive to extract. If not given the model will be
      downloaded.
    required: false
  - name: --n_hvg
    type: integer
    default: 3000
    description: Number of highly variable genes to use.
# Files shipped with the component. The first entry is typed as the Python
# script; the remaining entries are plain paths with no explicit type.
resources:
  - type: python_script
    path: script.py
  - path: /src/utils/read_anndata_partial.py
  - path: scgpt_functions.py
  - path: /src/utils/exit_codes.py
# Container build recipe: start from the base image, then run the setup steps.
engines:
  - type: docker
    image: openproblems/base_pytorch_nvidia:1
    # TODO: Try to find working installation of flash attention (flash-attn<1.0.5)
    setup:
      # Disabled alternative setup (commented out):
      #- type: python
      #  pypi:
      #    - gdown
      #    - scgpt # Install from PyPI to get dependencies
      #- type: docker
      #  # Force re-installing from GitHub to get bug fixes
      #  run: pip install --upgrade --no-deps --force-reinstall git+https://github.com/bowang-lab/scGPT.git
      - type: docker
        # Clone scGPT, install the pinned torch stack, flash-attn and the other
        # listed packages, then install the clone in editable mode with
        # --no-deps. Every step is chained with `&&`, so a failing step stops
        # the remaining ones.
        run: |
          git clone https://github.com/bowang-lab/scGPT && \
          pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu121 && \
          pip install "flash-attn<1.0.5" --no-build-isolation && \
          pip install ipykernel pandas scanpy numba "numpy<1.24" torchtext==0.17.0 scib "scvi-tools<1.0" datasets==2.14.5 transformers==4.33.2 wandb "cell-gears<0.0.3" torch_geometric pyarrow==15.0.0 gdown && \
          cd scGPT && pip install -e . --no-deps
# Execution backends. The Nextflow runner additionally carries resource labels
# via `directives.label`.
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [hightime, midmem, midcpu, biggpu]