# Source: jkobject/task_batch_integration, src/methods/scgpt_finetuned/config.vsh.yaml @ 03abde877fd43c9514a60d81cbf5d97ddba08235
# Merge in the base method config located at ../../api/base_method.yaml.
__merge__: ../../api/base_method.yaml

# Identity and human-readable metadata of the component.
name: scgpt_finetuned
label: scGPT (fine-tuned)
summary: 'A foundation model for single-cell biology (fine-tuned)'
description: |
  scGPT is a foundation model for single-cell biology based on a generative
  pre-trained transformer and trained on a repository of over 33 million cells.

  Here, we fine-tune the pre-trained model for the batch integration task.

# Publication describing the method.
references:
  doi:
    - 10.1038/s41592-024-02201-0

# Upstream project locations.
links:
  repository: https://github.com/bowang-lab/scGPT
  documentation: https://scgpt.readthedocs.io/en/latest/

# Task-level metadata for this method.
info:
  preferred_normalization: counts
  method_types:
    - embedding
  variants:
    # Single variant; its value is null (no per-variant settings given here).
    scgpt_finetuned_default: null

# Arguments accepted by the component.
arguments:
  # Name of the scGPT model; restricted to the listed choices.
  - name: --model_name
    type: string
    default: 'scGPT_human'
    choices:
      - 'scGPT_human'
      - 'scGPT_CP'
    description: String giving the name of the scGPT model to use

  # Optional local copy of the model (directory or archive).
  - name: --model
    type: file
    required: false
    description: |
      Path to the directory containing the scGPT model specified by model_name
      or a .zip/.tar.gz archive to extract. If not given the model will be
      downloaded.

  # Size of the highly-variable-gene selection.
  - name: --n_hvg
    type: integer
    description: Number of highly variable genes to use.
    default: 3000

# Files bundled with the component.
resources:
  # The only entry with an explicit type: the Python script script.py.
  - path: script.py
    type: python_script
  # Additional files listed by path only (two shared utilities under
  # /src/utils and one local helper module).
  - path: /src/utils/read_anndata_partial.py
  - path: scgpt_functions.py
  - path: /src/utils/exit_codes.py

# Container environment for the component: a single docker engine built on
# the openproblems/base_pytorch_nvidia:1 image.
engines:
  - type: docker
    image: openproblems/base_pytorch_nvidia:1
    # TODO: Try to find working installation of flash attention (flash-attn<1.0.5)
    setup:
      # Disabled earlier setup, kept for reference: install gdown and scgpt
      # from PyPI, then force-reinstall scGPT from GitHub without dependencies.
      #- type: python
      #  pypi:
      #    - gdown
      #    - scgpt # Install from PyPI to get dependencies
      #- type: docker
      #  # Force re-installing from GitHub to get bug fixes
      #  run: pip install --upgrade --no-deps --force-reinstall git+https://github.com/bowang-lab/scGPT.git
      # Active setup, one shell command chained with && (a failing step stops
      # the rest). Order of steps:
      #   1. clone the scGPT repository;
      #   2. install pinned torch/torchvision/torchaudio from the cu121 index;
      #   3. install flash-attn (<1.0.5) with --no-build-isolation;
      #   4. install the remaining pinned Python dependencies;
      #   5. install the cloned scGPT in editable mode with --no-deps, so the
      #      versions installed in steps 2-4 are not replaced.
      - type: docker
        run: |
          git clone https://github.com/bowang-lab/scGPT && \
          pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu121 && \
          pip install "flash-attn<1.0.5" --no-build-isolation && \
          pip install ipykernel pandas scanpy numba "numpy<1.24" torchtext==0.17.0 scib "scvi-tools<1.0" datasets==2.14.5 transformers==4.33.2 wandb "cell-gears<0.0.3" torch_geometric pyarrow==15.0.0 gdown && \
          cd scGPT && pip install -e . --no-deps

# Ways to run the component: as an executable and via nextflow.
runners:
  - type: executable
  - type: nextflow
    directives:
      # Labels attached to the nextflow process.
      label:
        - hightime
        - midmem
        - midcpu
        - biggpu