Skip to content

Commit e0f5044

Browse files
committed
Add CellTypist tool for automated cell type annotation
1 parent 450d1ac commit e0f5044

File tree

2 files changed

+124
-0
lines changed

2 files changed

+124
-0
lines changed

tools/celltypist/.shed.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
name: celltypist  # repository name; matches the tools/celltypist directory this file lives in
2+
owner: iuc  # presumably the Tool Shed account that owns the repository; confirm
3+
description: "CellTypist – Automated cell type annotation for scRNA-seq datasets"
4+
homepage_url: https://www.celltypist.org/
5+
long_description: |
6+
CellTypist is an automated cell type annotation tool for scRNA-seq datasets on the basis of logistic regression classifiers optimised by the stochastic gradient descent algorithm. CellTypist allows for cell prediction using either built-in (with a current focus on immune sub-populations) or custom models, in order to assist in the accurate classification of different cell types and subtypes.
7+
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/celltypist
8+
type: unrestricted  # NOTE(review): Tool Shed repository type; confirm against the Planemo .shed.yml documentation
9+
categories:  # presumably the Tool Shed categories the repository is listed under; confirm
10+
- Single Cell
11+
- Spatial Omics
12+
- Transcriptomics

tools/celltypist/celltypist.xml

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
<tool id="celltypist" name="CellTypist" version="@TOOL_VERSION@" profile="24.0">
2+
<description>Automated cell type annotation for scRNA-seq datasets</description>
3+
<macros>
4+
<token name="@TOOL_VERSION@">1.7.1</token> <!-- single source for the version: reused by the tool's version attribute and by the package requirement -->
5+
</macros>
6+
<xrefs>
7+
<xref type="bio.tools">celltypist</xref> <!-- identifier of this tool in the bio.tools registry -->
8+
</xrefs>
9+
<requirements>
10+
<requirement type="package" version="@TOOL_VERSION@">celltypist</requirement> <!-- the only declared package; the script also imports scanpy, presumably installed as a celltypist dependency (confirm) -->
11+
</requirements>
12+
<!-- Stage the input AnnData under a fixed file name, print the generated Python script to stdout
     so it ends up in the job log, then run the script. The chain must not end with a dangling
     shell AND operator, otherwise the shell rejects the whole command line. -->
<command><![CDATA[
13+
cp '$adata' 'anndata.h5ad' &&
14+
cat '$script_file' &&
15+
python '$script_file'
16+
]]>
17+
</command>
18+
<configfiles>
19+
<!-- Cheetah-templated Python driver executed by the command section.
     Steps: read the staged AnnData, download and load the selected CellTypist model, run
     celltypist.annotate with the user-supplied options, and write the annotated AnnData to
     the tool output.
     Parameter values are substituted verbatim into the script, so the boolean parameters
     must render as valid Python literals.
     download_models and Model.load take the model name through the "model" keyword; models
     are stored in CellTypist's own data folder (the CELLTYPIST_FOLDER environment variable
     if set, otherwise a .celltypist folder in the home directory). -->
<configfile name="script_file"><![CDATA[
20+
import scanpy as sc
21+
import celltypist
22+
from celltypist import models
23+
adata = sc.read_h5ad('anndata.h5ad')
24+
models.download_models(model='$model_name')
25+
model = models.Model.load(model='$model_name')
26+
predictions = celltypist.annotate(adata,
27+
    model=model,
28+
    majority_voting=$majority_voting,
29+
    transpose_input=$transpose_input,
30+
    mode='$mode',
31+
    p_thres=$p_thres,
32+
    min_prop=$min_prop)
33+
34+
adata = predictions.to_adata()
35+
adata.write_h5ad('$anndata_out', compression='gzip')
36+
]]>
37+
</configfile>
38+
</configfiles>
39+
<inputs>
40+
<param name="adata" type="data" format="h5ad" label="Input AnnData file" />
41+
<!-- will add a datamanager for reproducibility -->
42+
<param name="model_name" type="select" label="Choose CellTypist model">
43+
<option value="Immune_All_Low.pkl" selected="true">Immune_All_Low.pkl</option>
44+
<option value="Immune_All_High.pkl">Immune_All_High.pkl</option>
45+
<option value="Adult_COVID19_PBMC.pkl">Adult_COVID19_PBMC.pkl</option>
46+
<option value="Adult_CynomolgusMacaque_Hippocampus.pkl">Adult_CynomolgusMacaque_Hippocampus.pkl</option>
47+
<option value="Adult_Human_MTG.pkl">Adult_Human_MTG.pkl</option>
48+
</param>
49+
<!-- Booleans are pasted verbatim into the generated Python script, so truevalue/falsevalue
     are set to the Python literals True/False; the default state is given with "checked". -->
<param name="majority_voting" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Refine the predicted labels by running the majority voting classifier after over-clustering" />
50+
<param name="transpose_input" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Transpose the input matrix if it is provided in the gene-by-cell format." help="Note Celltypist requires the cell-by-gene format"/>
51+
<param name="mode" type="select" label="Annotation mode">
52+
<option value="best match">Choose the cell type with the largest score/probability as the final prediction</option>
53+
<option value="prob match">Enable a multi-label classification utilising a probability threshold</option>
54+
</param>
55+
<!-- Both values are fractions, so they are restricted to the closed interval from 0 to 1. -->
<param name="p_thres" type="float" min="0" max="1" label="Probability threshold" value="0.5" />
56+
<param name="min_prop" type="float" min="0" max="1" label="Minimum proportion for cell type assignment" value="0.05" />
57+
<!-- NOTE(review): "reference" and "prediction" are not read by the generated script;
     presumably reserved for a dotplot output that is not implemented yet. Kept so the
     tool interface stays unchanged. -->
<param name="reference" type="text" label="Reference column in AnnData.obs for dotplot" value="cell_type" />
58+
<param name="prediction" type="text" label="Prediction column in AnnData.obs for dotplot" value="predicted_labels" />
59+
</inputs>
60+
<outputs>
61+
<data name="anndata_out" format="h5ad" label="${tool.name} on ${on_string}: AnnData with celltype annotations" /> <!-- the generated script writes the annotated AnnData straight to this dataset path -->
62+
</outputs>
63+
<tests>
64+
<!-- Smoke test on the public 500-cell demo dataset with the Immune_All_Low model.
     The output is checked structurally (annotation columns present under obs) instead of
     being compared with a stored file: no expected-output file ships with the tool, and a
     byte comparison of gzip-compressed HDF5 would be fragile. -->
<test expect_num_outputs="1">
65+
<param name="adata" location="https://celltypist.cog.sanger.ac.uk/Notebook_demo_data/demo_500_cells.h5ad"/>
66+
<param name="model_name" value="Immune_All_Low.pkl" />
67+
<param name="majority_voting" value="True" />
68+
<param name="transpose_input" value="False" />
69+
<param name="mode" value="best match" />
70+
<param name="p_thres" value="0.5" />
71+
<param name="min_prop" value="0.05" />
72+
<output name="anndata_out" ftype="h5ad">
<assert_contents>
<has_h5_keys keys="obs/predicted_labels,obs/majority_voting"/>
</assert_contents>
</output>
73+
</test>
74+
</tests>
75+
<help><![CDATA[
76+
**What it does**
77+
78+
CellTypist_ is an automated cell type annotation tool for scRNA-seq datasets on the basis of logistic regression classifiers optimised by the stochastic gradient descent algorithm. CellTypist allows for cell prediction using either built-in (with a current focus on immune sub-populations) or custom models, in order to assist in the accurate classification of different cell types and subtypes.
79+
80+
81+
.. _CellTypist: https://www.celltypist.org/
82+
83+
------
84+
85+
**Inputs**
86+
87+
An anndata file in h5ad format that usually contains clustering results from single-cell RNA-seq analysis.
88+
89+
------
90+
91+
**Outputs**
92+
93+
An anndata file in h5ad format with predicted cell type annotations added to the .obs attribute.
94+
95+
cell_type predicted_labels over_clustering majority_voting conf_score
96+
cell1 Plasma cells Plasma cells 13 Follicular B cells 0.996313
97+
cell2 Plasma cells Plasma cells 6 Plasma cells 0.999478
98+
cell3 Plasma cells Plasma cells 12 Plasma cells 0.999957
99+
cell4 Plasma cells Plasma cells 6 Plasma cells 0.996070
100+
cell5 Plasma cells Plasma cells 6 Plasma cells 0.998888
101+
... ... ... ... ... ...
102+
cell496 Macro_pDC pDC 9 Macrophages 0.187152
103+
cell497 Macro_pDC Macrophages 18 pDC 0.849831
104+
cell498 Macro_pDC Macrophages 9 Macrophages 0.809677
105+
cell499 Macro_pDC Macrophages 9 Macrophages 0.937306
106+
cell500 Macro_pDC pDC 9 Macrophages 0.612069
107+
108+
]]> </help>
109+
<citations>
110+
<citation type="doi">10.1126/science.abl5197</citation> <!-- DOI of the publication to cite when using this tool -->
111+
</citations>
112+
</tool>

0 commit comments

Comments
 (0)