galaxyproject · pavanvidem · Oct 30, 2025 · Oct 30, 2025 · bernt-matthias · Oct 30, 2025
diff --git a/tools/celltypist/.shed.yml b/tools/celltypist/.shed.yml
@@ -0,0 +1,12 @@
+name: celltypist  # repository name; matches the tool directory tools/celltypist
+owner: iuc
+description: "CellTypist – Automated cell type annotation for scRNA-seq datasets"
+homepage_url: https://www.celltypist.org/
+long_description: |
+    CellTypist is an automated cell type annotation tool for scRNA-seq datasets on the basis of logistic regression classifiers optimised by the stochastic gradient descent algorithm. CellTypist allows for cell prediction using either built-in (with a current focus on immune sub-populations) or custom models, in order to assist in the accurate classification of different cell types and subtypes.
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/celltypist  # location of this wrapper's source
+type: unrestricted
+categories:  # categories the repository is listed under
+  - Single Cell
+  - Spatial Omics
+  - Transcriptomics
diff --git a/tools/celltypist/celltypist.xml b/tools/celltypist/celltypist.xml
@@ -0,0 +1,135 @@
+<tool id="celltypist" name="CellTypist" version="@TOOL_VERSION@" profile="24.0">
+    <description>Automated cell type annotation for scRNA-seq datasets</description>
+    <macros> <!-- @TOOL_VERSION@ is reused by the tool's version attribute and by the celltypist package requirement, keeping the two in sync. -->
+        <token name="@TOOL_VERSION@">1.7.1</token>
+    </macros>
+    <xrefs> <!-- Cross-reference to the tool's entry in the bio.tools registry. -->
+        <xref type="bio.tools">celltypist</xref>
+    </xrefs>
+    <requirements> <!-- Only celltypist is pinned; the script also imports scanpy, presumably pulled in as a celltypist dependency (verify in the resolved environment). -->
+        <requirement type="package" version="@TOOL_VERSION@">celltypist</requirement>
+    </requirements>
+    <command><![CDATA[
+cat '$script_file' &&
+python '$script_file'
+    ]]>
+    </command> <!-- Prints the rendered script_file configfile to stdout, then runs it with python. -->
+    <configfiles> <!-- Cheetah-templated Python script; the command section prints it and then executes it. -->
+        <configfile name="script_file"><![CDATA[
+import scanpy as sc
+import celltypist
+from celltypist import models
+## Galaxy renders boolean params as the strings true/false, which are not Python literals, so enabled flags are written out as True.
+adata = sc.read_h5ad('$adata')
+models.download_models(model='$model_name')
+model = models.Model.load(model='$model_name')
+predictions = celltypist.annotate(adata,
+                model=model,
+#if $majority_voting
+                majority_voting=True,
+#end if
+#if $transpose_input
+                transpose_input=True,
+#end if
+                mode='$mode',
+                p_thres=$p_thres,
+                min_prop=$min_prop)
+## Convert the prediction result back to an AnnData object and write it to the tool output.
+adata = predictions.to_adata()
+adata.write_h5ad('$anndata_out', compression='gzip')
+]]>
+        </configfile>
+    </configfiles>
+    <inputs> <!-- Parameters below are substituted into the script_file configfile by name. -->
+        <param name="adata" type="data" format="h5ad" label="Input AnnData file" />
+        <!-- Models are downloaded by the script at run time for now; a data manager will be added for reproducibility. -->
+        <param name="model_name" type="select" label="Choose CellTypist model">
+            <option value="Immune_All_Low.pkl" selected="true">Immune_All_Low.pkl</option>
+            <option value="Immune_All_High.pkl">Immune_All_High.pkl</option>
+            <option value="Adult_COVID19_PBMC.pkl">Adult_COVID19_PBMC.pkl</option>
+            <option value="Adult_CynomolgusMacaque_Hippocampus.pkl">Adult_CynomolgusMacaque_Hippocampus.pkl</option>
+            <option value="Adult_Human_MTG.pkl">Adult_Human_MTG.pkl</option>
+        </param>
+        <param name="majority_voting" type="boolean" label="Refine the predicted labels by running the majority voting classifier after over-clustering" checked="true" /> <!-- boolean defaults are set with 'checked', not 'value' -->
+        <param name="transpose_input" type="boolean" label="Transpose the input matrix if it is provided in gene-by-cell format" checked="false" help="Note Celltypist requires the cell-by-gene format"/>
+        <param name="mode" type="select" label="Annotation mode">
+            <option value="best match">Choose the cell type with the largest score/probability as the final prediction</option>
+            <option value="prob match">Enable a multi-label classification utilising a probability threshold</option>
+        </param>
+        <param name="p_thres" type="float" value="0.5" min="0" max="1" label="Probability threshold for the multi-label classification" help="Ignored if mode is best match." />
+        <param name="min_prop" type="float" value="0" min="0" max="1" label="The minimum proportion of cells required to support naming of the subcluster by this cell type" help="Ignored if majority_voting is set to False"/>
+    </inputs>
+    <outputs> <!-- Single h5ad dataset; the script writes it with adata.write_h5ad('$anndata_out', ...). -->
+        <data name="anndata_out" format="h5ad" label="${tool.name} on ${on_string}: AnnData with celltype annotations" />
+    </outputs>
+    <tests> <!-- Both tests fetch the 500-cell demo dataset by URL; neither enables transpose_input. -->
+        <test expect_num_outputs="1"> <!-- best match mode with majority voting enabled -->
+            <param name="adata" location="https://celltypist.cog.sanger.ac.uk/Notebook_demo_data/demo_500_cells.h5ad"/>
+            <param name="model_name" value="Immune_All_Low.pkl" />
+            <param name="majority_voting" value="True" />
+            <param name="mode" value="best match" />
+            <param name="p_thres" value="0.5" />
+            <param name="min_prop" value="0.05" />
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/predicted_labels"/>
+                    <has_h5_keys keys="obs/over_clustering"/>
+                    <has_h5_keys keys="obs/majority_voting"/>
+                    <has_h5_keys keys="obs/conf_score"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1"> <!-- prob match mode with majority voting disabled -->
+            <param name="adata" location="https://celltypist.cog.sanger.ac.uk/Notebook_demo_data/demo_500_cells.h5ad"/>
+            <param name="model_name" value="Immune_All_High.pkl" />
+            <param name="majority_voting" value="False" />
+            <param name="mode" value="prob match" />
+            <param name="p_thres" value="0.5" />
+            <param name="min_prop" value="0.05" />
+            <!-- One output element per dataset: the reference-file comparison and the key assertions are declared together instead of in two elements with the same name. -->
+            <output name="anndata_out" ftype="h5ad" file="celltypist_annotated.h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/predicted_labels"/>
+                    <has_h5_keys keys="obs/conf_score"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+CellTypist_ is an automated cell type annotation tool for scRNA-seq datasets on the basis of logistic regression classifiers optimised by the stochastic gradient descent algorithm. CellTypist allows for cell prediction using either built-in (with a current focus on immune sub-populations) or custom models, in order to assist in the accurate classification of different cell types and subtypes.
+
+
+.. _CellTypist: https://www.celltypist.org/
+
+------
+
+**Inputs**
+
+An anndata file in h5ad format that usually contains clustering results from single-cell RNA-seq analysis.
+
+------
+
+**Outputs**
+
+An anndata file in h5ad format with predicted cell type annotations added to the .obs attribute, for example::
+
+             cell_type     predicted_labels  over_clustering  majority_voting     conf_score
+    cell1    Plasma cells  Plasma cells      13               Follicular B cells  0.996313
+    cell2    Plasma cells  Plasma cells      6                Plasma cells        0.999478
+    cell3    Plasma cells  Plasma cells      12               Plasma cells        0.999957
+    cell4    Plasma cells  Plasma cells      6                Plasma cells        0.996070
+    cell5    Plasma cells  Plasma cells      6                Plasma cells        0.998888
+    ...      ...           ...               ...              ...                 ...
+    cell496  Macro_pDC     pDC               9                Macrophages         0.187152
+    cell497  Macro_pDC     Macrophages       18               pDC                 0.849831
+    cell498  Macro_pDC     Macrophages       9                Macrophages         0.809677
+    cell499  Macro_pDC     Macrophages       9                Macrophages         0.937306
+    cell500  Macro_pDC     pDC               9                Macrophages         0.612069
+
+    ]]>    </help>
+    <citations> <!-- DOI-based citation attached to this tool. -->
+        <citation type="doi">10.1126/science.abl5197</citation>
+    </citations>
+</tool>