Skip to content

Commit af3303a

Browse files
Matthew Bernstein
authored and committed
Use variable genes in PCA within clustering wrapper
1 parent 78859d0 commit af3303a

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

cello/scanpy_cello.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,10 @@ def cello(
129129
mod = ce._retrieve_pretrained_model(adata, algo, rsrc_loc)
130130
if mod is None:
131131
mod = ce.train_model(
132-
adata, rsrc_loc, algo=algo, log_dir=log_dir
132+
adata,
133+
rsrc_loc,
134+
algo=algo,
135+
log_dir=log_dir
133136
)
134137
if out_prefix:
135138
out_model_f = '{}.model.dill'.format(out_prefix)
@@ -212,7 +215,8 @@ def normalize_and_cluster(
212215
adata: AnnData,
213216
n_pca_components: int = 50,
214217
n_neighbors: int = 15,
215-
cluster_res: float = 1.0
218+
n_top_genes: int = 10000,
219+
cluster_res: float = 2.0
216220
):
217221
"""
218222
Normalize and cluster an expression matrix in units of raw UMI counts.
@@ -228,7 +232,10 @@ def normalize_and_cluster(
228232
Number of neighbors to use for computing the nearest-neighbors
229233
graph. Clustering is performed using community detection on this
230234
nearest-neighbors graph.
231-
cluster_res (default 1.0)
235+
n_top_genes (default 10000)
236+
Number of genes selected for computing the nearest-neighbors graph
237+
and for clustering.
238+
cluster_res (default 2.0)
232239
Cluster resolution for the Leiden community detection algorithm.
233240
A higher resolution produces more fine-grained, smaller clusters.
234241
"""
@@ -238,7 +245,8 @@ def normalize_and_cluster(
238245
sys.exit("The function 'normalize_and_cluster' requires that scanpy package be installed. To install scanpy, run 'pip install scanpy'")
239246
sc.pp.normalize_total(adata, target_sum=1e6)
240247
sc.pp.log1p(adata)
241-
sc.pp.pca(adata, n_comps=n_pca_components)
248+
sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes)
249+
sc.pp.pca(adata, n_comps=n_pca_components, use_highly_variable=True)
242250
sc.pp.neighbors(adata, n_neighbors=n_neighbors)
243251
sc.tl.leiden(adata, resolution=cluster_res)
244252

0 commit comments

Comments
 (0)