CNV/inferCNV at main · idkaavan/CNV · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# inferCNV: R package for inferring CNVs based on mean expression from scRNA-seq.
# These notes use the Singularity image of inferCNV (see the wget step below).

# Create and activate a conda environment, prefer conda-forge with strict
# channel priority, then install JAGS and r-biocmanager into it.
conda create --name jags_infercnv
conda activate jags_infercnv
conda config --add channels conda-forge
conda config --set channel_priority strict
conda install jags -y
conda install -c conda-forge r-biocmanager -y
# Retrieve the Singularity image (inferCNV 1.20.0).
wget https://data.broadinstitute.org/Trinity/CTAT_SINGULARITY/InferCNV/infercnv-1.20.0.simg

# First cd into the directory where the .simg file was saved, then open a shell
# in the container. -B bind-mounts the project directory so its files are
# visible inside the container; adjust the path to your own system.
singularity shell --writable-tmpfs -e -B '/research/groups/bioinformaticians/internship/aalizade/infercnv/' infercnv-1.20.0.simg
# Once inside the container the prompt reads "Apptainer>".
# Type R there and load the libraries.

# Inputs needed to build the infercnv object: a Seurat object (source of the
# gene x cell count matrix), a tab-delimited cell annotation file, and a gene
# coordinate file (chr, start, end).
library(Seurat)
library(infercnv)

# Read the saved object and pass it through UpdateSeuratObject().
data <- readRDS("object.rds")
seurat_obj <- UpdateSeuratObject(data)

# Binary "malignant"/"healthy" labels used to define the reference: cells whose
# `singlr_labels` entry contains "B_cell" are "malignant", all others "healthy".
# NOTE(review): nothing in this script writes `binary_cell_type` to
# annotation.txt -- presumably that file is produced separately; confirm its
# group labels include "healthy".
is_b_cell <- grepl("B_cell", seurat_obj$singlr_labels)
seurat_obj$binary_cell_type <- ifelse(is_b_cell, "malignant", "healthy")

# Paths to the annotation and gene-order files, relative to the working
# directory.
cell_annotation <- "annotation.txt"
gene_ordering_file <- "geneshg20_order.txt"

# Create the infercnv object from the raw RNA counts of `seurat_obj`, the
# tab-delimited annotation file and the gene-order file. Cells annotated as
# "healthy" serve as the reference group.
# Fix: the counts were previously taken from `sobj`, which is never defined in
# this script; the object created above is `seurat_obj`.
infercnv_obj <- CreateInfercnvObject(
  raw_counts_matrix = LayerData(seurat_obj, assay = "RNA", layer = "counts"),
  annotations_file = cell_annotation,
  delim = "\t",
  gene_order_file = gene_ordering_file,
  ref_group_names = c("healthy")
)

# infercnv CNV calling. Results are written to the "ALL" output directory.
# Logical arguments use TRUE rather than T, since T can be reassigned.
infercnv_obj <- infercnv::run(
  infercnv_obj,
  # Gene filtering threshold: 0.1 is recommended by the developers for 10x
  # data, 1 for less sparse data.
  cutoff = 0.1,
  num_threads = 1,
  # The default "samples" mode operates at the level of whole samples; with
  # "subclusters", CNV prediction is performed at the level of groups of tumor
  # cells rather than the whole sample.
  analysis_mode = "subclusters",
  out_dir = "ALL",
  # Whether to cluster tumor cells by their annotations or not.
  cluster_by_groups = TRUE,
  scale_data = TRUE,
  denoise = TRUE,
  # Whitening effect: values within +/- this distance of the reference cell
  # mean are set to zero.
  noise_filter = 0.12,
  HMM = TRUE,
  # "i6" reports six CNV states (one copy loss, two copies loss, neutral, one
  # copy gain, two copies gain, three copies gain); "i3" reports a simple
  # loss / neutral / gain status.
  HMM_type = "i6"
)