-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNumbat
More file actions
59 lines (50 loc) · 2.23 KB
/
Numbat
File metadata and controls
59 lines (50 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
library(Seurat)
library(numbat)
# SNP pileup and phasing with 1000 Genome reference; hg38 human genome
# needed packages; numbat, cellsnp-lite, eagle2, samtools
# 1000G SNP VCF; wget https://sourceforge.net/projects/cellsnp/files/SNPlist/genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf.gz
# 1000G Reference Panel; wget https://pklab.org/teng/data/1000G_hg38.zip
Rscript pileup_and_phase.R \
--label {sample} \
--samples {sample} \
--bams {sample}.bam \
--barcodes barcodes.tsv \
--outdir {sample} \
--gmap /Eagle_v2.4.1/tables/genetic_map_hg38_withX.txt.gz \
--snpvcf /genome1K.phase3.SNP_AF5e2.chr1toX.hg38.vcf \
--paneldir /1000G_hg38 \
--ncores ncores
data <- readRDS("object.rds")
seurat_obj <- UpdateSeuratObject(object = data)
seurat_obj$binary_cell_type <- ifelse(grepl("B_cell", seurat_obj$singlr_labels), "malignant", "healthy")
# Subset assumed malignant cells
tumor <- subset(seurat_obj, subset = binary_cell_type == "malignant")
malignant_count_mtx <- as.matrix(tumor@assays$RNA$counts)
# Subset reference healthy cells
normal <- subset(seurat_obj, subset = binary_cell_type == "healthy")
ref_mtx <- as.matrix(normal@assays$RNA$counts)
# Define annotations for the healthy reference
library(dplyr)
cell_info <- data.frame(
cell = rownames(normal@meta.data),
group = normal@meta.data$singlr_labels,
stringsAsFactors = FALSE
)
ref_internal = aggregate_counts(ref_mtx, cell_info)
# Read in the pileup output
pileup <- read.table("allele_counts.tsv", header = TRUE, sep = "\t")
df <- pileup %>% filter(cell %in% colnames(malignant_count_mtx))
# Define breakpoints and their CNV status if ground truth is available
CNV <- read.table("ground_truth_breakpoints.txt", header = TRUE, sep = "\t")
# Run Numbat
out = run_numbat(
count_mtx, # gene x cell integer UMI count matrix
ref_internal, # reference expression profile, a gene x cell type normalized expression level matrix
df, # allele dataframe generated by pileup_and_phase script
genome = "hg38",
init_k = 10, # Optional; initial grouping of tumor cells, higher values better for more heterogeneous samples
ncores = 20,
segs_consensus_fix = CNV, # Optional; if ground truth breakpoints is defined
plot = TRUE,
out_dir = './out_dir'
)