-
Notifications
You must be signed in to change notification settings - Fork 101
Open
Labels
bugSomething isn't workingSomething isn't working
Description
Dear authors, I find the nCount_ATAC in the metadata will decrease for all the cells after running TSSEnrichment(ATAC, fast = FALSE) for a Seurat object with snATAC data from several samples (with separate fragment.tsv.gz). I checked the original code for TSSEnrichment but have no idea why this will happen.
I have tried on my own data and public data, the same problem emerges. The problem will only occur when fast set to FALSE and with number of samples >=2.
public data used:
- https://storage.googleapis.com/linnarsson-lab-human/ATAC_dev/10X/10X290_1_ABCD_1.tar.gz
- https://storage.googleapis.com/linnarsson-lab-human/ATAC_dev/10X/10X346_2_ABCD_2.tar.gz
# 1. QC on single sample =========================================
# get gene annotations for hg38
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevels(annotation) <- paste0('chr', seqlevels(annotation))
counts=Read10X_h5(paste0(Parent_Dir,df$filename[i],"filtered_feature_bc_matrix.h5"))
# create a Seurat object containing the RNA adata
S290_1 <- CreateSeuratObject(
counts = counts$`Gene Expression`,
assay = "RNA"
)
S290_1[["percent.mito"]] <- PercentageFeatureSet(S290_1, pattern = "^MT-")
S290_1[["percent.rb"]] <- PercentageFeatureSet(S290_1, pattern = "^RP[SL]")
genes <- rownames(S290_1)
remain <- genes[-grep(pattern = "^GM1|^GM2|^GM3|^GM4|^GM5|^GM6|^GM7|^GM8|^GM9|RIK", x = genes)]
S290_1 <- subset(S290_1, features = remain)
# create ATAC assay and add it to the object
S290_1[["ATAC"]] <- CreateChromatinAssay(
counts = counts$Peaks,
sep = c(":", "-"),
fragments = fragpath,
annotation = annotation
)
S290_1@meta.data$orig.ident <- paste0('S',df$Sample_ID[[i]])
S290_1 <- NucleosomeSignal(S290_1)
S290_1 <- TSSEnrichment(S290_1)
S290_1$blacklist_fraction <- FractionCountsInRegion(object = S290_1, assay = 'ATAC',regions = blacklist_hg38)
# filter out low quality cells
max_nFeature_RNA = quantile(S290_1$nFeature_RNA, 0.95)
S290_1 <- subset(
x = S290_1,
subset = nCount_ATAC < 100000 &
nFeature_RNA < max_nFeature_RNA &
nCount_ATAC > 500 &
nFeature_RNA > 500 &
nucleosome_signal < 2 &
TSS.enrichment > 2 &
#blacklist_fraction < 0.05 & #暂时没有进行这项质控
percent.mito < 5 & percent.rb < 10
)
meta_data <- S290_1@meta.data
meta_dir=paste0(Parent_Dir,"Meta/",S290_1@meta.data$orig.ident[1],"_meta_0603.csv")
# 导出为 CSV 文件
write.csv(meta_data, file = meta_dir, row.names = TRUE)
2. Combine two samples =======================================
peaks1=read.table(
file = paste0(Parent_Dir,'10X290_1_ABCD_1/atac_peaks.bed'),
col.names = c("chr", "start", "end")
)
peaks1 <- makeGRangesFromDataFrame(peaks1)
peaks2=read.table(
file = paste0(Parent_Dir,'10X346_2_ABCD_2/atac_peaks.bed'),
col.names = c("chr", "start", "end")
)
peaks2 <- makeGRangesFromDataFrame(peaks2)
chr<-c()
for (i in 1:22){
chr<-c(chr, paste0("chr",i))
}
chr<-c(chr,"chrX")
chr<-c(chr,"chrY")
peaks1<-peaks1[seqnames(peaks1) %in% chr]
peaks2<-peaks2[seqnames(peaks2) %in% chr]
peaks=list(peaks1,peaks2)
peaks_gl <- GRangesList(peaks) # list -> GRangesList
all_peaks <- unlist(peaks_gl, use.names=FALSE)
combined.peaks <- reduce(all_peaks)
# Filter out bad peaks based on length
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths < 10000 & peakwidths > 20]
Meta1=read.table(
file = paste0(Parent_Dir,'Meta/S290_1_meta_0603.csv'),
stringsAsFactors = FALSE,
sep = ",",
header = TRUE,
row.names = 1
)
Meta2=read.table(
file = paste0(Parent_Dir,'Meta/S346_2_meta_0603.csv'),
stringsAsFactors = FALSE,
sep = ",",
header = TRUE,
row.names = 1
)
frags1= CreateFragmentObject(
path = paste0(Parent_Dir,"10X290_1_ABCD_1/atac_fragments.tsv.gz"),
cells = rownames(Meta1) #根据metadata里的细胞保留
)
frags2= CreateFragmentObject(
path = paste0(Parent_Dir,"10X346_2_ABCD_2/atac_fragments.tsv.gz"),
cells = rownames(Meta2) #根据metadata里的细胞保留
)
counts1=FeatureMatrix(
fragments = frags1,
features = combined.peaks,
cells = rownames(Meta1)
)
counts2=FeatureMatrix(
fragments = frags2,
features = combined.peaks,
cells = rownames(Meta2)
)
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevels(annotation) <- paste0('chr', seqlevels(annotation))
# seqlevelsStyle(annotation) <- 'UCSC'
genome(annotation) <- "hg38"
assay1=CreateChromatinAssay(counts1, fragments = frags1,annotation = annotation)
assay2=CreateChromatinAssay(counts2, fragments = frags2,annotation = annotation)
obj1=CreateSeuratObject(assay1, assay = "ATAC", meta.data=Meta1)
obj2=CreateSeuratObject(assay2, assay = "ATAC", meta.data=Meta2)
# merge all datasets, adding a cell ID to make sure cell names are unique
combined <- merge(
x = obj1,
y = obj2,
add.cell.ids = c('1','2')
)
# 3. check the nCount_ATAC ===========================
summary(combined@meta.data$nCount_ATAC)
Meta=combined@meta.data
table(Meta[Meta$nCount_ATAC<500,'orig.ident'])
# 4. run the TssEnrichment =================================
ATAC=combined
ATAC = TSSEnrichment(ATAC, fast = FALSE)
summary(ATAC@meta.data$nCount_ATAC)
Meta=ATAC@meta.data
table(Meta[Meta$nCount_ATAC<500,'orig.ident'])
table(ATAC@meta.data$nCount_ATAC<combined@meta.data$nCount_ATAC)
You will find the table([email protected]$nCount_ATAC<[email protected]$nCount_ATAC) returns TRUE
sessionInfo():
R version 4.4.3 (2025-02-28)
Platform: x86_64-conda-linux-gnu
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS/LAPACK: /public/home/yzhang/micromamba/envs/ATAC2/lib/libopenblasp-r0.3.29.so; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=zh_CN.UTF-8 LC_NUMERIC=C
[3] LC_TIME=zh_CN.UTF-8 LC_COLLATE=zh_CN.UTF-8
[5] LC_MONETARY=zh_CN.UTF-8 LC_MESSAGES=zh_CN.UTF-8
[7] LC_PAPER=zh_CN.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C
time zone: Asia/Shanghai
tzcode source: system (glibc)
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] BSgenome.Hsapiens.UCSC.hg38_1.4.5 BSgenome_1.74.0
[3] rtracklayer_1.66.0 BiocIO_1.16.0
[5] Biostrings_2.74.0 XVector_0.46.0
[7] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.30.0
[9] AnnotationFilter_1.30.0 GenomicFeatures_1.58.0
[11] AnnotationDbi_1.68.0 Biobase_2.66.0
[13] GenomicRanges_1.58.0 GenomeInfoDb_1.42.0
[15] IRanges_2.40.0 S4Vectors_0.44.0
[17] BiocGenerics_0.52.0 Seurat_5.3.0
[19] SeuratObject_5.1.0 sp_2.2-0
[21] Signac_1.14.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.4.3
[3] later_1.4.2 pbdZMQ_0.3-14
[5] bitops_1.0-9 tibble_3.2.1
[7] polyclip_1.10-7 rpart_4.1.24
[9] XML_3.99-0.17 fastDummies_1.7.5
[11] lifecycle_1.0.4 globals_0.17.0
[13] lattice_0.22-7 MASS_7.3-64
[15] backports_1.5.0 magrittr_2.0.3
[17] rmarkdown_2.29 Hmisc_5.2-3
[19] plotly_4.10.4 yaml_2.3.10
[21] httpuv_1.6.15 sctransform_0.4.1
[23] spam_2.11-1 spatstat.sparse_3.1-0
[25] reticulate_1.42.0 cowplot_1.1.3
[27] pbapply_1.7-2 DBI_1.2.3
[29] RColorBrewer_1.1-3 abind_1.4-5
[31] zlibbioc_1.52.0 Rtsne_0.17
[33] purrr_1.0.4 biovizBase_1.54.0
[35] RCurl_1.98-1.16 nnet_7.3-20
[37] VariantAnnotation_1.52.0 GenomeInfoDbData_1.2.13
[39] ggrepel_0.9.6 irlba_2.3.5.1
[41] listenv_0.9.1 spatstat.utils_3.1-3
[43] goftest_1.2-3 RSpectra_0.16-2
[45] spatstat.random_3.3-3 fitdistrplus_1.2-2
[47] parallelly_1.43.0 DelayedArray_0.32.0
[49] codetools_0.2-20 RcppRoll_0.3.1
[51] tidyselect_1.2.1 UCSC.utils_1.2.0
[53] farver_2.1.2 matrixStats_1.5.0
[55] base64enc_0.1-3 spatstat.explore_3.4-2
[57] GenomicAlignments_1.42.0 jsonlite_2.0.0
[59] Formula_1.2-5 progressr_0.15.1
[61] ggridges_0.5.6 survival_3.8-3
[63] tools_4.4.3 ica_1.0-3
[65] Rcpp_1.0.14 glue_1.8.0
[67] SparseArray_1.6.0 gridExtra_2.3
[69] xfun_0.52 MatrixGenerics_1.18.0
[71] IRdisplay_1.1 dplyr_1.1.4
[73] withr_3.0.2 fastmap_1.2.0
[75] digest_0.6.37 R6_2.6.1
[77] mime_0.13 colorspace_2.1-1
[79] scattermore_1.2 tensor_1.5
[81] dichromat_2.0-0.1 spatstat.data_3.1-6
[83] RSQLite_2.3.9 tidyr_1.3.1
[85] generics_0.1.3 data.table_1.17.0
[87] S4Arrays_1.6.0 httr_1.4.7
[89] htmlwidgets_1.6.4 uwot_0.2.3
[91] pkgconfig_2.0.3 gtable_0.3.6
[93] blob_1.2.4 lmtest_0.9-40
[95] htmltools_0.5.8.1 dotCall64_1.2
[97] ProtGenerics_1.38.0 scales_1.4.0
[99] png_0.1-8 spatstat.univar_3.1-2
[101] rstudioapi_0.17.1 knitr_1.50
[103] reshape2_1.4.4 rjson_0.2.23
[105] uuid_1.2-1 checkmate_2.3.2
[107] nlme_3.1-168 curl_6.2.2
[109] repr_1.1.7 zoo_1.8-14
[111] cachem_1.1.0 stringr_1.5.1
[113] KernSmooth_2.23-26 parallel_4.4.3
[115] miniUI_0.1.2 foreign_0.8-90
[117] restfulr_0.0.15 pillar_1.10.2
[119] grid_4.4.3 vctrs_0.6.5
[121] RANN_2.6.2 promises_1.3.2
[123] xtable_1.8-4 cluster_2.1.8.1
[125] htmlTable_2.4.3 evaluate_1.0.3
[127] cli_3.6.5 compiler_4.4.3
[129] Rsamtools_2.22.0 rlang_1.1.6
[131] crayon_1.5.3 future.apply_1.11.3
[133] plyr_1.8.9 stringi_1.8.7
[135] viridisLite_0.4.2 deldir_2.0-4
[137] BiocParallel_1.40.0 lazyeval_0.2.2
[139] spatstat.geom_3.3-6 Matrix_1.7-3
[141] IRkernel_1.3.2 RcppHNSW_0.6.0
[143] patchwork_1.3.0 bit64_4.6.0-1
[145] future_1.40.0 ggplot2_3.5.2
[147] KEGGREST_1.46.0 shiny_1.10.0
[149] SummarizedExperiment_1.36.0 ROCR_1.0-11
[151] igraph_2.0.3 memoise_2.0.1
[153] fastmatch_1.1-6 bit_4.6.0
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bugSomething isn't workingSomething isn't working