Skip to content

nCount_ATAC decreases after TSSEnrichment(ATAC, fast = FALSE) for combined Seurat object (data from several samples of snATAC) #1962

@zyll123

Description

@zyll123

Dear authors, I find the nCount_ATAC in the metadata will decrease for all the cells after running TSSEnrichment(ATAC, fast = FALSE) for a Seurat object with snATAC data from several samples (with separate fragment.tsv.gz). I checked the original code for TSSEnrichment but have no idea why this will happen.

I have tried on my own data and public data, the same problem emerges. The problem will only occur when fast set to FALSE and with number of samples >=2.

public data used:

  1. https://storage.googleapis.com/linnarsson-lab-human/ATAC_dev/10X/10X290_1_ABCD_1.tar.gz
  2. https://storage.googleapis.com/linnarsson-lab-human/ATAC_dev/10X/10X346_2_ABCD_2.tar.gz
# 1. QC on single sample =========================================
# get gene annotations for hg38
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevels(annotation) <- paste0('chr', seqlevels(annotation))

counts=Read10X_h5(paste0(Parent_Dir,df$filename[i],"filtered_feature_bc_matrix.h5"))

# create a Seurat object containing the RNA adata
S290_1 <- CreateSeuratObject(
  counts = counts$`Gene Expression`,
  assay = "RNA"
)

S290_1[["percent.mito"]] <- PercentageFeatureSet(S290_1, pattern = "^MT-")
S290_1[["percent.rb"]] <- PercentageFeatureSet(S290_1, pattern = "^RP[SL]")
genes <- rownames(S290_1)
remain <- genes[-grep(pattern = "^GM1|^GM2|^GM3|^GM4|^GM5|^GM6|^GM7|^GM8|^GM9|RIK", x = genes)]
S290_1 <- subset(S290_1, features = remain)

# create ATAC assay and add it to the object
S290_1[["ATAC"]] <- CreateChromatinAssay(
  counts = counts$Peaks,
  sep = c(":", "-"),
  fragments = fragpath,
  annotation = annotation
)

S290_1@meta.data$orig.ident <- paste0('S',df$Sample_ID[[i]])
S290_1 <- NucleosomeSignal(S290_1)
S290_1 <- TSSEnrichment(S290_1)
S290_1$blacklist_fraction <- FractionCountsInRegion(object = S290_1, assay = 'ATAC',regions = blacklist_hg38)

# filter out low quality cells
max_nFeature_RNA = quantile(S290_1$nFeature_RNA, 0.95)
S290_1 <- subset(
  x = S290_1,
  subset = nCount_ATAC < 100000 &
	nFeature_RNA < max_nFeature_RNA &
	nCount_ATAC > 500 &
	nFeature_RNA > 500 &
	nucleosome_signal < 2 &
	TSS.enrichment > 2 &
	#blacklist_fraction < 0.05 & #暂时没有进行这项质控
	percent.mito < 5 & percent.rb < 10
)


meta_data <- S290_1@meta.data
meta_dir=paste0(Parent_Dir,"Meta/",S290_1@meta.data$orig.ident[1],"_meta_0603.csv")
# 导出为 CSV 文件
write.csv(meta_data, file = meta_dir, row.names = TRUE)

2. Combine two samples =======================================
peaks1=read.table(
        file = paste0(Parent_Dir,'10X290_1_ABCD_1/atac_peaks.bed'),
        col.names = c("chr", "start", "end")
    )
peaks1 <- makeGRangesFromDataFrame(peaks1)

peaks2=read.table(
        file = paste0(Parent_Dir,'10X346_2_ABCD_2/atac_peaks.bed'),
        col.names = c("chr", "start", "end")
    )
peaks2 <- makeGRangesFromDataFrame(peaks2)

chr<-c()
for (i in 1:22){
  chr<-c(chr, paste0("chr",i))
}
chr<-c(chr,"chrX")
chr<-c(chr,"chrY")
peaks1<-peaks1[seqnames(peaks1) %in% chr]
peaks2<-peaks2[seqnames(peaks2) %in% chr]

peaks=list(peaks1,peaks2)
peaks_gl       <- GRangesList(peaks)           # list -> GRangesList
all_peaks      <- unlist(peaks_gl, use.names=FALSE)
combined.peaks <- reduce(all_peaks)
# Filter out bad peaks based on length
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths  < 10000 & peakwidths > 20]

Meta1=read.table(
        file = paste0(Parent_Dir,'Meta/S290_1_meta_0603.csv'),
        stringsAsFactors = FALSE,
        sep = ",",
        header = TRUE,
        row.names = 1
    )
Meta2=read.table(
        file = paste0(Parent_Dir,'Meta/S346_2_meta_0603.csv'),
        stringsAsFactors = FALSE,
        sep = ",",
        header = TRUE,
        row.names = 1
    )
frags1= CreateFragmentObject(
        path = paste0(Parent_Dir,"10X290_1_ABCD_1/atac_fragments.tsv.gz"), 
        cells = rownames(Meta1) #根据metadata里的细胞保留
    )
frags2= CreateFragmentObject(
        path = paste0(Parent_Dir,"10X346_2_ABCD_2/atac_fragments.tsv.gz"), 
        cells = rownames(Meta2) #根据metadata里的细胞保留
    )
counts1=FeatureMatrix(
        fragments = frags1,
        features = combined.peaks,
        cells = rownames(Meta1)
    )
counts2=FeatureMatrix(
        fragments = frags2,
        features = combined.peaks,
        cells = rownames(Meta2)
    )
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevels(annotation) <- paste0('chr', seqlevels(annotation))
# seqlevelsStyle(annotation) <- 'UCSC'
genome(annotation) <- "hg38"

assay1=CreateChromatinAssay(counts1, fragments = frags1,annotation = annotation)
assay2=CreateChromatinAssay(counts2, fragments = frags2,annotation = annotation)
obj1=CreateSeuratObject(assay1, assay = "ATAC", meta.data=Meta1)
obj2=CreateSeuratObject(assay2, assay = "ATAC", meta.data=Meta2)
# merge all datasets, adding a cell ID to make sure cell names are unique
combined <- merge(
  x = obj1,
  y = obj2,
  add.cell.ids = c('1','2')
)

# 3. check the nCount_ATAC ===========================
summary(combined@meta.data$nCount_ATAC)
Meta=combined@meta.data
table(Meta[Meta$nCount_ATAC<500,'orig.ident'])

# 4. run the TssEnrichment =================================
ATAC=combined
ATAC = TSSEnrichment(ATAC, fast = FALSE)
summary(ATAC@meta.data$nCount_ATAC)
Meta=ATAC@meta.data
table(Meta[Meta$nCount_ATAC<500,'orig.ident'])

table(ATAC@meta.data$nCount_ATAC<combined@meta.data$nCount_ATAC)

You will find the table([email protected]$nCount_ATAC<[email protected]$nCount_ATAC) returns TRUE

sessionInfo():

R version 4.4.3 (2025-02-28)
Platform: x86_64-conda-linux-gnu
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /public/home/yzhang/micromamba/envs/ATAC2/lib/libopenblasp-r0.3.29.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=zh_CN.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=zh_CN.UTF-8        LC_COLLATE=zh_CN.UTF-8    
 [5] LC_MONETARY=zh_CN.UTF-8    LC_MESSAGES=zh_CN.UTF-8   
 [7] LC_PAPER=zh_CN.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C       

time zone: Asia/Shanghai
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] BSgenome.Hsapiens.UCSC.hg38_1.4.5 BSgenome_1.74.0                  
 [3] rtracklayer_1.66.0                BiocIO_1.16.0                    
 [5] Biostrings_2.74.0                 XVector_0.46.0                   
 [7] EnsDb.Hsapiens.v86_2.99.0         ensembldb_2.30.0                 
 [9] AnnotationFilter_1.30.0           GenomicFeatures_1.58.0           
[11] AnnotationDbi_1.68.0              Biobase_2.66.0                   
[13] GenomicRanges_1.58.0              GenomeInfoDb_1.42.0              
[15] IRanges_2.40.0                    S4Vectors_0.44.0                 
[17] BiocGenerics_0.52.0               Seurat_5.3.0                     
[19] SeuratObject_5.1.0                sp_2.2-0                         
[21] Signac_1.14.0                    

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.22            splines_4.4.3              
  [3] later_1.4.2                 pbdZMQ_0.3-14              
  [5] bitops_1.0-9                tibble_3.2.1               
  [7] polyclip_1.10-7             rpart_4.1.24               
  [9] XML_3.99-0.17               fastDummies_1.7.5          
 [11] lifecycle_1.0.4             globals_0.17.0             
 [13] lattice_0.22-7              MASS_7.3-64                
 [15] backports_1.5.0             magrittr_2.0.3             
 [17] rmarkdown_2.29              Hmisc_5.2-3                
 [19] plotly_4.10.4               yaml_2.3.10                
 [21] httpuv_1.6.15               sctransform_0.4.1          
 [23] spam_2.11-1                 spatstat.sparse_3.1-0      
 [25] reticulate_1.42.0           cowplot_1.1.3              
 [27] pbapply_1.7-2               DBI_1.2.3                  
 [29] RColorBrewer_1.1-3          abind_1.4-5                
 [31] zlibbioc_1.52.0             Rtsne_0.17                 
 [33] purrr_1.0.4                 biovizBase_1.54.0          
 [35] RCurl_1.98-1.16             nnet_7.3-20                
 [37] VariantAnnotation_1.52.0    GenomeInfoDbData_1.2.13    
 [39] ggrepel_0.9.6               irlba_2.3.5.1              
 [41] listenv_0.9.1               spatstat.utils_3.1-3       
 [43] goftest_1.2-3               RSpectra_0.16-2            
 [45] spatstat.random_3.3-3       fitdistrplus_1.2-2         
 [47] parallelly_1.43.0           DelayedArray_0.32.0        
 [49] codetools_0.2-20            RcppRoll_0.3.1             
 [51] tidyselect_1.2.1            UCSC.utils_1.2.0           
 [53] farver_2.1.2                matrixStats_1.5.0          
 [55] base64enc_0.1-3             spatstat.explore_3.4-2     
 [57] GenomicAlignments_1.42.0    jsonlite_2.0.0             
 [59] Formula_1.2-5               progressr_0.15.1           
 [61] ggridges_0.5.6              survival_3.8-3             
 [63] tools_4.4.3                 ica_1.0-3                  
 [65] Rcpp_1.0.14                 glue_1.8.0                 
 [67] SparseArray_1.6.0           gridExtra_2.3              
 [69] xfun_0.52                   MatrixGenerics_1.18.0      
 [71] IRdisplay_1.1               dplyr_1.1.4                
 [73] withr_3.0.2                 fastmap_1.2.0              
 [75] digest_0.6.37               R6_2.6.1                   
 [77] mime_0.13                   colorspace_2.1-1           
 [79] scattermore_1.2             tensor_1.5                 
 [81] dichromat_2.0-0.1           spatstat.data_3.1-6        
 [83] RSQLite_2.3.9               tidyr_1.3.1                
 [85] generics_0.1.3              data.table_1.17.0          
 [87] S4Arrays_1.6.0              httr_1.4.7                 
 [89] htmlwidgets_1.6.4           uwot_0.2.3                 
 [91] pkgconfig_2.0.3             gtable_0.3.6               
 [93] blob_1.2.4                  lmtest_0.9-40              
 [95] htmltools_0.5.8.1           dotCall64_1.2              
 [97] ProtGenerics_1.38.0         scales_1.4.0               
 [99] png_0.1-8                   spatstat.univar_3.1-2      
[101] rstudioapi_0.17.1           knitr_1.50                 
[103] reshape2_1.4.4              rjson_0.2.23               
[105] uuid_1.2-1                  checkmate_2.3.2            
[107] nlme_3.1-168                curl_6.2.2                 
[109] repr_1.1.7                  zoo_1.8-14                 
[111] cachem_1.1.0                stringr_1.5.1              
[113] KernSmooth_2.23-26          parallel_4.4.3             
[115] miniUI_0.1.2                foreign_0.8-90             
[117] restfulr_0.0.15             pillar_1.10.2              
[119] grid_4.4.3                  vctrs_0.6.5                
[121] RANN_2.6.2                  promises_1.3.2             
[123] xtable_1.8-4                cluster_2.1.8.1            
[125] htmlTable_2.4.3             evaluate_1.0.3             
[127] cli_3.6.5                   compiler_4.4.3             
[129] Rsamtools_2.22.0            rlang_1.1.6                
[131] crayon_1.5.3                future.apply_1.11.3        
[133] plyr_1.8.9                  stringi_1.8.7              
[135] viridisLite_0.4.2           deldir_2.0-4               
[137] BiocParallel_1.40.0         lazyeval_0.2.2             
[139] spatstat.geom_3.3-6         Matrix_1.7-3               
[141] IRkernel_1.3.2              RcppHNSW_0.6.0             
[143] patchwork_1.3.0             bit64_4.6.0-1              
[145] future_1.40.0               ggplot2_3.5.2              
[147] KEGGREST_1.46.0             shiny_1.10.0               
[149] SummarizedExperiment_1.36.0 ROCR_1.0-11                
[151] igraph_2.0.3                memoise_2.0.1              
[153] fastmatch_1.1-6             bit_4.6.0  

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions