Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion R/preprocess.hrd.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
#' @param seg segmentation data
#' @return preprocessed data
preprocess.hrd<-function(seg){
# Output file that can be used in non-seqz run in the future #
outputdir = getwd()
run_name <- unique(seg$SampleID)
out_file <- paste0(outputdir,"/",run_name,"_preprocessed_hrd.txt")

seg <- seg[!seg[,2] %in% c(paste('chr',c('X','Y','x','y',23,24),sep=''),c('X','Y','x','y',23,24)),]
seg[,1] <- as.character(seg[,1])

Expand All @@ -12,6 +17,7 @@ preprocess.hrd<-function(seg){
seg[tmp[,8] > tmp[,7],8] <- tmp[tmp[,8] > tmp[,7],7]
}
seg <- shrink.seg.ai.wrapper(seg)
write.table(seg,file = out_file, sep="\t",quote = FALSE, row.names = FALSE)

return(seg)

}
14 changes: 8 additions & 6 deletions R/preprocess.seqz.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,19 @@ preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
outputdir = getwd()
}

run_name<-gsub(".*/","",gsub("_small.seqz","",gsub("gz","",seg)))
# Remove only the trailing .gz from the filename; the _small.seqz.gz suffix is no longer assumed
run_name <- sub("\\.gz$", "", basename(seg))
if(chr.in.names){
extract<-sequenza.extract(seg, chromosome.list=paste('chr',c(1:24),sep=''),gamma = 60, kmin = 50)
extract<-sequenza::sequenza.extract(seg, chromosome.list=paste('chr',c(1:24),sep=''),gamma = 60, kmin = 50)
} else {
extract<-sequenza.extract(seg, chromosome.list=c(1:24),gamma = 60, kmin = 50)
extract<-sequenza::sequenza.extract(seg, chromosome.list=c(1:24),gamma = 60, kmin = 50)
}
extract.fit<-sequenza::sequenza.fit(extract, N.ratio.filter = 10, N.BAF.filter = 1, segment.filter = 3e6, mufreq.treshold = 0.10, ratio.priority = FALSE,ploidy=ploidy01, mc.cores = 1)
extract.fit<-sequenza::sequenza.fit(extract, N.ratio.filter = 10, N.BAF.filter = 1, segment.filter = 3e6, mufreq.threshold = 0.10, ratio.priority = FALSE,ploidy=ploidy01, mc.cores = 1)
# sequenza.results(extract, extract.fit, out.dir = getwd(),sample.id =run_name)

seg.tab <- do.call(rbind, extract$segments[extract$chromosomes])
seg.len <- (seg.tab$end.pos - seg.tab$start.pos)/1e+06
cint <- get.ci(extract.fit)
cint <- sequenza::get.ci(extract.fit)
cellularity <- cint$max.cellularity
ploidy <- cint$max.ploidy
avg.depth.ratio <- mean(extract$gc$adj[, 2])
Expand All @@ -38,7 +39,7 @@ preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
sd.ratio = seg.tab$sd.ratio, weight.ratio = seg.len, sd.Bf = seg.tab$sd.BAF,
weight.Bf = 1, ratio.priority = FALSE, CNn = 2)
seg.tab$CN <- allele.cn[,1]
allele.cn <- as.data.table(allele.cn)
allele.cn <- data.table::as.data.table(allele.cn)
#Making input file
seg <- data.frame(SampleID = as.character(run_name), Chromosome = seg.tab$chromosome, Start_position = seg.tab$start.pos,
End_position = seg.tab$end.pos, Nprobes = 1, total_cn = allele.cn$CNt, A_cn = allele.cn$B,
Expand All @@ -48,3 +49,4 @@ preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
seg<-seg[!is.na(seg$B_cn),]
return(seg)
}

45 changes: 41 additions & 4 deletions R/scar_score.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,48 @@ scar_score<-function(seg,reference = "grch38", chr.in.names=TRUE, m,seqz=FALSE,
cat('Preprocessing finished \n')
} else {
seg<-read.table(seg,header=T, check.names = F, stringsAsFactors = F, sep="\t")
seg[,9]<-seg[,8]
seg[,8]<-seg[,7]
seg[,7]<-seg[,6]
seg[,10]<-rep(1,dim(seg)[1])
# Validate headers against the example input in the README; required_cols are matched by name, so their order in the file does not matter
# Nprobes and contamination (generated when seqz==TRUE) are optional and are added below if absent
required_cols <- c(
"SampleID",
"Chromosome",
"Start_position",
"End_position",
"total_cn",
"A_cn",
"B_cn",
"ploidy"
)
missing_cols <- setdiff(required_cols, colnames(seg))

if (length(missing_cols) > 0) {
stop(paste("Missing required columns:",
paste(missing_cols, collapse = ", ")))
}

# Add Nprobes only if missing
if (!"Nprobes" %in% colnames(seg)) {
seg$Nprobes <- 1
}

# Add contamination only if missing
if (!"contamination" %in% colnames(seg)) {
seg$contamination <- 1
}

# Reorder columns to match seqz structure
seg <- seg[, c(
"SampleID",
"Chromosome",
"Start_position",
"End_position",
"Nprobes",
"total_cn",
"A_cn",
"B_cn",
"ploidy",
"contamination"
)]
}
#prep
cat('Determining HRD-LOH, LST, TAI \n')
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ Minimum requirements
- Software: R
- Operating system: Linux, OS X, Windows
- R version: 3.5.0
- **latest version of Sequenza R package** must be installed from **Bitbucket** https://bitbucket.org/sequenza_tools/sequenza
- **latest version of Sequenza R package** must be installed from **Bitbucket** https://bitbucket.org/sequenzatools/sequenza

``` r
library(devtools)
install_bitbucket('sequenza_tools/sequenza')
install_bitbucket('sequenzatools/sequenza')
```

Installation
Expand Down Expand Up @@ -219,3 +219,4 @@ Favero, F., T. Joshi, A. M. Marquard, N. J. Birkbak, M. Krzystanek, Q. Li, Z. Sz
Popova, T., E. Manie, G. Rieunier, V. Caux-Moncoutier, C. Tirapo, T. Dubois, O. Delattre, et al. 2012. “Ploidy and large-scale genomic instability consistently identify basal-like breast carcinomas with BRCA1/2 inactivation.” *Cancer Res.* 72 (21): 5454–62.

Van Loo, P., S. H. Nordgard, O. C. Lingjærde, H. G. Russnes, I. H. Rye, W. Sun, V. J. Weigman, et al. 2010. “Allele-specific copy number analysis of tumors.” *Proc. Natl. Acad. Sci. U.S.A.* 107 (39): 16910–5.