sztup · juliawiggeshoff · Feb 10, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/R/preprocess.hrd.R b/R/preprocess.hrd.R
@@ -3,6 +3,11 @@
 #' @param seg segmentation data
 #' @return preprocessed data
 preprocess.hrd<-function(seg){
+  # Path of the output file; the preprocessed segments written there can be reused as input for a later non-seqz run
+  outputdir = getwd()
+  run_name <- unique(seg$SampleID)
+  out_file <- paste0(outputdir,"/",run_name,"_preprocessed_hrd.txt")
+
   seg <- seg[!seg[,2] %in% c(paste('chr',c('X','Y','x','y',23,24),sep=''),c('X','Y','x','y',23,24)),]
   seg[,1] <- as.character(seg[,1])
 
@@ -12,6 +17,7 @@ preprocess.hrd<-function(seg){
     seg[tmp[,8] > tmp[,7],8]  <- tmp[tmp[,8] > tmp[,7],7]
   }
   seg <- shrink.seg.ai.wrapper(seg)
+  write.table(seg,file = out_file, sep="\t",quote = FALSE, row.names = FALSE)
+
   return(seg)
-
 }
diff --git a/R/preprocess.seqz.R b/R/preprocess.seqz.R
@@ -15,18 +15,19 @@ preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
     outputdir = getwd()
   }
 
-  run_name<-gsub(".*/","",gsub("_small.seqz","",gsub("gz","",seg)))
+  # Remove only the trailing .gz from the file name; no longer assume a _small.seqz.gz suffix
+  run_name <- sub("\\.gz$", "", basename(seg))
   if(chr.in.names){
-  extract<-sequenza.extract(seg, chromosome.list=paste('chr',c(1:24),sep=''),gamma = 60, kmin = 50)
+  extract<-sequenza::sequenza.extract(seg, chromosome.list=paste('chr',c(1:24),sep=''),gamma = 60, kmin = 50)
    } else {
-  extract<-sequenza.extract(seg, chromosome.list=c(1:24),gamma = 60, kmin = 50)
+  extract<-sequenza::sequenza.extract(seg, chromosome.list=c(1:24),gamma = 60, kmin = 50)
    }
-  extract.fit<-sequenza::sequenza.fit(extract, N.ratio.filter = 10, N.BAF.filter = 1, segment.filter = 3e6, mufreq.treshold = 0.10, ratio.priority = FALSE,ploidy=ploidy01, mc.cores = 1)
+  extract.fit<-sequenza::sequenza.fit(extract, N.ratio.filter = 10, N.BAF.filter = 1, segment.filter = 3e6, mufreq.threshold = 0.10, ratio.priority = FALSE,ploidy=ploidy01, mc.cores = 1)
   #  sequenza.results(extract, extract.fit, out.dir = getwd(),sample.id =run_name)
 
   seg.tab <- do.call(rbind, extract$segments[extract$chromosomes])
   seg.len <- (seg.tab$end.pos - seg.tab$start.pos)/1e+06
-  cint <- get.ci(extract.fit)
+  cint <- sequenza::get.ci(extract.fit)
   cellularity <- cint$max.cellularity
   ploidy <- cint$max.ploidy
   avg.depth.ratio <- mean(extract$gc$adj[, 2])
@@ -38,7 +39,7 @@ preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
                                    sd.ratio = seg.tab$sd.ratio, weight.ratio = seg.len, sd.Bf = seg.tab$sd.BAF,
                                    weight.Bf = 1, ratio.priority = FALSE, CNn = 2)
   seg.tab$CN <- allele.cn[,1]
-  allele.cn <- as.data.table(allele.cn)
+  allele.cn <- data.table::as.data.table(allele.cn)
   #Making imput file
   seg <- data.frame(SampleID = as.character(run_name), Chromosome = seg.tab$chromosome, Start_position = seg.tab$start.pos,
                     End_position = seg.tab$end.pos, Nprobes = 1, total_cn = allele.cn$CNt, A_cn = allele.cn$B,
@@ -48,3 +49,4 @@ preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
   seg<-seg[!is.na(seg$B_cn),]
   return(seg)
 }
+
diff --git a/R/scar_score.R b/R/scar_score.R
@@ -43,11 +43,48 @@ scar_score<-function(seg,reference = "grch38", chr.in.names=TRUE, m,seqz=FALSE,
     cat('Preprocessing finished \n')
   } else {
     seg<-read.table(seg,header=T, check.names = F, stringsAsFactors = F, sep="\t")
-    seg[,9]<-seg[,8]
-    seg[,8]<-seg[,7]
-    seg[,7]<-seg[,6]
-    seg[,10]<-rep(1,dim(seg)[1])
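+    # Validate the headers against the example input in the README: every column in required_cols must be present
+    # (column order in the file does not matter; columns are reordered below). Nprobes and contamination, which are
+    # generated when seqz==TRUE, are added further down if the input lacks them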
+    required_cols <- c(
+    "SampleID",
+    "Chromosome",
+    "Start_position",
+    "End_position",
+    "total_cn",
+    "A_cn",
+    "B_cn",
+    "ploidy"
+    )
+    missing_cols <- setdiff(required_cols, colnames(seg))
 
+    if (length(missing_cols) > 0) {
+      stop(paste("Missing required columns:",
+                 paste(missing_cols, collapse = ", ")))
+    }
+
+    # Add Nprobes only if missing
+    if (!"Nprobes" %in% colnames(seg)) {
+      seg$Nprobes <- 1
+    }
+
+    # Add contamination only if missing
+    if (!"contamination" %in% colnames(seg)) {
+      seg$contamination <- 1
+    }
+
+    # Reorder columns to match seqz structure
+    seg <- seg[, c(
+      "SampleID",
+      "Chromosome",
+      "Start_position",
+      "End_position",
+      "Nprobes",
+      "total_cn",
+      "A_cn",
+      "B_cn",
+      "ploidy",
+      "contamination"
+    )]
   }
   #prep
   cat('Determining HRD-LOH, LST, TAI \n')

diff --git a/README.md b/README.md
@@ -41,11 +41,11 @@ Minimum requirements
 -   Software: R
 -   Operating system: Linux, OS X, Windows
 -   R version: 3.5.0
--   **latest version of Sequenza R package** must be installed from **Bitbucket** https://bitbucket.org/sequenza_tools/sequenza   
+-   **latest version of Sequenza R package** must be installed from **Bitbucket** https://bitbucket.org/sequenzatools/sequenza   
 
 ``` r
 library(devtools)
-install_bitbucket('sequenza_tools/sequenza')
+install_bitbucket('sequenzatools/sequenza')
 ```  
 
 Installation
@@ -219,3 +219,4 @@ Favero, F., T. Joshi, A. M. Marquard, N. J. Birkbak, M. Krzystanek, Q. Li, Z. Sz
 Popova, T., E. Manie, G. Rieunier, V. Caux-Moncoutier, C. Tirapo, T. Dubois, O. Delattre, et al. 2012. “Ploidy and large-scale genomic instability consistently identify basal-like breast carcinomas with BRCA1/2 inactivation.” *Cancer Res.* 72 (21): 5454–62.
 
 Van Loo, P., S. H. Nordgard, O. C. Lingj?rde, H. G. Russnes, I. H. Rye, W. Sun, V. J. Weigman, et al. 2010. “Allele-specific copy number analysis of tumors.” *Proc. Natl. Acad. Sci. U.S.A.* 107 (39): 16910–5.
+