Skip to content

Names can mismatch when there are enough data points to trigger scientific notation in getRidge #8

@drobertsicl

Description

@drobertsicl

Hi,

In high resolution data where you might have hundreds of thousands of data points in a spectra, when names are assigned at various points within getRidge NAs can be introduced because of the mismatch between eg 200000 and 2e+05. This can cause various issues downstream with NULL entries and mismatched vector lengths.

I've fixed it locally by defining a small function to force format the strings, but I don't know if it's potentially an issue elsewhere in any other functions. Apologies for the totally uncommented code, hope it helps:

getRidge <- function (localMax, iInit = ncol(localMax), step = -1, iFinal = 1, 
                      minWinSize = 5, gapTh = 3, skip = NULL, scaleToWinSize = "doubleodd") 
{
    num2str <- function(x) format(x, scientific = FALSE, trim = TRUE)
    scales <- as.numeric(colnames(localMax))
    if (is.null(scales)) scales <- seq_len(ncol(localMax))
    
    maxInd_curr <- which(localMax[, iInit] > 0)
    nMz <- nrow(localMax)
    
    if (is.null(skip)) skip <- iInit + 1
    colInd <- if (ncol(localMax) > 1) seq(iInit + step, iFinal, step) else 1
    
    ridgeList <- as.list(maxInd_curr)
    names(ridgeList) <- num2str(maxInd_curr)
    
    peakStatus <- as.list(rep(0, length(maxInd_curr)))
    names(peakStatus) <- num2str(maxInd_curr)
    
    orphanRidgeList <- NULL
    orphanRidgeName <- NULL
    nLevel <- length(colInd)
    
    for (j in seq_len(nLevel)) {
        col.j <- colInd[j]
        scale.j <- scales[col.j]
        
        if (colInd[j] == skip) {
            oldname <- names(ridgeList)
            ridgeList <- lapply(ridgeList, function(x) c(x, x[length(x)]))
            names(ridgeList) <- oldname
            next
        }
        
        if (length(maxInd_curr) == 0) {
            maxInd_curr <- which(localMax[, col.j] > 0)
            next
        }
        
        if (identical(scaleToWinSize, "doubleodd")) {
            winSize.j <- scale.j * 2 + 1
        } else if (identical(scaleToWinSize, "halve")) {
            winSize.j <- floor(scale.j / 2)
        } else if (is.function(scaleToWinSize)) {
            winSize.j <- scaleToWinSize(scale.j)
        } else {
            stop("Invalid scaleToWinSize. Use \"doubleodd\", \"halve\" or a custom function(scale.j)")
        }
        
        if (winSize.j < minWinSize) winSize.j <- minWinSize
        
        selPeak.j <- NULL
        remove.j <- NULL
        
        for (k in seq_along(maxInd_curr)) {
            ind.k <- maxInd_curr[k]
            ind.k.name <- num2str(ind.k)
            
            start.k <- max(1, ind.k - winSize.j)
            end.k <- min(nMz, ind.k + winSize.j)
            ind.curr <- which(localMax[start.k:end.k, col.j] > 0) + start.k - 1
            
            if (length(ind.curr) == 0) {
                status.k <- peakStatus[[ind.k.name]]
                if (is.null(status.k)) status.k <- gapTh + 1
                if (status.k > gapTh & scale.j >= 2) {
                    temp <- ridgeList[[ind.k.name]]
                    orphanRidgeList <- c(orphanRidgeList, list(temp[1:(length(temp) - status.k)]))
                    orphanRidgeName <- c(orphanRidgeName, paste(num2str(col.j + status.k + 1), num2str(ind.k), sep = "_"))
                    remove.j <- c(remove.j, ind.k.name)
                    next
                } else {
                    ind.curr <- ind.k
                    peakStatus[[ind.k.name]] <- status.k + 1
                }
            } else {
                peakStatus[[ind.k.name]] <- 0
                if (length(ind.curr) >= 2)
                    ind.curr <- ind.curr[which.min(abs(ind.curr - ind.k))]
            }
            
            ridgeList[[ind.k.name]] <- c(ridgeList[[ind.k.name]], ind.curr)
            selPeak.j <- c(selPeak.j, ind.curr)
        }
        
        if (length(remove.j) > 0) {
            removeInd <- which(names(ridgeList) %in% remove.j)
            ridgeList <- ridgeList[-removeInd]
            peakStatus <- peakStatus[-removeInd]
        }
        
        dupPeak.j <- unique(selPeak.j[duplicated(selPeak.j)])
        if (length(dupPeak.j) > 0) {
            removeInd <- NULL
            for (dupPeak.jk in dupPeak.j) {
                selInd <- which(selPeak.j == dupPeak.jk)
                selLen <- sapply(ridgeList[selInd], length)
                removeInd.jk <- which.max(selLen)
                removeInd <- c(removeInd, selInd[-removeInd.jk])
                orphanRidgeList <- c(orphanRidgeList, ridgeList[removeInd.jk])
                orphanRidgeName <- c(orphanRidgeName, paste(num2str(col.j), num2str(selPeak.j[removeInd.jk]), sep = "_"))
            }
            selPeak.j <- selPeak.j[-removeInd]
            ridgeList <- ridgeList[-removeInd]
            peakStatus <- peakStatus[-removeInd]
        }
        
        if (length(ridgeList) > 0) names(ridgeList) <- num2str(selPeak.j)
        if (length(peakStatus) > 0) names(peakStatus) <- num2str(selPeak.j)
        
        if (scale.j >= 2) {
            maxInd_next <- which(localMax[, col.j] > 0)
            unSelPeak.j <- maxInd_next[!(maxInd_next %in% selPeak.j)]
            newPeak.j <- as.list(unSelPeak.j)
            names(newPeak.j) <- num2str(unSelPeak.j)
            ridgeList <- c(ridgeList, newPeak.j)
            maxInd_curr <- c(selPeak.j, unSelPeak.j)
            newPeakStatus <- as.list(rep(0, length(newPeak.j)))
            names(newPeakStatus) <- num2str(unSelPeak.j)
            peakStatus <- c(peakStatus, newPeakStatus)
        } else {
            maxInd_curr <- selPeak.j
        }
    }
    
    if (length(ridgeList) > 0)
        names(ridgeList) <- paste("1", names(ridgeList), sep = "_")
    
    if (length(orphanRidgeList) > 0)
        names(orphanRidgeList) <- orphanRidgeName
    
    ridgeList <- c(ridgeList, orphanRidgeList)
    ridgeList <- lapply(ridgeList, rev)
    ridgeList <- ridgeList[!duplicated(names(ridgeList))]
    
    attr(ridgeList, "class") <- "ridgeList"
    attr(ridgeList, "scales") <- scales
    return(ridgeList)
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions