Skip to content
2 changes: 1 addition & 1 deletion R/converters.R
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ read.juncs = function(rafile,
ra = readRDS(rafile)
## validity check written for "junctions" class
return(Junction$new(ra))
} else if (grepl('(.bedpe$)', rafile)){
} else if (grepl('bedpe(\\.gz)?$', rafile)){
ra.path = rafile
cols = c('chr1', 'start1', 'end1', 'chr2', 'start2', 'end2', 'name', 'score', 'str1', 'str2')

Expand Down
277 changes: 273 additions & 4 deletions R/eventCallers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1051,9 +1051,13 @@ annotate_walks = function(walks)
#' Shortcut to call all simple and complex event types on JaBbA graph using standard settings on all event callers.
#'
#' @param gg gGraph
#' @param verbose
#' @param mark
#' @param QRP annotate quasi reciprocal events
#' @param seismic (logical or character) if TRUE then annotates seismic amplifications using a graph-based approach, if 'Rosswog' then the Rosswog et al. algorithm is used and events are annotated as seismic.rosswog, and if 'both' then will annotate both "seismic" (graph-based) and "seismic.rosswog" (Rosswog et al. algorithm)
#' @return gGraph with nodes and edges annotated with complex events in their node and edge metadata and in the graph meta data field $events
#' @export
events = function(gg, verbose = TRUE, mark = FALSE, QRP = FALSE)
events = function(gg, verbose = TRUE, mark = FALSE, QRP = FALSE, seismic = FALSE)
{
gg = gg %>% simple(mark = TRUE)
if (verbose)
Expand Down Expand Up @@ -1088,7 +1092,20 @@ events = function(gg, verbose = TRUE, mark = FALSE, QRP = FALSE)
if (verbose)
message('Finished qrp')
}


if (seismic == TRUE | seismic == 'both' | seismic == 'Rosswog'){
Rosswog = FALSE
if (seismic == 'Rosswog'){
Rosswog = TRUE
}
gg = gg %>% seismic(mark = TRUE, Rosswog = Rosswog)
if (verbose)
message('Finished seismic')
if (seismic == 'both'){
gg = gg %>% seismic(mark = TRUE, Rosswog = TRUE)
}
}

ev = rbind(
gg$meta$simple,
gg$meta$chromothripsis,
Expand All @@ -1108,6 +1125,16 @@ events = function(gg, verbose = TRUE, mark = FALSE, QRP = FALSE)
gg$meta$qrpmin,
gg$meta$qrpmix, fill = TRUE)[, ev.id := seq_len(.N)]
}
if (seismic == TRUE | seismic == 'both'){
ev = rbind(
ev,
gg$meta$seismic, fill = TRUE)[, ev.id := seq_len(.N)]
}
if (seismic == 'Rosswog'){
ev = rbind(
ev,
gg$meta$seismic.rosswog, fill = TRUE)[, ev.id := seq_len(.N)]
}

gg$set(events = ev)
return(gg)
Expand Down Expand Up @@ -2916,6 +2943,121 @@ qrp = function(gg, thresh = 1e6, max.small = 1e5,
return(gg)

}


#' @name seismic
#' @description
#' Identification of seismic amplicons according to the definitions in Rosswog et al.
#' should be used.
#'
#' @param gg gGraph
#' @param amp.thresh (numeric) multiply of rounded ploidy to use for detection of amplification (2). The threshold for amplification is set to >= rounded.ploidy * amp.thresh + 1 where rounded.ploidy is either 2 or 4 (determined using ploidy.thresh).
#' @param ploidy.thresh (numeric) threshold to use above which ploidy is considered 4 (and up to it, 2). Rosswog et al. used 2 and hence this is the default value.
#' @param min.internal (numeric) minimal number of internal junctions in a seismic amplification (14).
#' @param mc.cores (numeric) number of cores to use (1)
#' @param mark (logical) nodes and edges with color if they are included in a seismic amplification (TRUE)
#' @param mark.col (character) color to use (if mark set to TRUE) to mark edges and nodes (purple).
#' @param Rosswog (logical) run the original algorithm by Rosswog et al.
#' @param ... additional parameters for the Rosswog et al. algorithm (see help menu for gGnome::seismic_rosswog()). This is only relevant is Rosswog = TRUE.
#'
#' @return gg
#' @export
seismic = function(gg, amp.thresh = 2, ploidy.thresh = 2, min.internal = 14,
mc.cores = 1, mark = TRUE, mark.col = 'purple', Rosswog = FALSE,
...)
{
gg$nodes$mark(seismic = as.integer(NA))
gg$edges$mark(seismic = as.integer(NA))
gg$set(seismic = data.table())
if (!('cn' %in% names(mcols(gg$nodes$gr)))){
stop('In order to call seismic amplifications gGraph nodes must contain "cn" values')
}
if (is.null(gg$meta$ploidy)){
ploidy = gg$nodes$dt[!is.na(cn), sum(cn*as.numeric(width))/sum(as.numeric(width))]
} else {
ploidy = gg$meta$ploidy
}

if (isTRUE(Rosswog)){
message('Running the original algorithm by Rosswog et al.')
rosswog_calls = seismic_rosswog(gg, ploidy, ...)
gg = annotate_rosswog_calls(gg, rosswog_calls)
} else {

# following Rosswog et al. we use 5 as the threshold for samples with ploidy up to 2
# and 9 as the threshold for samples with ploidy above 3
amp.thresh = ifelse(ploidy <= ploidy.thresh, 2 * amp.thresh + 1, 4 * amp.thresh + 1)

amplified = (gg$nodes$dt$cn) >= amp.thresh

gg$clusters(amplified, mode = 'strong')

cids = unique(gg$nodes$dt$cluster)
cids = cids[!is.na(cids)]

edt = mclapply(cids, function(cid){
sg = gg[cluster == cid]
if (length(sg$edges[type == 'ALT']) < 2){
return(NULL)
}

sg.no.alt = sg$clone()
# mark regions with rid (region ID)
eids = sg.no.alt$edges$dt$type == 'REF'
sg.no.alt$clusters(j = eids)
sg$nodes$mark(rid = sg.no.alt$nodes$dt$cluster)

# for each ALT edge check if both left and right node are in the same region (i.e. it is internal)
internal = sg$edges[type == 'ALT']$left$dt$rid == sg$edges[type == 'ALT']$right$dt$rid

# if it is not internal then check if it falls on the edge of regions
ndt = sg$nodes$dt
left.ids = ndt[, min(node.id), by = rid]$V1
right.ids = ndt[, max(node.id), by = rid]$V1
edge.nodes = c(left.ids, right.ids)

flanking = (sg$edges[type == 'ALT']$left$dt$node.id %in% edge.nodes) & (sg$edges[type == 'ALT']$right$dt$node.id %in% edge.nodes)

other = !internal & !flanking

return(data.table(cid = cid, internal.junc = sum(internal), flanking.junc = sum(flanking), other.junc = sum(other)))
}, mc.cores = mc.cores)

edt = rbindlist(edt)

if (edt[,.N] > 0){
edt = edt[internal.junc >= min.internal]
cids = edt$cid
ev.ids = seq_along(cids)
if (length(cids) > 0){
for (ev.id in ev.ids){
# annotate all edges that have both breakpoints whithin the cluster
cid = cids[ev.id]
e.idx = which(gg$edges[type == 'ALT']$left$dt$cluster == cid & gg$edges[type == 'ALT']$right$dt$cluster == cid)
eids = gg$edges[type == 'ALT'][e.idx]$dt$edge.id
gg$edges[eids]$mark(seismic = ev.id)

# annotate nodes
gg$nodes[cluster == cid]$mark(seismic = ev.id)
}

names(ev.ids) = as.character(cids)
edt[, footprint := sapply(ev.ids[as.character(cid)], function(x){nodes2footprint.str(gg$nodes[seismic == x])})]
edt$type = 'seismic'
# change the cluster ID column to "strong" to emphesize that this is a strongly connected cluster
setnames(edt, 'cid', 'strong')
gg$set(seismic = edt)
}
}
}

if (mark){
gg$edges[!is.na(seismic)]$mark(col = mark.col)
gg$nodes[!is.na(seismic)]$mark(col = mark.col)
}

return(gg)
}

#' @name events.to.gr
#' @title Extract event annotation as a GRanges
Expand All @@ -2935,8 +3077,135 @@ events.to.gr = function(gg){
warning('No events annotated in this gGraph')
return(GRanges())
}
ggrl = parse.grl(gg$meta$events$footprint)
mcols(ggrl) = gg$meta$events
return(events.dt.to.gr(gg$meta$events))
}

#' @name events.dt.to.gr
#' @title convert events annotated data.table to a GRanges
#'
#' @author Alon Shaiber
#' @param ev (data.table) events annotation data.table (gGraph$meta$events)
#' @return GRanges containing ranges of annotated events along with all metadata from gg$meta$events
#' @export
events.dt.to.gr = function(ev){
ggrl = parse.grl(ev$footprint)
mcols(ggrl) = ev
ggr = grl.unlist(ggrl)
return(ggr)
}


#' @name seismic_rosswog
#' @description
#' Identification of seismic amplicons using the algorithm provided by Rosswog et al. 2021
#'
#' @param gg gGraph
#' @param ploidy ploidy value
#' @param rosswog_dir path to a local clone of the github repository provided by Rosswog et al. (https://github.com/seismicon/SeismicAmplification)
#' @param chrBands GRanges object (see https://github.com/seismicon/SeismicAmplification for details). If nothing is provided then the hg19 version (with no chr prefix) is used.
#' @param minInternalSVs (numeric) see https://github.com/seismicon/SeismicAmplification (14 by default)
#' @param cnvTol (numeric) see https://github.com/seismicon/SeismicAmplification (5e3 by default)
#'
#' @return list(amplicons = GRanges, svs = data.table)
#' @export
seismic_rosswog = function(gg, ploidy = 2, rosswog_dir = NULL, chrBands = NULL,
minInternalSVs = 14, cnvTol = 5e3){
if (!is.null(rosswog_dir)){
if (dir.exists(rosswog_dir)){
rosswog_scripts = paste0(rosswog_dir, '/seismic_amplification_detection/seismic_amplification_detection.R')
if (file.exists(rosswog_scripts)){
source(rosswog_scripts)
breaks = gg$nodes$gr
if (!('cn' %in% names(mcols(gg$nodes$gr)))){
stop('In order to call seismic amplifications gGraph nodes must contain "cn" values')
}
alts = gg$junctions[type == 'ALT']
if (length(alts) == 0){
message('No junctions found in the graph so no seismic amplification')
return(list(amplicons = GRanges(), svs = data.table()))
}
jgrl = alts$grl
chrs = as.vector(unlist(seqnames(jgrl)))
starts = unlist(start(jgrl))
odd = seq(1,2*length(alts),2)
even = seq(2,2*length(alts),2)
jdt = data.table(chr1 = chrs[odd],
chr2 = chrs[even],
bp1 = starts[odd],
bp2 = starts[even],
edge.id = alts$dt$edge.id)
if (is.null(chrBands)){
message('No chrBands provided so using the default hg19 (with no chr prefix)')
chrBands = fread(paste0(rosswog_dir, '/seismic_amplification_detection/chromosome_bands_hg19.txt'))
chrBands = chrBands[, `:=`(seqnames = gsub('chr', '', chrom),
start = chromStart,
end = chromEnd)]
chrBands = chrBands[seqnames %in% seqlevels(breaks)]
bands = trim(gUtils::dt2gr(chrBands,
seqlengths = seqlengths(breaks)[intersect(chrBands$seqnames, seqlevels(breaks))]))
}
rosswog_calls = detect_seismic_amplification(cnv=breaks, sv=jdt, chrBands=bands,
ploidy = ploidy,
minInternalSVs = minInternalSVs,
cnvTol = cnvTol)
return(rosswog_calls)
}
}
}
}


#' @name annotate_rosswog_calls
#' @description
#' Takes the output of the Rosswog et al. seismic amplification detection algorithm
#' and annotates the gGraph accordingly
#'
#' @param gg gGraph
#' @param rosswog_calls the output of seismic_rosswog()
#' @param mark.as.rosswog (logical) if set to TRUE then seismic events are annotated as "seismic.rosswog" (TRUE), otherwise annotated as "seismic". This is meant to allow the user to annotate both graph-based "seismic" events and Rosswog et al. "seismic.rosswog" events
#' @return gg
annotate_rosswog_calls = function(gg, rosswog_calls, mark.as.rosswog = TRUE){
# mark edges and nodes
if (length(rosswog_calls$amplicons) > 0){
if (isTRUE(mark.as.rosswog)){
gg$edges[rosswog_calls$svs$edge.id]$mark(seismic.rosswog = rosswog_calls$svs$amplicon_id)
sgr = (gg$nodes$gr %*% rosswog_calls$amplicons[,'id'])
gg$nodes[sgr$node.id]$mark(seismic.rosswog = sgr$id)
} else {
gg$edges[rosswog_calls$svs$edge.id]$mark(seismic = rosswog_calls$svs$amplicon_id)
sgr = (gg$nodes$gr %*% rosswog_calls$amplicons[,'id'])
gg$nodes[sgr$node.id]$mark(seismic = sgr$id)
}
cols = c('id', 'nSegments', 'medianCN', 'maxCN', 'size_amplicon', 'nChrs_amplicon', 'nRegions_amplicon',
'medianCN_amplicon', 'cnSpan_amplicon', 'cnStates_amplicon', 'nSegments_amplicon',
'nSVs_amplicon', 'nSVsInternal_amplicon')
edt = gr2dt(rosswog_calls$amplicons %Q% (!duplicated(id)))[, ..cols]
setnames(edt, 'id', 'cid') # avoid using "id" field
name = ifelse(isTRUE(mark.as.rosswog), 'seismic.rosswog', 'seismic')
edt[, footprint := sapply(cid, function(x){nodes2footprint.str(gg$nodes[get(name) == x])})]
edt[, ev.id := .I]
edt$type = name
# change the cluster ID column to "strong" to emphesize that this is a strongly connected cluster
if (isTRUE(mark.as.rosswog)){
gg$set(seismic.rosswog = edt)
} else {
gg$set(seismic = edt)
}
}
return(gg)
}

#' @name nodes2footprint.str
#' @description
#' Takes a gNode and returns character footprint parsable by parse.gr
#'
#' @param n gNode
#' @param stripstrand (logical) string the strand information (TRUE)
#' @return character
nodes2footprint.str = function(n, stripstrand = TRUE){
footprint = n$footprint
if (stripstrand){
footprint = gr.stripstrand(footprint)
}
return(paste(gr.string(footprint), collapse = ';'))
}
15 changes: 12 additions & 3 deletions tests/testthat/test_gGnome_event_callers.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ library(gTrack)
gg.jabba = gG(jabba = system.file('extdata/hcc1954', 'jabba.rds', package="gGnome"))

test_that('event caller', {
## run events caller with qrp
############# not running this currently since it is erroring out (See: https://app.travis-ci.com/github/mskilab/gGnome/builds/237955222 )
gg.events = events(gg.jabba, QRP = TRUE)
## run events caller with qrp and with seismic
gg.events = events(gg.jabba, QRP = TRUE, seismic = TRUE)
expect_is(gg.events$meta$events, 'data.table')
expect_true(any(grepl('qrp', gg.events$meta$events$type)))
expect_true(any(grepl('seismic', gg.events$meta$events$type)))

## run events caller without qrp
gg.events.no.qrp = events(gg.jabba, QRP = FALSE)
Expand Down Expand Up @@ -109,3 +109,12 @@ test_that('microhomology', {
expect_error(microhomology(gg, fa))

})

test_that('seismic', {
# test the Rosswog et al. algorithm version
# clone the repo
seismic_dir = paste0(tempdir(), '/', 'SeismicAmplification')
system(paste0('git clone https://github.com/ShaiberAlon/SeismicAmplification.git ', seismic_dir))
s = seismic(gg.jabba, Rosswog = TRUE, rosswog_dir = seismic_dir)
expect_error(seismic(gG()))
})