Skip to content

Commit e9d87de

Browse files
authored
Merge pull request #527 from GoekeLab/devel_pre
Fix xgboost related issues and update devel branch
2 parents 3248d3c + b834f14 commit e9d87de

24 files changed

+16340
-16215
lines changed

.github/workflows/check-bioc.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222

2323
on:
2424
push:
25-
branches: [main, devel]
25+
branches: [devel, devel_pre, devel_pre_v4]
2626
pull_request:
27-
branches: [main, devel]
27+
branches: [devel, devel_pre, devel_pre_v4]
2828

2929
name: R-CMD-check-bioc
3030

@@ -54,8 +54,8 @@ jobs:
5454
fail-fast: false
5555
matrix:
5656
config:
57-
- { os: ubuntu-latest, r: '4.4.2', bioc: '3.20', cont: "bioconductor/bioconductor_docker:RELEASE_3_20", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
58-
- { os: macOS-latest, r: '4.4.2', bioc: '3.20'}
57+
- { os: ubuntu-latest, r: '4.5.2', bioc: '3.22', cont: "bioconductor/bioconductor_docker:RELEASE_3_22", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
58+
- { os: macOS-latest, r: '4.5.2', bioc: '3.22'}
5959
##- { os: windows-latest, r: '4.3', bioc: '3.18'}
6060
## Check https://github.com/r-lib/actions/tree/master/examples
6161
## for examples using the http-user-agent
@@ -107,16 +107,16 @@ jobs:
107107
uses: actions/cache@v3
108108
with:
109109
path: ${{ env.R_LIBS_USER }}
110-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.4.2-${{ hashFiles('.github/depends.Rds') }}
111-
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.4.2-
110+
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.5.2-${{ hashFiles('.github/depends.Rds') }}
111+
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.5.2-
112112

113113
- name: Cache R packages on Linux
114114
if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' "
115115
uses: actions/cache@v3
116116
with:
117117
path: /home/runner/work/_temp/Library
118-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4.2-${{ hashFiles('.github/depends.Rds') }}
119-
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4.2-
118+
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.5.2-${{ hashFiles('.github/depends.Rds') }}
119+
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.5.2-
120120

121121
- name: Install Linux system dependencies
122122
if: runner.os == 'Linux'
@@ -339,7 +339,7 @@ jobs:
339339
if: failure()
340340
uses: actions/upload-artifact@v4
341341
with:
342-
name: ${{ runner.os }}-biocversion-RELEASE-r-4.4.2-results
342+
name: ${{ runner.os }}-biocversion-RELEASE-r-4.5.2-results
343343
path: check
344344

345345
- uses: docker/build-push-action@v1

.github/workflows/lint.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
33
on:
44
push:
5-
branches:
6-
- devel
5+
branches: [devel, devel_pre, devel_pre_v4]
76
pull_request:
8-
branches: [main, devel]
7+
branches: [devel, devel_pre, devel_pre_v4]
98

109
name: lint
1110

.github/workflows/test-coverage.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
33
on:
44
push:
5-
branches:
6-
- devel
5+
branches: [devel, devel_pre, devel_pre_v4]
76
pull_request:
8-
branches: [main, devel]
7+
branches: [devel, devel_pre, devel_pre_v4]
98

109
name: test-coverage
1110

R/bambu-extendAnnotations-utilityCombine.R

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -300,23 +300,15 @@ makeUnsplicedTibble <- function(combinedNewUnsplicedSe,newUnsplicedSeList,
300300
} , BPPARAM = bpParameters))
301301
newUnsplicedTibble <- newUnsplicedTibble %>%
302302
left_join(rowDataCombined, by = "row_id") %>%
303-
separate(row_id, c("sample","rcName"), sep = "\\-") %>%
304-
mutate(sample_id = as.integer(gsub("s","",sample))) %>%
305-
mutate(sample_name = colDataNames[sample_id]) %>%
306-
select(-sample, -sample_id) %>%
307303
mutate(readCount_tmp = readCount) %>%
308-
group_by(chr,strand, start, end, sample_name) %>%
304+
group_by(chr,strand, start, end) %>%
309305
summarise(readCount = sum(readCount),
310-
geneReadProp = sum(geneReadProp),
311-
txScore = weighted.mean(txScore, readCount_tmp),
312-
txScore.noFit = weighted.mean(txScore.noFit, readCount_tmp)) %>%
313-
group_by(chr, strand, start, end) %>%
314-
summarise(readCount = sum(readCount),
315-
maxTxScore = txScore,
316-
maxTxScore.noFit = txScore.noFit,
317-
NSampleReadCount = sum(readCount >= min.readCount),
318-
NSampleReadProp = sum(geneReadProp >=
319-
min.readFractionByGene),
320-
NSampleTxScore = sum(maxTxScore > min.txScore.singleExon))
306+
maxTxScore = weighted.mean(txScore, readCount_tmp),
307+
maxTxScore.noFit = weighted.mean(txScore.noFit, readCount_tmp),
308+
NSampleReadCount = sum(readCount_tmp >= min.readCount),
309+
NSampleReadProp = sum(geneReadProp >=
310+
min.readFractionByGene),
311+
NSampleTxScore = sum(txScore > min.txScore.singleExon))
312+
321313
return(newUnsplicedTibble)
322314
}

R/bambu-extendAnnotations-utilityExtend.R

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,12 @@ recommendNDR <- function(combinedTranscripts, baselineFDR = 0.1, NDR = NULL, def
130130
equal[is.na(equal)] = FALSE
131131

132132
#add environment so poly() works
133-
attr(defaultModels$lmNDR[["terms"]], ".Environment") <- new.env(parent = parent.env(globalenv()))
133+
if(is.null(defaultModels$lmNDR[["terms"]])){
134+
frm <- defaultModels$lmNDR$call$formula
135+
defaultModels$lmNDR[["terms"]] <- terms(as.formula(frm))
136+
attr(defaultModels$lmNDR[["terms"]], ".Environment") <- new.env(parent = parent.env(globalenv()))
137+
}
138+
#attr(defaultModels$lmNDR[["terms"]], ".Environment") <- new.env(parent = parent.env(globalenv()))
134139
baseline = predict(defaultModels$lmNDR, newdata=data.frame(NDR=baselineFDR))
135140
attr(defaultModels$lmNDR[["terms"]], ".Environment") = c()
136141

R/bambu-processReads_scoreReadClasses.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,9 @@ getTranscriptScore = function(rowData, model = NULL, defaultModels){
158158
} else txScoreSE = NULL
159159
} else {
160160
if (!is.null(defaultModels)){
161-
txScore = predict(defaultModels$transcriptModelME,
161+
txScore = predict(defaultModels$transcriptModelME,
162162
as.matrix(features))
163-
txScoreSE = predict(defaultModels$transcriptModelSE,
163+
txScoreSE = predict(defaultModels$transcriptModelSE,
164164
as.matrix(features))
165165
} else {
166166
warning("Transcript model not trained. ",
@@ -292,6 +292,7 @@ prepareTranscriptModelFeatures = function(rowData){
292292
tx_strand_bias = readCount.posStrand, labels = equal) %>%
293293
mutate(
294294
tx_strand_bias=(1-abs(0.5-(tx_strand_bias/numReads))),
295+
labels = as.logical(labels),
295296
numReads = log2(pmax(1,1+(numReads/scalingFactor)))
296297
)
297298
return(outData)

R/bambu-processReads_utilityJunctionErrorCorrection.R

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ testSpliceSites <- function(data, splice = "Start", prime = "start",
9191
predSplice.prime <- NULL
9292
if (is.null(junctionModel)) {
9393
model = fitXGBoostModel(labels.train =
94-
as.integer(annotatedSplice)[mySet.all][mySet.training],
94+
as.logical(annotatedSplice)[mySet.all][mySet.training],
9595
data.train = modelmatrix[mySet.training,],
9696
show.cv = verbose, maxSize.cv = 10000)
9797

@@ -210,11 +210,10 @@ fitXGBoostModel <- function(labels.train, data.train, nrounds = 50,
210210
data.train.cv.test <- data.train[-mySample,]
211211
labels.train.cv.test <- labels.train[-mySample]
212212

213-
cv.fit <- xgboost(data = data.train.cv,
214-
label = labels.train.cv, nthread = 1, nrounds = nrounds,
213+
cv.fit <- xgboost(x = data.train.cv,
214+
y = labels.train.cv, nthread = 1, nrounds = nrounds,
215215
objective = "binary:logistic",
216-
eval_metric = 'error',
217-
verbose = 0)
216+
eval_metric = 'error')
218217
predictions <- predict(cv.fit, data.train.cv.test)
219218
message('prediction accuracy (CV) (higher for splice ',
220219
'donor than splice acceptor)')
@@ -228,11 +227,10 @@ fitXGBoostModel <- function(labels.train, data.train, nrounds = 50,
228227
message("AUC: ", evaluatePerformance(labels.train.cv.test == 1,predictions)$AUC)
229228
}
230229

231-
cv.fit <- xgboost(data = data.train,
232-
label = labels.train, nthread=1, nrounds=nrounds,
230+
cv.fit <- xgboost(x = data.train,
231+
y = labels.train, nthread=1, nrounds=nrounds,
233232
objective = "binary:logistic",
234-
eval_metric='error',
235-
verbose = 0)
233+
eval_metric='error')
236234

237235
return(cv.fit)
238236
}

R/bambu.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
149149

150150
emParameters <- setEmParameters(emParameters = opt.em)
151151
bpParameters <- setBiocParallelParameters(reads, ncore, verbose)
152-
152+
xgb.set.config(nthread = 1)
153153
rm.readClassSe <- FALSE
154154
readClassList = reads
155155
isRDSs = all(sapply(reads, class)=="RangedSummarizedExperiment")

R/bambu_utilityFunctions.R

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
#' @importFrom BiocParallel bpparam
55
#' @noRd
66
setBiocParallelParameters <- function(reads, ncore, verbose){
7-
if(ncore >= 2) message("WARNING - If you change the number of cores (ncore) ",
8-
"between Bambu runs and there is no progress please restart your R session ",
9-
"to resolve the issue that originates from the XGboost package.")
107
bpParameters <- bpparam()
118
#===# set parallel options: otherwise use parallel to distribute samples
129
bpParameters$workers <- ifelse(length(reads) == 1, 1, ncore)

R/sysdata.rda

2.41 KB
Binary file not shown.

0 commit comments

Comments
 (0)