Skip to content

Commit 2359969

Browse files
authored
Merge pull request #40 from fmicompbio/hgvs-names
HGVS names
2 parents 4d566cc + 19db145 commit 2359969

15 files changed

+539
-45
lines changed

.github/workflows/R-CMD-check.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
config:
2020
- { os: macOS-latest, bioc: 'release', curlConfigPath: '/usr/bin/'}
2121
- { os: windows-latest, bioc: 'release'}
22-
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_15", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
22+
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_16", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
2323

2424
env:
2525
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: mutscan
22
Title: Preprocessing and Analysis of Deep Mutational Scanning Data
3-
Version: 0.2.34
3+
Version: 0.2.35
44
Authors@R:
55
c(person(given = "Charlotte",
66
family = "Soneson",

NEWS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# mutscan 0.2.35
2+
3+
* Add alternative names for variants (including HGVS identifiers)
4+
5+
# mutscan 0.2.34
6+
7+
* Expand examples in function documentation
8+
19
# mutscan 0.2.33
210

311
* Replace Matrix.utils::aggregate.Matrix (removed from CRAN) by DelayedArray::rowsum

R/RcppExports.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ translateString <- function(s) {
3232
.Call(`_mutscan_translateString`, s)
3333
}
3434

35+
makeBaseHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq, varSeq) {
36+
.Call(`_mutscan_makeBaseHGVS`, mutationsSorted, mutNameDelimiter, wtSeq, varSeq)
37+
}
38+
39+
test_makeAAHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq) {
40+
.Call(`_mutscan_test_makeAAHGVS`, mutationsSorted, mutNameDelimiter, wtSeq)
41+
}
42+
3543
test_decomposeRead <- function(sseq, squal, elements, elementLengths, primerSeqs, umiSeq, varSeq, varQual, varLengths, constSeq, constQual, nNoPrimer, nReadWrongLength) {
3644
.Call(`_mutscan_test_decomposeRead`, sseq, squal, elements, elementLengths, primerSeqs, umiSeq, varSeq, varQual, varLengths, constSeq, constQual, nNoPrimer, nReadWrongLength)
3745
}

R/collapseMutantsByAA.R

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,13 @@ collapseMutantsByAA <- function(se, nameCol = "mutantNameAA") {
5858
rd <- mergeValues(SummarizedExperiment::rowData(se)[[nameCol]],
5959
SummarizedExperiment::rowData(se)$sequence) %>%
6060
stats::setNames(c(nameCol, "sequence"))
61-
for (v in c("mutantName", "sequenceAA", "mutationTypes",
62-
"nbrMutBases", "nbrMutCodons", "nbrMutAAs")) {
61+
for (v in setdiff(
62+
intersect(c("mutantName", "mutantNameBase", "mutantNameBaseHGVS",
63+
"mutantNameCodon", "mutantNameAA", "mutantNameAAHGVS",
64+
"sequenceAA", "mutationTypes",
65+
"nbrMutBases", "nbrMutCodons", "nbrMutAAs"),
66+
colnames(SummarizedExperiment::rowData(se))),
67+
nameCol)) {
6368
tmp <- mergeValues(SummarizedExperiment::rowData(se)[[nameCol]],
6469
SummarizedExperiment::rowData(se)[[v]])
6570
rd[[v]] <- tmp$valueColl[match(rd[[nameCol]], tmp$mutantNameColl)]

R/digestFastqs.R

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,18 @@
193193
#' \item{summaryTable}{A \code{data.frame} that contains, for each observed
194194
#' mutation combination, the corresponding variable region sequences (or pair of
195195
#' sequences), the number of such sequences observed, and the number of unique
196-
#' UMIs observed for the sequence. It also has a column named 'maxNbrReads',
197-
#' which contains the number of reads for the most frequent observed sequence
196+
#' UMIs observed for the sequence. It also has additional columns: 'maxNbrReads'
197+
#' contains the number of reads for the most frequently observed sequence
198198
#' represented by the feature (only relevant if similar variable regions are
199-
#' collapsed).}
199+
#' collapsed). 'nbrMutBases', 'nbrMutCodons' and 'nbrMutAAs' give the number of
200+
#' mutated bases, codons or amino acids in each variant. Alternative variant
201+
#' names based on base, codon or amino acid sequence are provided in columns
202+
#' 'mutantNameBase', 'mutantNameCodon' and 'mutantNameAA'. In addition,
203+
#' 'mutantNameBaseHGVS' and 'mutantNameAAHGVS' give base- and amino acid-based
204+
#' names following the HGVS nomenclature (https://varnomen.hgvs.org/). Please
205+
#' note that the provided reference sequence names are used for the HGVS
206+
#' sequence identifiers. It is up to the user to use appropriately named
207+
#' reference sequences in order to obtain valid HGVS variant names.}
200208
#' \item{filterSummary}{A \code{data.frame} that contains the number of input
201209
#' reads, the number of reads filtered out in the processing, and the number of
202210
#' retained reads. The filters are named according to the convention

R/summarizeExperiment.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,12 @@ summarizeExperiment <- function(x, coldata, countType = "umis") {
148148
## Also here, each column can contain multiple values separated with ,
149149
## (e.g. if variable sequences were collapsed to WT in digestFastqs)
150150
## ------------------------------------------------------------------------
151-
for (v in c("nbrMutBases", "nbrMutCodons", "nbrMutAAs",
152-
"sequenceAA", "mutantNameAA", "mutationTypes", "varLengths")) {
151+
for (v in intersect(c("nbrMutBases", "nbrMutCodons", "nbrMutAAs",
152+
"mutantNameBase", "mutantNameBaseHGVS",
153+
"mutantNameCodon", "mutantNameAA", "mutantNameAAHGVS",
154+
"sequenceAA", "mutationTypes",
155+
"varLengths"),
156+
colnames(tmpdf))) {
153157
tmp <- mergeValues(tmpdf$mutantName, tmpdf[[v]]) %>%
154158
stats::setNames(c("mutantName", v))
155159
allSequences[[v]] <- tmp[[v]][match(allSequences$mutantName,

inst/extdata/GSE102901_cis_se.rds

9.65 KB
Binary file not shown.

man/digestFastqs.Rd

Lines changed: 11 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/RcppExports.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,43 @@ BEGIN_RCPP
3737
END_RCPP
3838
}
3939
// translateString
40-
std::string translateString(std::string& s);
40+
std::string translateString(const std::string& s);
4141
RcppExport SEXP _mutscan_translateString(SEXP sSEXP) {
4242
BEGIN_RCPP
4343
Rcpp::RObject rcpp_result_gen;
4444
Rcpp::RNGScope rcpp_rngScope_gen;
45-
Rcpp::traits::input_parameter< std::string& >::type s(sSEXP);
45+
Rcpp::traits::input_parameter< const std::string& >::type s(sSEXP);
4646
rcpp_result_gen = Rcpp::wrap(translateString(s));
4747
return rcpp_result_gen;
4848
END_RCPP
4949
}
50+
// makeBaseHGVS
51+
std::string makeBaseHGVS(const std::vector<std::string> mutationsSorted, const std::string mutNameDelimiter, const std::string wtSeq, const std::string varSeq);
52+
RcppExport SEXP _mutscan_makeBaseHGVS(SEXP mutationsSortedSEXP, SEXP mutNameDelimiterSEXP, SEXP wtSeqSEXP, SEXP varSeqSEXP) {
53+
BEGIN_RCPP
54+
Rcpp::RObject rcpp_result_gen;
55+
Rcpp::RNGScope rcpp_rngScope_gen;
56+
Rcpp::traits::input_parameter< const std::vector<std::string> >::type mutationsSorted(mutationsSortedSEXP);
57+
Rcpp::traits::input_parameter< const std::string >::type mutNameDelimiter(mutNameDelimiterSEXP);
58+
Rcpp::traits::input_parameter< const std::string >::type wtSeq(wtSeqSEXP);
59+
Rcpp::traits::input_parameter< const std::string >::type varSeq(varSeqSEXP);
60+
rcpp_result_gen = Rcpp::wrap(makeBaseHGVS(mutationsSorted, mutNameDelimiter, wtSeq, varSeq));
61+
return rcpp_result_gen;
62+
END_RCPP
63+
}
64+
// test_makeAAHGVS
65+
std::string test_makeAAHGVS(const std::vector<std::string> mutationsSorted, const std::string mutNameDelimiter, const std::string wtSeq);
66+
RcppExport SEXP _mutscan_test_makeAAHGVS(SEXP mutationsSortedSEXP, SEXP mutNameDelimiterSEXP, SEXP wtSeqSEXP) {
67+
BEGIN_RCPP
68+
Rcpp::RObject rcpp_result_gen;
69+
Rcpp::RNGScope rcpp_rngScope_gen;
70+
Rcpp::traits::input_parameter< const std::vector<std::string> >::type mutationsSorted(mutationsSortedSEXP);
71+
Rcpp::traits::input_parameter< const std::string >::type mutNameDelimiter(mutNameDelimiterSEXP);
72+
Rcpp::traits::input_parameter< const std::string >::type wtSeq(wtSeqSEXP);
73+
rcpp_result_gen = Rcpp::wrap(test_makeAAHGVS(mutationsSorted, mutNameDelimiter, wtSeq));
74+
return rcpp_result_gen;
75+
END_RCPP
76+
}
5077
// test_decomposeRead
5178
List test_decomposeRead(const std::string sseq, const std::string squal, const std::string elements, const std::vector<int> elementLengths, const std::vector<std::string> primerSeqs, std::string umiSeq, std::string varSeq, std::string varQual, std::vector<int> varLengths, std::string constSeq, std::string constQual, int nNoPrimer, int nReadWrongLength);
5279
RcppExport SEXP _mutscan_test_decomposeRead(SEXP sseqSEXP, SEXP squalSEXP, SEXP elementsSEXP, SEXP elementLengthsSEXP, SEXP primerSeqsSEXP, SEXP umiSeqSEXP, SEXP varSeqSEXP, SEXP varQualSEXP, SEXP varLengthsSEXP, SEXP constSeqSEXP, SEXP constQualSEXP, SEXP nNoPrimerSEXP, SEXP nReadWrongLengthSEXP) {
@@ -243,6 +270,8 @@ static const R_CallMethodDef CallEntries[] = {
243270
{"_mutscan_calcNearestStringDist", (DL_FUNC) &_mutscan_calcNearestStringDist, 3},
244271
{"_mutscan_compareCodonPositions", (DL_FUNC) &_mutscan_compareCodonPositions, 3},
245272
{"_mutscan_translateString", (DL_FUNC) &_mutscan_translateString, 1},
273+
{"_mutscan_makeBaseHGVS", (DL_FUNC) &_mutscan_makeBaseHGVS, 4},
274+
{"_mutscan_test_makeAAHGVS", (DL_FUNC) &_mutscan_test_makeAAHGVS, 3},
246275
{"_mutscan_test_decomposeRead", (DL_FUNC) &_mutscan_test_decomposeRead, 13},
247276
{"_mutscan_test_mergeReadPairPartial", (DL_FUNC) &_mutscan_test_mergeReadPairPartial, 12},
248277
{"_mutscan_findClosestRefSeq", (DL_FUNC) &_mutscan_findClosestRefSeq, 4},

0 commit comments

Comments
 (0)