@@ -24,6 +24,10 @@ calcNearestStringDist <- function(x, metric = "hamming", nThreads = 1L) {
2424 .Call(`_mutscan_calcNearestStringDist` , x , metric , nThreads )
2525}
2626
# Thin R-level wrapper: forwards `n` unchanged to the compiled routine
# registered under the symbol `_mutscan_complement` and returns its result.
# NOTE(review): the expected type of `n` and the meaning of the result are
# defined by the compiled code, which is not visible here -- confirm there.
27+ complement <- function (n ) {
28+ .Call(`_mutscan_complement` , n )
29+ }
30+
# Thin R-level wrapper: passes `a`, `b` and `mutNameDelimiter`, in that order,
# to the compiled routine registered as `_mutscan_compareCodonPositions` and
# returns whatever that routine returns.
# NOTE(review): argument types and the comparison semantics live in the
# compiled code -- verify against that implementation.
2731compareCodonPositions <- function (a , b , mutNameDelimiter ) {
2832 .Call(`_mutscan_compareCodonPositions` , a , b , mutNameDelimiter )
2933}
@@ -40,6 +44,10 @@ test_makeAAHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq) {
4044 .Call(`_mutscan_test_makeAAHGVS` , mutationsSorted , mutNameDelimiter , wtSeq )
4145}
4246
# Thin R-level wrapper around the compiled routine registered as
# `_mutscan_test_compareToWildtype`. All ten arguments are forwarded
# positionally, in the order they appear in the signature, so that order must
# stay in sync with the compiled entry point.
# Only the first four arguments are required; the remaining six have defaults.
# NOTE(review): as rendered here, the defaults for `codonPrefix` (" c") and
# `mutNameDelimiter` (" .") contain a leading space -- confirm against the
# generated file whether that space is real or a rendering artifact.
47+ test_compareToWildtype <- function (varSeq , wtSeq , varIntQual , forbiddenCodons_vect , mutatedPhredMin = 0.0 , nbrMutatedCodonsMax = - 1L , codonPrefix = " c" , nbrMutatedBasesMax = - 1L , mutNameDelimiter = " ." , collapseToWT = FALSE ) {
48+ .Call(`_mutscan_test_compareToWildtype` , varSeq , wtSeq , varIntQual , forbiddenCodons_vect , mutatedPhredMin , nbrMutatedCodonsMax , codonPrefix , nbrMutatedBasesMax , mutNameDelimiter , collapseToWT )
49+ }
50+
# Thin R-level wrapper around the compiled routine registered as
# `_mutscan_test_decomposeRead`. All thirteen arguments are required (none has
# a default) and are forwarded positionally in signature order, so that order
# must stay in sync with the compiled entry point.
# NOTE(review): argument types and the structure of the return value are
# defined by the compiled code, which is not visible here.
4351test_decomposeRead <- function (sseq , squal , elements , elementLengths , primerSeqs , umiSeq , varSeq , varQual , varLengths , constSeq , constQual , nNoPrimer , nReadWrongLength ) {
4452 .Call(`_mutscan_test_decomposeRead` , sseq , squal , elements , elementLengths , primerSeqs , umiSeq , varSeq , varQual , varLengths , constSeq , constQual , nNoPrimer , nReadWrongLength )
4553}
@@ -57,45 +65,45 @@ findClosestRefSeqEarlyStop <- function(varSeq, wtSeq, upperBoundMismatch, sim) {
5765}
5866
5967# ' Create a conversion table for collapsing similar sequences
60- # ' @param seqs Character vector with nucleotide sequences (or pairs of
61- # ' sequences concatenated with "_") to be collapsed. The sequences must
68+ # ' @param seqs Character vector with nucleotide sequences (or pairs of
69+ # ' sequences concatenated with "_") to be collapsed. The sequences must
6270# ' all be of the same length.
6371# ' @param scores Numeric vector of "scores" for the sequences. Typically
64- # ' the total read/UMI count. A higher score will be preferred when
65- # ' deciding which sequence to use as the representative for a group of
72+ # ' the total read/UMI count. A higher score will be preferred when
73+ # ' deciding which sequence to use as the representative for a group of
6674# ' collapsed sequences.
67- # ' @param collapseMaxDist Numeric scalar defining the tolerance for collapsing
68- # ' similar sequences. If the value is in [0, 1), it defines the maximal
75+ # ' @param collapseMaxDist Numeric scalar defining the tolerance for collapsing
76+ # ' similar sequences. If the value is in [0, 1), it defines the maximal
6977# ' Hamming distance in terms of a fraction of sequence length:
7078# ' (\code{round(collapseMaxDist * nchar(sequence))}).
7179# ' A value greater or equal to 1 is rounded and directly used as the maximum
7280# ' allowed Hamming distance. Note that sequences can only be
73- # ' collapsed if they are all of the same length.
74- # ' @param collapseMinScore Numeric scalar, indicating the minimum score
75- # ' required for a sequence to be considered as a representative for a
76- # ' group of similar sequences (i.e., to allow other sequences to be
77- # ' collapsed into it).
81+ # ' collapsed if they are all of the same length. The default value is 0.
82+ # ' @param collapseMinScore Numeric scalar, indicating the minimum score
83+ # ' required for a sequence to be considered as a representative for a
84+ # ' group of similar sequences (i.e., to allow other sequences to be
85+ # ' collapsed into it). The default value is 0.
7886# ' @param collapseMinRatio Numeric scalar. During collapsing of
79- # ' similar sequences, a low-frequency sequence will be collapsed
80- # ' with a higher-frequency sequence only if the ratio between the
81- # ' high-frequency and the low-frequency scores is at least this
87+ # ' similar sequences, a low-frequency sequence will be collapsed
88+ # ' with a higher-frequency sequence only if the ratio between the
89+ # ' high-frequency and the low-frequency scores is at least this
8290# ' high. A value of 0 indicates that no such check is performed. The default value is 0.
8391# ' @param verbose Logical scalar, whether to print progress messages. The default value is FALSE.
84- # '
85- # ' @return A data.frame with two columns, containing the input sequences
92+ # '
93+ # ' @return A data.frame with two columns, containing the input sequences
8694# ' and the representatives for the groups resulting from grouping similar
8795# ' sequences, respectively.
88- # '
96+ # '
8997# ' @examples
9098# ' seqs <- c("AACGTAGCA", "ACCGTAGCA", "AACGGAGCA", "ATCGGAGCA", "TGAGGCATA")
9199# ' scores <- c(5, 1, 3, 1, 8)
92- # ' groupSimilarSequences(seqs = seqs, scores = scores,
93- # ' collapseMaxDist = 1, collapseMinScore = 0,
100+ # ' groupSimilarSequences(seqs = seqs, scores = scores,
101+ # ' collapseMaxDist = 1, collapseMinScore = 0,
94102# ' collapseMinRatio = 0, verbose = FALSE)
95- # '
103+ # '
96104# ' @export
97105# ' @author Michael Stadler, Charlotte Soneson
98- groupSimilarSequences <- function (seqs , scores , collapseMaxDist , collapseMinScore , collapseMinRatio , verbose ) {
106+ groupSimilarSequences <- function (seqs , scores , collapseMaxDist = 0.0 , collapseMinScore = 0.0 , collapseMinRatio = 0.0 , verbose = FALSE ) {
# The two signature lines above are the removed and the added variant: the
# added one gives the last four arguments defaults (0.0, 0.0, 0.0, FALSE),
# which stays backward-compatible with callers that pass every argument.
# The body forwards all six arguments positionally, in signature order, to the
# compiled routine registered as `_mutscan_groupSimilarSequences`.
99107 .Call(`_mutscan_groupSimilarSequences` , seqs , scores , collapseMaxDist , collapseMinScore , collapseMinRatio , verbose )
100108}
101109
0 commit comments