|
111 | 111 |
|
112 | 112 | dset <- function(group1, group2, nmc = 10000, returnData = FALSE){ |
113 | 113 | stopifnot(nmc >= 0) |
114 | | - # TODO: add more error checks: |
115 | | - # nmc must be a non-neg integer, not 1, & not too small relative to n,m(?); |
116 | | - # group1 and group2 must be numeric, vectors, and non-empty |
| 114 | + stopifnot(nmc != 1) |
| 115 | + stopifnot(is.numeric(group1) & is.numeric(group2)) |
| 116 | + stopifnot(length(group1) >= 1 & length(group2) >= 1) |
| 117 | + stopifnot(!any(is.na(c(group1, group2)))) |
117 | 118 |
|
118 | | - # creates the dataset referenced in pval and cint |
119 | 119 | combined <- c(group1, group2) |
120 | 120 |
|
121 | 121 | n <- length(group1) |
122 | 122 | m <- length(group2) |
123 | 123 | den <- (1/n + 1/m) |
124 | 124 |
|
125 | 125 | N <- n + m |
126 | | - num <- choose(N, n) |
127 | | - if(nmc == 0 | num <= nmc) { |
128 | | - dcombn <- utils::combn(1:N, n) |
| 126 | + num <- choose(N, n) # number of possible combinations |
| 127 | + |
| 128 | + # Form a matrix where each column contains indices in new "group1" for that comb or perm |
| 129 | + if(nmc == 0 | num <= nmc) { # take all possible combinations |
| 130 | + dcombn1 <- utils::combn(1:N, n) |
129 | 131 | } else { # use Monte Carlo sample of permutations, not all possible combinations |
130 | | - dcombn <- replicate(nmc, sample(N, n)) |
131 | | - dcombn[,1] <- 1:n # force the 1st "combination" to be original data order |
| 132 | + dcombn1 <- replicate(nmc, sample(N, n)) |
| 133 | + dcombn1[,1] <- 1:n # force the 1st "combination" to be original data order |
132 | 134 | num <- nmc |
133 | 135 | } |
134 | 136 |
|
135 | | - dcombn2 <- apply(dcombn, 2, function(x) setdiff(1:N, x)) |
136 | | - group1_perm <- matrix(combined[dcombn], nrow = n) |
| 137 | + # Form the equivalent matrix for indices in new "group2" |
| 138 | + dcombn2 <- apply(dcombn1, 2, function(x) setdiff(1:N, x)) |
| 139 | + |
| 140 | + # Form the corresponding matrices of data values, not data indices |
| 141 | + group1_perm <- matrix(combined[dcombn1], nrow = n) |
137 | 142 | group2_perm <- matrix(combined[dcombn2], nrow = m) |
138 | 143 |
|
139 | | - k <- colSums(matrix(dcombn %in% ((n+1):N), nrow=n)) |
| 144 | + # For each comb or perm, compute: |
| 145 | + # difference in group means; sum in group1; difference in group medians; |
| 146 | + # and sum of *ranks* in group1 (the statistic for the Wilcoxon rank sum test) |
140 | 147 | diffmean <- colMeans(group1_perm) - colMeans(group2_perm) |
141 | 148 | sum1 <- colSums(group1_perm) |
142 | 149 | diffmedian <- matrixStats::colMedians(group1_perm) - matrixStats::colMedians(group2_perm) |
143 | | - |
144 | 150 | r <- rank(combined, ties.method = "first") |
145 | | - wilsum <- colSums(matrix(r[dcombn], nrow = n)) |
146 | | - wkd = (diffmean[1] - diffmean) / (k * den) |
| 151 | + wilsum <- colSums(matrix(r[dcombn1], nrow = n)) |
| 152 | + |
| 153 | + # For each comb or perm, compute: |
| 154 | + # k = how many values swapped from group1 to group2? |
| 155 | + # wkd = Nguyen (2009) statistic whose quantiles are used for CI endpoints |
| 156 | + k <- colSums(matrix(dcombn1 %in% ((n+1):N), nrow=n)) |
| 157 | + wkd <- (diffmean[1] - diffmean) / (k * den) |
147 | 158 |
|
148 | 159 | dataframe <- data.frame(diffmean = diffmean, |
149 | 160 | sum1 = sum1, |
|
0 commit comments