|
1 | 1 | #include "data.table.h" |
2 | 2 | #include <Rdefines.h> |
| 3 | + |
| 4 | + |
3 | 5 | // #include <signal.h> // the debugging machinery + breakpoint aidee |
4 | 6 | // raise(SIGINT); |
5 | 7 |
|
@@ -176,33 +178,79 @@ bool is_default_measure(SEXP vec) { |
176 | 178 |
|
177 | 179 | // maybe unlist, then unique, then set_diff. |
178 | 180 | SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { |
| 181 | + // Protect input list/vector, unlisting if necessary |
179 | 182 | SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list); |
| 183 | + |
| 184 | + // Check for duplicated elements in the input vector |
180 | 185 | SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); |
| 186 | + |
181 | 187 | int n_unique_cols = 0; |
182 | | - for (int i=0; i<length(int_vec); ++i) { |
| 188 | + |
| 189 | + // Allocate a vector to store invalid column indices (initially max size is length of int_vec) |
| 190 | + SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec))); |
| 191 | + int* invalid_col_ptr = INTEGER(invalid_columns); |
| 192 | + int invalid_count = 0; |
| 193 | + |
| 194 | + // Iterate through the column numbers to identify invalid and unique columns |
| 195 | + for (int i = 0; i < length(int_vec); ++i) { |
183 | 196 | int col_number = INTEGER(int_vec)[i]; |
| 197 | + |
| 198 | + // Check if the column number is within valid range |
184 | 199 | bool good_number = 0 < col_number && col_number <= ncol; |
185 | | - if (is_measure) good_number |= (col_number==NA_INTEGER); |
186 | | - if (!good_number) { |
187 | | - if (is_measure) { |
188 | | - error(_("One or more values in 'measure.vars' is invalid.")); |
189 | | - } else { |
190 | | - error(_("One or more values in 'id.vars' is invalid.")); |
| 200 | + |
| 201 | + // Special check for 'measure' case (NA_INTEGER handling) |
| 202 | + if (is_measure) good_number |= (col_number == NA_INTEGER); |
| 203 | + |
| 204 | + // Collect invalid columns if not valid or out of range |
| 205 | + if (!good_number || col_number == 0) { |
| 206 | + invalid_col_ptr[invalid_count++] = col_number; |
| 207 | + } else if (!LOGICAL(is_duplicated)[i]) { |
| 208 | + n_unique_cols++; |
| 209 | + } |
| 210 | + } |
| 211 | + |
| 212 | + // If invalid columns are found, construct the error message |
| 213 | + if (invalid_count > 0) { |
| 214 | + // Buffer for concatenated invalid column messages |
| 215 | + char buffer[4096] = ""; // Large enough to store the concatenated string |
| 216 | + for (int i = 0; i < invalid_count; ++i) { |
| 217 | + char temp[32]; |
| 218 | + snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); // Format the column number |
| 219 | + |
| 220 | + if (i > 0) { |
| 221 | + strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); // Add separator |
191 | 222 | } |
192 | | - } else if (!LOGICAL(is_duplicated)[i]) n_unique_cols++; |
| 223 | + strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); // Append to the buffer |
| 224 | + } |
| 225 | + |
| 226 | + // Throw the error with the concatenated message |
| 227 | + error(_("One or more values in '%s' are invalid; please fix by removing: %s"), |
| 228 | + is_measure ? "measure.vars" : "id.vars", buffer); |
193 | 229 | } |
| 230 | + |
| 231 | + // Proceed with collecting unique columns |
194 | 232 | SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); |
195 | 233 | int unique_i = 0; |
196 | | - for (int i=0; i<length(is_duplicated); ++i) { |
| 234 | + |
| 235 | + // Populate the unique column numbers into the new vector |
| 236 | + for (int i = 0; i < length(is_duplicated); ++i) { |
197 | 237 | if (!LOGICAL(is_duplicated)[i]) { |
198 | 238 | INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i]; |
199 | 239 | } |
200 | 240 | } |
| 241 | + |
| 242 | + // Apply set difference to get final unique column indices |
201 | 243 | SEXP out = set_diff(unique_col_numbers, ncol); |
202 | | - UNPROTECT(3); |
| 244 | + |
| 245 | + // Unprotect all allocated objects |
| 246 | + UNPROTECT(4); // Unprotect input, duplication check, invalid columns, and unique columns |
| 247 | + |
203 | 248 | return out; |
204 | 249 | } |
205 | 250 |
|
| 251 | + |
| 252 | + |
| 253 | + |
206 | 254 | SEXP cols_to_int_or_list(SEXP cols, SEXP dtnames, bool is_measure) { |
207 | 255 | switch(TYPEOF(cols)) { |
208 | 256 | case STRSXP : return chmatch(cols, dtnames, 0); |
|
0 commit comments