@@ -298,4 +298,144 @@ Rcpp::List get_edge_components_cpp(const Rcpp::IntegerMatrix& edges, int n_nodes
298298 Rcpp::Named (" to_components" ) = to_components,
299299 Rcpp::Named (" n_components" ) = next_component_id
300300 );
301+ }
302+
303+ // ' Multi-Pattern Fixed String Matching
304+ // '
305+ // ' Fast C++ implementation for finding multiple fixed patterns in strings.
306+ // ' Equivalent to multiple grepl(pattern, x, fixed=TRUE) calls but much faster.
307+ // '
308+ // ' @param strings Character vector of strings to search in
309+ // ' @param patterns Character vector of fixed patterns to search for
310+ // ' @param match_any Logical. If TRUE, returns TRUE if ANY pattern matches.
311+ // ' If FALSE, returns a matrix showing which pattern matches which string.
312+ // ' @param ignore_case Logical. Whether to ignore case when matching. Default FALSE.
313+ // '
314+ // ' @return If match_any=TRUE: Logical vector same length as strings.
315+ // ' If match_any=FALSE: Logical matrix with nrow=length(strings), ncol=length(patterns).
316+ // '
317+ // ' @examples
318+ // ' strings <- c("hello world", "goodbye", "hello there", "world peace")
319+ // ' patterns <- c("hello", "world")
320+ // '
321+ // ' # Check if ANY pattern matches each string
322+ // ' multi_grepl_cpp(strings, patterns, match_any = TRUE)
323+ // ' # Returns: TRUE FALSE TRUE TRUE
324+ // '
325+ // ' # Get detailed matrix of which patterns match which strings
326+ // ' multi_grepl_cpp(strings, patterns, match_any = FALSE)
327+ // ' # Returns 4x2 matrix showing hello/world matches for each string
328+ // '
329+ // [[Rcpp::export]]
330+ Rcpp::LogicalMatrix multi_grepl_cpp (const Rcpp::CharacterVector& strings,
331+ const Rcpp::CharacterVector& patterns,
332+ bool match_any = true ,
333+ bool ignore_case = false ) {
334+
335+ int n_strings = strings.size ();
336+ int n_patterns = patterns.size ();
337+
338+ // Convert patterns to std::string for easier manipulation
339+ std::vector<std::string> pattern_vec (n_patterns);
340+ for (int p = 0 ; p < n_patterns; p++) {
341+ pattern_vec[p] = Rcpp::as<std::string>(patterns[p]);
342+ if (ignore_case) {
343+ // Convert pattern to lowercase
344+ std::transform (pattern_vec[p].begin (), pattern_vec[p].end (),
345+ pattern_vec[p].begin (), ::tolower);
346+ }
347+ }
348+
349+ if (match_any) {
350+ // Return vector indicating if ANY pattern matches each string
351+ Rcpp::LogicalVector result (n_strings);
352+
353+ for (int i = 0 ; i < n_strings; i++) {
354+ std::string str = Rcpp::as<std::string>(strings[i]);
355+ if (ignore_case) {
356+ std::transform (str.begin (), str.end (), str.begin (), ::tolower);
357+ }
358+
359+ bool found_match = false ;
360+ for (int p = 0 ; p < n_patterns && !found_match; p++) {
361+ if (str.find (pattern_vec[p]) != std::string::npos) {
362+ found_match = true ;
363+ }
364+ }
365+ result[i] = found_match;
366+ }
367+
368+ // Convert to matrix format for consistent return type
369+ Rcpp::LogicalMatrix result_matrix (n_strings, 1 );
370+ for (int i = 0 ; i < n_strings; i++) {
371+ result_matrix (i, 0 ) = result[i];
372+ }
373+ return result_matrix;
374+
375+ } else {
376+ // Return matrix showing which patterns match which strings
377+ Rcpp::LogicalMatrix result (n_strings, n_patterns);
378+
379+ for (int i = 0 ; i < n_strings; i++) {
380+ std::string str = Rcpp::as<std::string>(strings[i]);
381+ if (ignore_case) {
382+ std::transform (str.begin (), str.end (), str.begin (), ::tolower);
383+ }
384+
385+ for (int p = 0 ; p < n_patterns; p++) {
386+ result (i, p) = (str.find (pattern_vec[p]) != std::string::npos);
387+ }
388+ }
389+
390+ return result;
391+ }
392+ }
393+
394+ // ' Multi-Pattern Fixed String Matching (Any Match)
395+ // '
396+ // ' Simplified version that returns TRUE if any pattern matches each string.
397+ // ' Optimized for the common use case of "does this string contain any of these patterns?"
398+ // '
399+ // ' @param strings Character vector of strings to search in
400+ // ' @param patterns Character vector of fixed patterns to search for
401+ // ' @param ignore_case Logical. Whether to ignore case. Default FALSE.
402+ // '
403+ // ' @return Logical vector same length as strings
404+ // '
405+ // [[Rcpp::export]]
406+ Rcpp::LogicalVector multi_grepl_any_cpp (const Rcpp::CharacterVector& strings,
407+ const Rcpp::CharacterVector& patterns,
408+ bool ignore_case = false ) {
409+
410+ int n_strings = strings.size ();
411+ int n_patterns = patterns.size ();
412+
413+ // Convert patterns to std::string
414+ std::vector<std::string> pattern_vec (n_patterns);
415+ for (int p = 0 ; p < n_patterns; p++) {
416+ pattern_vec[p] = Rcpp::as<std::string>(patterns[p]);
417+ if (ignore_case) {
418+ std::transform (pattern_vec[p].begin (), pattern_vec[p].end (),
419+ pattern_vec[p].begin (), ::tolower);
420+ }
421+ }
422+
423+ Rcpp::LogicalVector result (n_strings);
424+
425+ for (int i = 0 ; i < n_strings; i++) {
426+ std::string str = Rcpp::as<std::string>(strings[i]);
427+ if (ignore_case) {
428+ std::transform (str.begin (), str.end (), str.begin (), ::tolower);
429+ }
430+
431+ bool found_match = false ;
432+ for (int p = 0 ; p < n_patterns && !found_match; p++) {
433+ if (str.find (pattern_vec[p]) != std::string::npos) {
434+ found_match = true ;
435+ }
436+ }
437+ result[i] = found_match;
438+ }
439+
440+ return result;
301441}
0 commit comments