# Rabin–Karp String Search Algorithm in R
# Author: sgindeed
# Description: Finds all occurrences of a pattern in a given text using a
#   rolling hash technique. Matching is case-insensitive.

#' Find every occurrence of a pattern in a text with the Rabin–Karp algorithm
#'
#' A polynomial rolling hash of each `m`-character window of `text` is
#' compared with the hash of `pattern`; only windows whose hashes agree are
#' compared character by character, so hash collisions never produce false
#' matches.
#'
#' @param text A single, non-NA string to search in.
#' @param pattern A single, non-NA string to search for.
#' @param base Multiplier of the polynomial hash.
#' @param mod Modulus of the hash (a prime).
#' @return An integer vector of 1-based start positions, counted in
#'   characters and sorted increasingly. `integer(0)` is returned when the
#'   pattern is empty, longer than the text, or does not occur.
rabin_karp_search <- function(text, pattern, base = 256, mod = 101) {
  stopifnot(
    is.character(text), length(text) == 1L, !is.na(text),
    is.character(pattern), length(pattern) == 1L, !is.na(pattern)
  )

  # Work on Unicode code points. utf8ToInt() needs UTF-8 input, so the
  # encoding is normalised first.
  text_codes <- utf8ToInt(enc2utf8(text))
  pattern_codes <- utf8ToInt(enc2utf8(pattern))
  if (anyNA(text_codes) || anyNA(pattern_codes)) {
    stop("`text` and `pattern` must be valid UTF-8 strings.", call. = FALSE)
  }

  n <- length(text_codes)
  m <- length(pattern_codes)
  if (m == 0L || m > n) {
    return(integer(0))
  }

  # Weight of the leading character of a window: base^(m - 1) %% mod,
  # reduced at every step so the intermediate value stays small.
  high_weight <- 1
  for (k in seq_len(m - 1L)) {
    high_weight <- (high_weight * base) %% mod
  }

  # Hash of the pattern and of the first window of the text.
  pattern_hash <- 0
  window_hash <- 0
  for (k in seq_len(m)) {
    pattern_hash <- (base * pattern_hash + pattern_codes[k]) %% mod
    window_hash <- (base * window_hash + text_codes[k]) %% mod
  }

  last_start <- n - m + 1L
  is_match <- logical(last_start) # preallocated instead of growing a vector
  for (start in seq_len(last_start)) {
    if (window_hash == pattern_hash) {
      # Equal hashes may be a collision, so confirm on the actual characters.
      window <- text_codes[start:(start + m - 1L)]
      if (identical(window, pattern_codes)) {
        is_match[start] <- TRUE
      }
    }

    if (start < last_start) {
      # Slide the window: drop its first character, append the next one.
      # `%%` with a positive modulus never yields a negative value, so no
      # sign correction is needed after the subtraction.
      window_hash <- (base * (window_hash - text_codes[start] * high_weight) +
        text_codes[start + m]) %% mod
    }
  }

  which(is_match)
}

#' Prompt for and read one line of input per prompt
#'
#' `readline()` does not wait for input in non-interactive sessions (it
#' returns ""), so under Rscript the lines are read from standard input.
#'
#' @param prompts Character vector of prompts, shown in order.
#' @return Character vector with one response per prompt; a missing
#'   response (end of input) is returned as "".
read_inputs <- function(prompts) {
  if (interactive()) {
    answers <- vapply(
      prompts,
      function(p) readline(prompt = p),
      character(1),
      USE.NAMES = FALSE
    )
    return(answers)
  }

  # One connection for all reads so no buffered input is lost between lines.
  con <- file("stdin", open = "r")
  on.exit(close(con), add = TRUE)
  vapply(
    prompts,
    function(p) {
      cat(p)
      line <- readLines(con, n = 1L, warn = FALSE)
      if (length(line) == 0L) "" else line
    },
    character(1),
    USE.NAMES = FALSE
  )
}

# Ask user for input
inputs <- read_inputs(c("Enter the text: ", "Enter the pattern to search: "))

# Convert both to lowercase for case-insensitive matching
text <- tolower(inputs[1])
pattern <- tolower(inputs[2])

# Get lengths
n <- nchar(text)
m <- nchar(pattern)

# Report the outcome. No quit() here: it would terminate the whole R session
# when the script is source()d interactively.
if (m == 0) {
  cat("Empty pattern. Nothing to search.\n")
} else if (m > n) {
  cat("Pattern is longer than text. Pattern not found in text.\n")
} else {
  matches <- rabin_karp_search(text, pattern)
  if (length(matches) > 0) {
    cat("Pattern found at positions:", paste(matches, collapse = ", "), "\n")
  } else {
    cat("Pattern not found in the given text.\n")
  }
}