Skip to content

Commit e5f6012

Browse files
authored
feat: Add Rabin–Karp string search algorithm in R (#166)
1 parent 96737fe commit e5f6012

File tree

1 file changed

+76
-0
lines changed

1 file changed

+76
-0
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
# Rabin–Karp String Search Algorithm in R
# Author: sgindeed
# Description: Finds all occurrences of a pattern in a given text using a
#              rolling hash technique.

# Locate every occurrence of `pattern` inside `text` with a rolling hash.
#
# Arguments:
#   text    - a single non-NA character string to search in.
#   pattern - a single non-NA character string to search for.
#   base    - radix of the polynomial hash (default 256).
#   mod     - modulus of the hash; a prime keeps collisions low (default 101).
#
# Returns:
#   An integer vector of 1-based start positions (in code points) of every
#   match, in increasing order. An empty or over-long pattern yields
#   integer(0).
#
# Errors:
#   Stops if an argument has the wrong shape, if an input is not valid UTF-8,
#   or if `base * mod^2` is too large for exact double arithmetic.
rabin_karp_search <- function(text, pattern, base = 256, mod = 101) {
  stopifnot(
    is.character(text), length(text) == 1L, !is.na(text),
    is.character(pattern), length(pattern) == 1L, !is.na(pattern),
    is.numeric(base), length(base) == 1L, base >= 1, base == floor(base),
    is.numeric(mod), length(mod) == 1L, mod >= 1, mod == floor(mod),
    # Intermediate values are bounded by roughly base * mod^2; doubles only
    # represent integers exactly below 2^53.
    base * mod * mod < 2^53
  )

  text_codes <- utf8ToInt(text)
  pattern_codes <- utf8ToInt(pattern)
  # utf8ToInt() signals invalid UTF-8 by returning NA.
  if (anyNA(text_codes) || anyNA(pattern_codes)) {
    stop("`text` and `pattern` must be valid UTF-8 strings.", call. = FALSE)
  }

  # Take lengths from the code-point vectors so that lengths and indexing
  # always refer to the same units.
  n <- length(text_codes)
  m <- length(pattern_codes)
  if (m == 0L || m > n) {
    return(integer(0))
  }

  # Reducing code points first leaves every hash value unchanged (mod `mod`)
  # while keeping intermediates small.
  text_mod <- text_codes %% mod
  pattern_mod <- pattern_codes %% mod

  # Weight of the leading character of a window: base^(m - 1) mod `mod`.
  high_weight <- 1
  for (k in seq_len(m - 1L)) {
    high_weight <- (high_weight * base) %% mod
  }

  # Hash of the pattern and of the first window of the text.
  pattern_hash <- 0
  window_hash <- 0
  for (k in seq_len(m)) {
    pattern_hash <- (base * pattern_hash + pattern_mod[k]) %% mod
    window_hash <- (base * window_hash + text_mod[k]) %% mod
  }

  last_start <- n - m + 1L
  is_match <- logical(last_start)  # preallocated instead of growing a vector

  for (start in seq_len(last_start)) {
    # Hashes are integer-valued doubles, so `==` is exact here. A hash hit is
    # only a candidate: confirm it by comparing the actual code points.
    if (window_hash == pattern_hash) {
      window <- text_codes[start:(start + m - 1L)]
      is_match[start] <- all(window == pattern_codes)
    }

    # Slide the window: drop the leading character, append the next one.
    # `%%` with a positive modulus never returns a negative value in R, so no
    # sign correction is needed afterwards.
    if (start < last_start) {
      without_lead <- window_hash - text_mod[start] * high_weight
      window_hash <- (base * without_lead + text_mod[start + m]) %% mod
    }
  }

  which(is_match)
}

# ---- Interactive driver ----
# Note: readline() only prompts in interactive sessions; when run
# non-interactively it returns "" immediately.

# Ask user for input
text <- readline(prompt = "Enter the text: ")
pattern <- readline(prompt = "Enter the pattern to search: ")

# Convert both to lowercase for case-insensitive matching
text <- tolower(text)
pattern <- tolower(pattern)

matches <- integer(0)

# Handle empty or invalid inputs without quit(), which would terminate the
# caller's whole R session; otherwise search and display the results.
if (nchar(pattern) == 0) {
  cat("Empty pattern. Nothing to search.\n")
} else if (nchar(pattern) > nchar(text)) {
  cat("Pattern is longer than text. Pattern not found in text.\n")
} else {
  matches <- rabin_karp_search(text, pattern)
  if (length(matches) > 0) {
    cat("Pattern found at positions:", paste(matches, collapse = ", "), "\n")
  } else {
    cat("Pattern not found in the given text.\n")
  }
}

0 commit comments

Comments
 (0)