From 19d033d7a60ca3fa218162e02921796e34451b5e Mon Sep 17 00:00:00 2001 From: psychemist Date: Tue, 1 Apr 2025 07:30:55 +0100 Subject: [PATCH 1/5] chore: initialize regex utils examples directory --- .tool-versions | 5 +- .../utility_examples/regex_utils/.gitignore | 5 ++ .../utility_examples/regex_utils/Scarb.lock | 29 ++++++++++++ .../utility_examples/regex_utils/Scarb.toml | 46 +++++++++++++++++++ .../regex_utils/snfoundry.toml | 11 +++++ .../regex_utils/src/lib.cairo | 1 + 6 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/.gitignore create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/Scarb.lock create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/Scarb.toml create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/snfoundry.toml create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo diff --git a/.tool-versions b/.tool-versions index 1d32f85..6b0c5d1 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,2 +1,3 @@ -scarb 2.11.3 -starknet-foundry 0.39.0 +scarb 2.9.2 +starknet-foundry 0.36.0 +local scarb 2.9.2 diff --git a/examples/cairo/scripts/utility_examples/regex_utils/.gitignore b/examples/cairo/scripts/utility_examples/regex_utils/.gitignore new file mode 100644 index 0000000..4096f8b --- /dev/null +++ b/examples/cairo/scripts/utility_examples/regex_utils/.gitignore @@ -0,0 +1,5 @@ +target +.snfoundry_cache/ +snfoundry_trace/ +coverage/ +profile/ diff --git a/examples/cairo/scripts/utility_examples/regex_utils/Scarb.lock b/examples/cairo/scripts/utility_examples/regex_utils/Scarb.lock new file mode 100644 index 0000000..0b6d1d0 --- /dev/null +++ b/examples/cairo/scripts/utility_examples/regex_utils/Scarb.lock @@ -0,0 +1,29 @@ +# Code generated by scarb DO NOT EDIT. 
+version = 1 + +[[package]] +name = "regex" +version = "0.1.0" + +[[package]] +name = "regex_utils" +version = "0.1.0" +dependencies = [ + "regex", + "snforge_std", +] + +[[package]] +name = "snforge_scarb_plugin" +version = "0.36.1" +source = "registry+https://scarbs.xyz/" +checksum = "sha256:c25111b805d5faa9ecca63ef3a040cf20a5340b61be695f5e587d35cb7564eed" + +[[package]] +name = "snforge_std" +version = "0.36.1" +source = "registry+https://scarbs.xyz/" +checksum = "sha256:9d7cf4808ba32136c559522b4b64d4d72495169e5f3efa56aea68a77ec02db55" +dependencies = [ + "snforge_scarb_plugin", +] diff --git a/examples/cairo/scripts/utility_examples/regex_utils/Scarb.toml b/examples/cairo/scripts/utility_examples/regex_utils/Scarb.toml new file mode 100644 index 0000000..852ad3a --- /dev/null +++ b/examples/cairo/scripts/utility_examples/regex_utils/Scarb.toml @@ -0,0 +1,46 @@ +[package] +name = "regex_utils" +version = "0.1.0" +edition = "2024_07" + +# See more keys and their definitions at https://docs.swmansion.com/scarb/docs/reference/manifest.html + +[dependencies] +starknet = "2.9.2" +regex = { path = "../../regex" } + +[dev-dependencies] +snforge_std = "0.36.0" +assert_macros = "2.9.2" + +[[target.starknet-contract]] +sierra = true + +[scripts] +test = "snforge test" + +[tool.scarb] +allow-prebuilt-plugins = ["snforge_std"] + +# Visit https://foundry-rs.github.io/starknet-foundry/appendix/scarb-toml.html for more information + +# [tool.snforge] # Define `snforge` tool section +# exit_first = true # Stop tests execution immediately upon the first failure +# fuzzer_runs = 1234 # Number of runs of the random fuzzer +# fuzzer_seed = 1111 # Seed for the random fuzzer + +# [[tool.snforge.fork]] # Used for fork testing +# name = "SOME_NAME" # Fork name +# url = "http://your.rpc.url" # Url of the RPC provider +# block_id.tag = "latest" # Block to fork from (block tag) + +# [profile.dev.cairo] # Configure Cairo compiler +# unstable-add-statements-code-locations-debug-info = true 
# Should be used if you want to use coverage +# unstable-add-statements-functions-debug-info = true # Should be used if you want to use coverage/profiler +# inlining-strategy = "avoid" # Should be used if you want to use coverage + +# [features] # Used for conditional compilation +# enable_for_tests = [] # Feature name and list of other features that should be enabled with it +casm = true + +[lib] \ No newline at end of file diff --git a/examples/cairo/scripts/utility_examples/regex_utils/snfoundry.toml b/examples/cairo/scripts/utility_examples/regex_utils/snfoundry.toml new file mode 100644 index 0000000..306a097 --- /dev/null +++ b/examples/cairo/scripts/utility_examples/regex_utils/snfoundry.toml @@ -0,0 +1,11 @@ +# Visit https://foundry-rs.github.io/starknet-foundry/appendix/snfoundry-toml.html +# and https://foundry-rs.github.io/starknet-foundry/projects/configuration.html for more information + +# [sncast.default] # Define a profile name +# url = "https://free-rpc.nethermind.io/sepolia-juno/v0_7" # Url of the RPC provider +# accounts-file = "../account-file" # Path to the file with the account data +# account = "mainuser" # Account from `accounts_file` or default account file that will be used for the transactions +# keystore = "~/keystore" # Path to the keystore file +# wait-params = { timeout = 300, retry-interval = 10 } # Wait for submitted transaction parameters +# block-explorer = "StarkScan" # Block explorer service used to display links to transaction details +# show-explorer-links = true # Print links pointing to pages with transaction details in the chosen block explorer diff --git a/examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo b/examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo new file mode 100644 index 0000000..d18669a --- /dev/null +++ b/examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo @@ -0,0 +1 @@ +mod main; From 8842ac6585d035ad7f41956622593c22f0361e9f Mon Sep 17 00:00:00 2001 From: 
psychemist Date: Tue, 1 Apr 2025 07:31:20 +0100 Subject: [PATCH 2/5] feat: add regex contract file and tests --- .../regex_utils/src/main.cairo | 160 +++++++++++++++ .../regex_utils/tests/test_main.cairo | 194 ++++++++++++++++++ 2 files changed, 354 insertions(+) create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo diff --git a/examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo b/examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo new file mode 100644 index 0000000..c6856e1 --- /dev/null +++ b/examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo @@ -0,0 +1,160 @@ +use core::byte_array::ByteArray; +use regex::regex::{Regex, RegexTrait}; + +fn main() { + // Example 1: Creating a new regex and checking if text matches a pattern + println!("== Example 1: new & matches =="); + let email_pattern: ByteArray = "[a-z0-9]+@[a-z]+.[a-z]+"; + let mut regex = RegexTrait::new(email_pattern); + + let valid_email: ByteArray = "user@example.com"; + let invalid_email: ByteArray = "invalid-email"; + + println!("Valid email: {}", valid_email); + println!("Matches pattern? {}", regex.matches(valid_email)); + + println!("Invalid email: {}", invalid_email); + println!("Matches pattern? 
{}", regex.matches(invalid_email)); + + // Example 2: Finding the first occurrence of a pattern + println!("== Example 2: find =="); + let text: ByteArray = "Contact us at support@company.com or sales@company.com"; + let mut email_regex: Regex = RegexTrait::new("[a-z]+@[a-z]+.[a-z]+"); + + match email_regex.find(text) { + Option::Some((start, end)) => { + let mut email = ""; + let mut i = start; + while i < end { + email.append_byte(text.at(i).unwrap()); + i += 1; + }; + println!("Found email: {}", email); + println!("Position: {} to {}", start, end); + }, + Option::None => { + println!("No email found in text"); + } + } + + // Example 3: Finding all occurrences of a pattern + println!("== Example 3: find_all =="); + let log_text: ByteArray = "2024-01-15 ERROR Database connection failed//2024-01-15 ERROR Authentication error//2024-01-15 INFO Retry successful"; + let mut error_regex: Regex = RegexTrait::new("ERROR.*"); + let matches = error_regex.find_all(log_text); + + println!("Found {} error messages:", matches.len()); + let mut i = 0; + while i < matches.len() { + let (start, end) = *matches.at(i); + let mut error_msg = ""; + let mut j = start; + while j < end { + error_msg.append_byte(log_text.at(j).unwrap()); + j += 1; + }; + println!(" {}: {}", i + 1, error_msg); + i += 1; + }; + + // Example 4: Replacing patterns in text + println!("== Example 4: replace =="); + let sensitive_text: ByteArray = "My credit card is 1234-5678-9012-3456 and my SSN is 123-45-6789"; + + // Replace credit card numbers + let mut cc_regex: Regex = RegexTrait::new("[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}"); + let masked_cc = cc_regex.replace(sensitive_text, "XXXX-XXXX-XXXX-XXXX"); + println!("After masking credit card:"); + println!("{}", masked_cc); + + // Also replace SSN + let mut ssn_regex: Regex = RegexTrait::new("[0-9]{3}-[0-9]{2}-[0-9]{4}"); + let fully_masked = ssn_regex.replace(masked_cc, "XXX-XX-XXXX"); + println!("After masking SSN:"); + println!("{}", fully_masked); + + // 
Example 5: Matching character classes + println!("== Example 5: Character Classes =="); + let mut digit_regex: Regex = RegexTrait::new("[0-9]+"); + let text_with_numbers: ByteArray = "abc123def456"; + + let matches = digit_regex.find_all(text_with_numbers); + println!("Found {} number sequences:", matches.len()); + + let mut i = 0; + while i < matches.len() { + let (start, end) = *matches.at(i); + let mut number = ""; + let mut j = start; + while j < end { + number.append_byte(text_with_numbers.at(j).unwrap()); + j += 1; + }; + println!(" {}: {}", i + 1, number); + i += 1; + }; + + // Example 6: Wildcards + println!("== Example 6: Wildcards =="); + let mut wildcard_regex: Regex = RegexTrait::new("c.t"); + let words: ByteArray = "cat cut cot cit"; + + let matches = wildcard_regex.find_all(words); + println!("Words matching 'c.t' pattern:"); + + let mut i = 0; + while i < matches.len() { + let (start, end) = *matches.at(i); + let mut word = ""; + let mut j = start; + while j < end { + word.append_byte(words.at(j).unwrap()); + j += 1; + }; + println!(" {}", word); + i += 1; + }; + + // Example 7: Quantifiers + println!("== Example 7: Quantifiers =="); + let text: ByteArray = "color colour flavor flavour"; + + // Zero or one occurrence (American/British spelling) + let mut color_regex: Regex = RegexTrait::new("colou?r"); + let matches = color_regex.find_all(text); + + println!("Words matching 'colou?r' (zero or one 'u'):"); + let mut i = 0; + while i < matches.len() { + let (start, end) = *matches.at(i); + let mut word = ""; + let mut j = start; + while j < end { + word.append_byte(text.at(j).unwrap()); + j += 1; + }; + println!(" {}", word); + i += 1; + }; + + // One or more occurrences + let mut letters_regex: Regex = RegexTrait::new("a+"); + let repeated_text: ByteArray = "a aa aaa aaaa"; + let matches = letters_regex.find_all(repeated_text); + + println!("Sequences matching 'a+' (one or more 'a'):"); + let mut i = 0; + while i < matches.len() { + let (start, end) = 
*matches.at(i); + let mut sequence = ""; + let mut j = start; + while j < end { + sequence.append_byte(repeated_text.at(j).unwrap()); + j += 1; + }; + println!(" {}: {} a's", i + 1, end - start); + i += 1; + }; + + println!("== End of Examples =="); +} diff --git a/examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo b/examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo new file mode 100644 index 0000000..d14c9ed --- /dev/null +++ b/examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo @@ -0,0 +1,194 @@ +#[cfg(test)] +mod test_main { + use regex::regex::{Regex, RegexTrait}; + + #[test] + fn test_new_and_matches() { + // Email pattern + let pattern: ByteArray = "[a-z0-9]+@[a-z]+.[a-z]+"; + let regex: Regex = RegexTrait::new(pattern); + + // Valid email should match + let valid_email: ByteArray = "user@example.com"; + assert!(regex.matches(valid_email), "Valid email should match pattern"); + + // Invalid email should not match + let invalid_email: ByteArray = "invalid-email"; + assert!(!regex.matches(invalid_email), "Invalid email should not match pattern"); + } + + #[test] + fn test_find() { + let text: ByteArray = "Contact us at support@company.com for help"; + let pattern: ByteArray = "[a-z]+@[a-z]+.[a-z]+"; + let regex = RegexTrait::new(pattern); + + // Should find the email in the text + let result = regex.find(text); + assert!(result.is_some(), "Should find email in text"); + + match result { + Option::Some((start, end)) => { + assert!(start == 14, "Email should start at position 14"); + assert!(end == 33, "Email should end at position 33"); + + // Extract the matched part + let mut matched_text = ""; + let mut i = start; + while i < end { + matched_text.append_byte(text.at(i).unwrap()); + i += 1; + } + assert!(matched_text == "support@company.com", "Should extract correct email"); + }, + Option::None => { + assert!(false, "Email not found but should be found"); + } + } + + // Test with no match + 
let no_email_text: ByteArray = "This text has no email addresses"; + assert!(regex.find(no_email_text).is_none(), "Should not find email in text without email"); + } + + #[test] + fn test_find_all() { + let text: ByteArray = "Emails: user1@example.com, user2@example.com, admin@site.org"; + let pattern: ByteArray = "[a-z0-9]+@[a-z]+.[a-z]+"; + let regex = RegexTrait::new(pattern); + + // Should find all three emails + let matches = regex.find_all(text); + assert!(matches.len() == 3, "Should find 3 email addresses"); + + // Check first match + let (start1, end1) = *matches.at(0); + let mut email1 = ""; + let mut i = start1; + while i < end1 { + email1.append_byte(text.at(i).unwrap()); + i += 1; + } + assert!(email1 == "user1@example.com", "First email should be user1@example.com"); + + // Check second match + let (start2, end2) = *matches.at(1); + let mut email2 = ""; + let mut i = start2; + while i < end2 { + email2.append_byte(text.at(i).unwrap()); + i += 1; + } + assert!(email2 == "user2@example.com", "Second email should be user2@example.com"); + + // Check third match + let (start3, end3) = *matches.at(2); + let mut email3 = ""; + let mut i = start3; + while i < end3 { + email3.append_byte(text.at(i).unwrap()); + i += 1; + } + assert!(email3 == "admin@site.org", "Third email should be admin@site.org"); + } + + #[test] + fn test_replace() { + let text: ByteArray = "Credit card: 1234-5678-9012-3456"; + let pattern: ByteArray = "[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}"; + let replacement: ByteArray = "XXXX-XXXX-XXXX-XXXX"; + let regex = RegexTrait::new(pattern); + + // Replace credit card number with masked version + let result = regex.replace(text, replacement); + assert!(result == "Credit card: XXXX-XXXX-XXXX-XXXX", "Credit card should be masked"); + + // Test multiple replacements + let multi_text: ByteArray = "Cards: 1234-5678-9012-3456 and 9876-5432-1098-7654"; + let multi_result = regex.replace(multi_text, replacement); + assert!( + multi_result == "Cards: 
XXXX-XXXX-XXXX-XXXX and XXXX-XXXX-XXXX-XXXX", + "Both credit cards should be masked" + ); + } + + #[test] + fn test_character_classes() { + // Test digit character class + let digit_pattern: ByteArray = "[0-9]+"; + let digit_regex = RegexTrait::new(digit_pattern); + + let text: ByteArray = "abc123def456"; + let matches = digit_regex.find_all(text); + assert!(matches.len() == 2, "Should find 2 number sequences"); + + // Check first match (123) + let (start1, end1) = *matches.at(0); + assert!(start1 == 3, "First number should start at position 3"); + assert!(end1 == 6, "First number should end at position 6"); + + // Check second match (456) + let (start2, end2) = *matches.at(1); + assert!(start2 == 9, "Second number should start at position 9"); + assert!(end2 == 12, "Second number should end at position 12"); + + // Test letter character class + let letter_pattern: ByteArray = "[a-z]+"; + let mut letter_regex = RegexTrait::new(letter_pattern); + + let alpha_matches = letter_regex.find_all(text); + assert!(alpha_matches.len() == 2, "Should find 2 letter sequences"); + + // Check first match (abc) + let (alpha_start1, alpha_end1) = *alpha_matches.at(0); + assert!(alpha_start1 == 0, "First letter sequence should start at position 0"); + assert!(alpha_end1 == 3, "First letter sequence should end at position 3"); + } + + #[test] + fn test_wildcards() { + let pattern: ByteArray = "c.t"; + let regex = RegexTrait::new(pattern); + + let text: ByteArray = "cat cut cot cit"; + let matches = regex.find_all(text); + assert!(matches.len() == 4, "Should match all 4 words"); + + // Test specific text with wildcard + let specific_text: ByteArray = "cat"; + assert!(regex.matches(specific_text), "cat should match c.t pattern"); + + let non_matching: ByteArray = "car"; + assert!(!regex.matches(non_matching), "car should not match c.t pattern"); + } + + #[test] + fn test_quantifiers() { + // Test zero or one quantifier + let optional_pattern: ByteArray = "colou?r"; + let optional_regex 
= RegexTrait::new(optional_pattern); + + let american: ByteArray = "color"; + let british: ByteArray = "colour"; + + assert!(optional_regex.matches(american), "color should match colou?r"); + assert!(optional_regex.matches(british), "colour should match colou?r"); + + // Test one or more quantifier + let one_plus_pattern: ByteArray = "a+"; + let one_plus_regex = RegexTrait::new(one_plus_pattern); + + let text: ByteArray = "a aa aaa"; + let matches = one_plus_regex.find_all(text); + assert!(matches.len() == 3, "Should find 3 sequences of a's"); + + // Check lengths of matches + let (_, end1) = *matches.at(0); + let (start2, end2) = *matches.at(1); + let (start3, end3) = *matches.at(2); + + assert!(end1 - matches.at(0).at(0) == 1, "First sequence should be 1 character"); + assert!(end2 - start2 == 2, "Second sequence should be 2 characters"); + assert!(end3 - start3 == 3, "Third sequence should be 3 characters"); + } +} From 4a7597f519fc7e23ac6d3cfbf64c8b7b97169cfd Mon Sep 17 00:00:00 2001 From: psychemist Date: Wed, 28 May 2025 17:12:02 +0100 Subject: [PATCH 3/5] chore: move test file to appropriate module; populate lib.cairo --- .../regex_utils/src/lib.cairo | 5 + .../regex_utils/tests/test_main.cairo | 194 ------------------ 2 files changed, 5 insertions(+), 194 deletions(-) delete mode 100644 examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo diff --git a/examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo b/examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo index d18669a..d3e6fc7 100644 --- a/examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo +++ b/examples/cairo/scripts/utility_examples/regex_utils/src/lib.cairo @@ -1 +1,6 @@ mod main; + +#[cfg(test)] +mod tests { + mod test_main; +} diff --git a/examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo b/examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo deleted file mode 100644 index d14c9ed..0000000 --- 
a/examples/cairo/scripts/utility_examples/regex_utils/tests/test_main.cairo +++ /dev/null @@ -1,194 +0,0 @@ -#[cfg(test)] -mod test_main { - use regex::regex::{Regex, RegexTrait}; - - #[test] - fn test_new_and_matches() { - // Email pattern - let pattern: ByteArray = "[a-z0-9]+@[a-z]+.[a-z]+"; - let regex: Regex = RegexTrait::new(pattern); - - // Valid email should match - let valid_email: ByteArray = "user@example.com"; - assert!(regex.matches(valid_email), "Valid email should match pattern"); - - // Invalid email should not match - let invalid_email: ByteArray = "invalid-email"; - assert!(!regex.matches(invalid_email), "Invalid email should not match pattern"); - } - - #[test] - fn test_find() { - let text: ByteArray = "Contact us at support@company.com for help"; - let pattern: ByteArray = "[a-z]+@[a-z]+.[a-z]+"; - let regex = RegexTrait::new(pattern); - - // Should find the email in the text - let result = regex.find(text); - assert!(result.is_some(), "Should find email in text"); - - match result { - Option::Some((start, end)) => { - assert!(start == 14, "Email should start at position 14"); - assert!(end == 33, "Email should end at position 33"); - - // Extract the matched part - let mut matched_text = ""; - let mut i = start; - while i < end { - matched_text.append_byte(text.at(i).unwrap()); - i += 1; - } - assert!(matched_text == "support@company.com", "Should extract correct email"); - }, - Option::None => { - assert!(false, "Email not found but should be found"); - } - } - - // Test with no match - let no_email_text: ByteArray = "This text has no email addresses"; - assert!(regex.find(no_email_text).is_none(), "Should not find email in text without email"); - } - - #[test] - fn test_find_all() { - let text: ByteArray = "Emails: user1@example.com, user2@example.com, admin@site.org"; - let pattern: ByteArray = "[a-z0-9]+@[a-z]+.[a-z]+"; - let regex = RegexTrait::new(pattern); - - // Should find all three emails - let matches = regex.find_all(text); - 
assert!(matches.len() == 3, "Should find 3 email addresses"); - - // Check first match - let (start1, end1) = *matches.at(0); - let mut email1 = ""; - let mut i = start1; - while i < end1 { - email1.append_byte(text.at(i).unwrap()); - i += 1; - } - assert!(email1 == "user1@example.com", "First email should be user1@example.com"); - - // Check second match - let (start2, end2) = *matches.at(1); - let mut email2 = ""; - let mut i = start2; - while i < end2 { - email2.append_byte(text.at(i).unwrap()); - i += 1; - } - assert!(email2 == "user2@example.com", "Second email should be user2@example.com"); - - // Check third match - let (start3, end3) = *matches.at(2); - let mut email3 = ""; - let mut i = start3; - while i < end3 { - email3.append_byte(text.at(i).unwrap()); - i += 1; - } - assert!(email3 == "admin@site.org", "Third email should be admin@site.org"); - } - - #[test] - fn test_replace() { - let text: ByteArray = "Credit card: 1234-5678-9012-3456"; - let pattern: ByteArray = "[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}"; - let replacement: ByteArray = "XXXX-XXXX-XXXX-XXXX"; - let regex = RegexTrait::new(pattern); - - // Replace credit card number with masked version - let result = regex.replace(text, replacement); - assert!(result == "Credit card: XXXX-XXXX-XXXX-XXXX", "Credit card should be masked"); - - // Test multiple replacements - let multi_text: ByteArray = "Cards: 1234-5678-9012-3456 and 9876-5432-1098-7654"; - let multi_result = regex.replace(multi_text, replacement); - assert!( - multi_result == "Cards: XXXX-XXXX-XXXX-XXXX and XXXX-XXXX-XXXX-XXXX", - "Both credit cards should be masked" - ); - } - - #[test] - fn test_character_classes() { - // Test digit character class - let digit_pattern: ByteArray = "[0-9]+"; - let digit_regex = RegexTrait::new(digit_pattern); - - let text: ByteArray = "abc123def456"; - let matches = digit_regex.find_all(text); - assert!(matches.len() == 2, "Should find 2 number sequences"); - - // Check first match (123) - let (start1, 
end1) = *matches.at(0); - assert!(start1 == 3, "First number should start at position 3"); - assert!(end1 == 6, "First number should end at position 6"); - - // Check second match (456) - let (start2, end2) = *matches.at(1); - assert!(start2 == 9, "Second number should start at position 9"); - assert!(end2 == 12, "Second number should end at position 12"); - - // Test letter character class - let letter_pattern: ByteArray = "[a-z]+"; - let mut letter_regex = RegexTrait::new(letter_pattern); - - let alpha_matches = letter_regex.find_all(text); - assert!(alpha_matches.len() == 2, "Should find 2 letter sequences"); - - // Check first match (abc) - let (alpha_start1, alpha_end1) = *alpha_matches.at(0); - assert!(alpha_start1 == 0, "First letter sequence should start at position 0"); - assert!(alpha_end1 == 3, "First letter sequence should end at position 3"); - } - - #[test] - fn test_wildcards() { - let pattern: ByteArray = "c.t"; - let regex = RegexTrait::new(pattern); - - let text: ByteArray = "cat cut cot cit"; - let matches = regex.find_all(text); - assert!(matches.len() == 4, "Should match all 4 words"); - - // Test specific text with wildcard - let specific_text: ByteArray = "cat"; - assert!(regex.matches(specific_text), "cat should match c.t pattern"); - - let non_matching: ByteArray = "car"; - assert!(!regex.matches(non_matching), "car should not match c.t pattern"); - } - - #[test] - fn test_quantifiers() { - // Test zero or one quantifier - let optional_pattern: ByteArray = "colou?r"; - let optional_regex = RegexTrait::new(optional_pattern); - - let american: ByteArray = "color"; - let british: ByteArray = "colour"; - - assert!(optional_regex.matches(american), "color should match colou?r"); - assert!(optional_regex.matches(british), "colour should match colou?r"); - - // Test one or more quantifier - let one_plus_pattern: ByteArray = "a+"; - let one_plus_regex = RegexTrait::new(one_plus_pattern); - - let text: ByteArray = "a aa aaa"; - let matches = 
one_plus_regex.find_all(text); - assert!(matches.len() == 3, "Should find 3 sequences of a's"); - - // Check lengths of matches - let (_, end1) = *matches.at(0); - let (start2, end2) = *matches.at(1); - let (start3, end3) = *matches.at(2); - - assert!(end1 - matches.at(0).at(0) == 1, "First sequence should be 1 character"); - assert!(end2 - start2 == 2, "Second sequence should be 2 characters"); - assert!(end3 - start3 == 3, "Third sequence should be 3 characters"); - } -} From b41b1865d17ef2982d44d11e5210b8228fb65bee Mon Sep 17 00:00:00 2001 From: psychemist Date: Wed, 28 May 2025 17:12:57 +0100 Subject: [PATCH 4/5] fix: fix bug with borrowed variables --- .../regex_utils/src/main.cairo | 62 ++++++++++--------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo b/examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo index c6856e1..4dc388e 100644 --- a/examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo +++ b/examples/cairo/scripts/utility_examples/regex_utils/src/main.cairo @@ -6,23 +6,25 @@ fn main() { println!("== Example 1: new & matches =="); let email_pattern: ByteArray = "[a-z0-9]+@[a-z]+.[a-z]+"; let mut regex = RegexTrait::new(email_pattern); - + let valid_email: ByteArray = "user@example.com"; let invalid_email: ByteArray = "invalid-email"; - + println!("Valid email: {}", valid_email); - println!("Matches pattern? {}", regex.matches(valid_email)); - + println!("Matches pattern? {}", regex.matches(valid_email.clone())); + println!("Invalid email: {}", invalid_email); - println!("Matches pattern? {}", regex.matches(invalid_email)); + println!("Matches pattern? 
{}", regex.matches(invalid_email.clone())); // Example 2: Finding the first occurrence of a pattern println!("== Example 2: find =="); let text: ByteArray = "Contact us at support@company.com or sales@company.com"; let mut email_regex: Regex = RegexTrait::new("[a-z]+@[a-z]+.[a-z]+"); - - match email_regex.find(text) { - Option::Some((start, end)) => { + + match email_regex.find(text.clone()) { + Option::Some(( + start, end, + )) => { let mut email = ""; let mut i = start; while i < end { @@ -32,17 +34,16 @@ fn main() { println!("Found email: {}", email); println!("Position: {} to {}", start, end); }, - Option::None => { - println!("No email found in text"); - } + Option::None => { println!("No email found in text"); }, } // Example 3: Finding all occurrences of a pattern println!("== Example 3: find_all =="); - let log_text: ByteArray = "2024-01-15 ERROR Database connection failed//2024-01-15 ERROR Authentication error//2024-01-15 INFO Retry successful"; + let log_text: ByteArray = + "2024-01-15 ERROR Database connection failed//2024-01-15 ERROR Authentication error//2024-01-15 INFO Retry successful"; let mut error_regex: Regex = RegexTrait::new("ERROR.*"); - let matches = error_regex.find_all(log_text); - + let matches = error_regex.find_all(log_text.clone()); + println!("Found {} error messages:", matches.len()); let mut i = 0; while i < matches.len() { @@ -59,14 +60,15 @@ fn main() { // Example 4: Replacing patterns in text println!("== Example 4: replace =="); - let sensitive_text: ByteArray = "My credit card is 1234-5678-9012-3456 and my SSN is 123-45-6789"; - + let sensitive_text: ByteArray = + "My credit card is 1234-5678-9012-3456 and my SSN is 123-45-6789"; + // Replace credit card numbers let mut cc_regex: Regex = RegexTrait::new("[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}"); - let masked_cc = cc_regex.replace(sensitive_text, "XXXX-XXXX-XXXX-XXXX"); + let masked_cc = cc_regex.replace(sensitive_text.clone(), "XXXX-XXXX-XXXX-XXXX"); println!("After masking credit 
card:"); println!("{}", masked_cc); - + // Also replace SSN let mut ssn_regex: Regex = RegexTrait::new("[0-9]{3}-[0-9]{2}-[0-9]{4}"); let fully_masked = ssn_regex.replace(masked_cc, "XXX-XX-XXXX"); @@ -77,10 +79,10 @@ fn main() { println!("== Example 5: Character Classes =="); let mut digit_regex: Regex = RegexTrait::new("[0-9]+"); let text_with_numbers: ByteArray = "abc123def456"; - - let matches = digit_regex.find_all(text_with_numbers); + + let matches = digit_regex.find_all(text_with_numbers.clone()); println!("Found {} number sequences:", matches.len()); - + let mut i = 0; while i < matches.len() { let (start, end) = *matches.at(i); @@ -98,10 +100,10 @@ fn main() { println!("== Example 6: Wildcards =="); let mut wildcard_regex: Regex = RegexTrait::new("c.t"); let words: ByteArray = "cat cut cot cit"; - - let matches = wildcard_regex.find_all(words); + + let matches = wildcard_regex.find_all(words.clone()); println!("Words matching 'c.t' pattern:"); - + let mut i = 0; while i < matches.len() { let (start, end) = *matches.at(i); @@ -118,11 +120,11 @@ fn main() { // Example 7: Quantifiers println!("== Example 7: Quantifiers =="); let text: ByteArray = "color colour flavor flavour"; - + // Zero or one occurrence (American/British spelling) let mut color_regex: Regex = RegexTrait::new("colou?r"); - let matches = color_regex.find_all(text); - + let matches = color_regex.find_all(text.clone()); + println!("Words matching 'colou?r' (zero or one 'u'):"); let mut i = 0; while i < matches.len() { @@ -136,12 +138,12 @@ fn main() { println!(" {}", word); i += 1; }; - + // One or more occurrences let mut letters_regex: Regex = RegexTrait::new("a+"); let repeated_text: ByteArray = "a aa aaa aaaa"; - let matches = letters_regex.find_all(repeated_text); - + let matches = letters_regex.find_all(repeated_text.clone()); + println!("Sequences matching 'a+' (one or more 'a'):"); let mut i = 0; while i < matches.len() { From 214f198bbc29d7d7ef37dbe30a3ce1d52ab66b10 Mon Sep 17 
00:00:00 2001
From: psychemist
Date: Wed, 28 May 2025 17:13:00 +0100
Subject: [PATCH 5/5] test: simplify regex patterns

---
 .../regex_utils/src/tests/test_main.cairo     | 188 ++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 examples/cairo/scripts/utility_examples/regex_utils/src/tests/test_main.cairo

diff --git a/examples/cairo/scripts/utility_examples/regex_utils/src/tests/test_main.cairo b/examples/cairo/scripts/utility_examples/regex_utils/src/tests/test_main.cairo
new file mode 100644
index 0000000..a9c49ea
--- /dev/null
+++ b/examples/cairo/scripts/utility_examples/regex_utils/src/tests/test_main.cairo
@@ -0,0 +1,188 @@
+#[cfg(test)]
+mod test_main {
+    use regex::regex::{Regex, RegexTrait};
+
+    /// Copies the bytes of `text` in the half-open range `[start, end)` into a new `ByteArray`.
+    ///
+    /// Panics if `end` is past the end of `text`, because `at` then yields `None`.
+    fn slice(text: @ByteArray, start: usize, end: usize) -> ByteArray {
+        let mut out: ByteArray = "";
+        let mut i = start;
+        while i < end {
+            out.append_byte(text.at(i).unwrap());
+            i += 1;
+        };
+        out
+    }
+
+    #[test]
+    fn test_new_and_matches() {
+        // Email-like pattern, simplified for this regex engine. The `.` is unescaped, so it is
+        // the single-character wildcard exercised in `test_wildcards`, not a literal dot.
+        let pattern: ByteArray = "[a-z]+@[a-z]+.[a-z]+";
+        let mut regex: Regex = RegexTrait::new(pattern);
+
+        // Valid email should match
+        let valid_email: ByteArray = "user@example.com";
+        assert!(regex.matches(valid_email), "Valid email should match pattern");
+
+        // Invalid email should not match
+        let invalid_email: ByteArray = "invalid-email";
+        assert!(!regex.matches(invalid_email), "Invalid email should not match pattern");
+    }
+
+    #[test]
+    fn test_find() {
+        let text: ByteArray = "Contact us at support@company.com for help";
+        let pattern: ByteArray = "[a-z]+@[a-z]+.[a-z]+";
+        let mut regex = RegexTrait::new(pattern);
+
+        // Should find the email in the text
+        let result = regex.find(text.clone());
+        assert!(result.is_some(), "Should find email in text");
+
+        // The assertion above guarantees `Some`, so this unwrap cannot panic.
+        let (start, end) = result.unwrap();
+        assert!(start == 14, "Email should start at position 14");
+        assert!(end == 33, "Email should end at position 33");
+
+        // Extract the matched part
+        let matched_text = slice(@text, start, end);
+        assert!(matched_text == "support@company.com", "Should extract correct email");
+
+        // Test with no match
+        let no_email_text: ByteArray = "This text has no email addresses";
+        assert!(
+            regex.find(no_email_text).is_none(),
+            "Should not find email in text without email",
+        );
+    }
+
+    #[test]
+    fn test_find_all() {
+        // Use simpler text that matches the basic pattern better
+        let text: ByteArray = "user@site.com admin@site.org test@site.net";
+        let pattern: ByteArray = "[a-z]+@[a-z]+.[a-z]+";
+        let mut regex = RegexTrait::new(pattern);
+
+        // Should find all three emails
+        let matches = regex.find_all(text.clone());
+        assert!(matches.len() == 3, "Should find 3 email addresses");
+
+        // Check first match
+        let (start1, end1) = *matches.at(0);
+        let email1 = slice(@text, start1, end1);
+        assert!(email1 == "user@site.com", "First email should be user@site.com");
+
+        // Check second match
+        let (start2, end2) = *matches.at(1);
+        let email2 = slice(@text, start2, end2);
+        assert!(email2 == "admin@site.org", "Second email should be admin@site.org");
+
+        // Check third match
+        let (start3, end3) = *matches.at(2);
+        let email3 = slice(@text, start3, end3);
+        assert!(email3 == "test@site.net", "Third email should be test@site.net");
+    }
+
+    #[test]
+    fn test_replace() {
+        let text: ByteArray = "Credit card: 1234-5678-9012-3456";
+        // Use a simpler pattern without {4} repetition syntax
+        let pattern: ByteArray = "[0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]";
+        let replacement: ByteArray = "XXXX-XXXX-XXXX-XXXX";
+        let mut regex = RegexTrait::new(pattern);
+
+        // Replace credit card number with masked version
+        let result = regex.replace(text, replacement.clone());
+        assert!(result == "Credit card: XXXX-XXXX-XXXX-XXXX", "Credit card should be masked");
+
+        // Test multiple replacements
+        let multi_text: ByteArray = "Cards: 1234-5678-9012-3456 and 9876-5432-1098-7654";
+        let multi_result = regex.replace(multi_text, replacement);
+        assert!(
+            multi_result == "Cards: XXXX-XXXX-XXXX-XXXX and XXXX-XXXX-XXXX-XXXX",
+            "Both credit cards should be masked",
+        );
+    }
+
+    #[test]
+    fn test_character_classes() {
+        // Test digit character class
+        let digit_pattern: ByteArray = "[0-9]+";
+        let mut digit_regex = RegexTrait::new(digit_pattern);
+
+        let text: ByteArray = "abc123def456";
+        let matches = digit_regex.find_all(text.clone());
+        assert!(matches.len() == 2, "Should find 2 number sequences");
+
+        // Check first match (123)
+        let (start1, end1) = *matches.at(0);
+        assert!(start1 == 3, "First number should start at position 3");
+        assert!(end1 == 6, "First number should end at position 6");
+
+        // Check second match (456)
+        let (start2, end2) = *matches.at(1);
+        assert!(start2 == 9, "Second number should start at position 9");
+        assert!(end2 == 12, "Second number should end at position 12");
+
+        // Test letter character class
+        let letter_pattern: ByteArray = "[a-z]+";
+        let mut letter_regex = RegexTrait::new(letter_pattern);
+
+        let alpha_matches = letter_regex.find_all(text);
+        assert!(alpha_matches.len() == 2, "Should find 2 letter sequences");
+
+        // Check first match (abc)
+        let (alpha_start1, alpha_end1) = *alpha_matches.at(0);
+        assert!(alpha_start1 == 0, "First letter sequence should start at position 0");
+        assert!(alpha_end1 == 3, "First letter sequence should end at position 3");
+    }
+
+    #[test]
+    fn test_wildcards() {
+        let pattern: ByteArray = "c.t";
+        let mut regex = RegexTrait::new(pattern);
+
+        let text: ByteArray = "cat cut cot cit";
+        let matches = regex.find_all(text);
+        assert!(matches.len() == 4, "Should match all 4 words");
+
+        // Test specific text with wildcard
+        let specific_text: ByteArray = "cat";
+        assert!(regex.matches(specific_text), "cat should match c.t pattern");
+
+        let non_matching: ByteArray = "car";
+        assert!(!regex.matches(non_matching), "car should not match c.t pattern");
+    }
+
+    #[test]
+    fn test_quantifiers() {
+        // Test zero or one quantifier
+        let optional_pattern: ByteArray = "colou?r";
+        let mut optional_regex = RegexTrait::new(optional_pattern);
+
+        let american: ByteArray = "color";
+        let british: ByteArray = "colour";
+
+        assert!(optional_regex.matches(american), "color should match colou?r");
+        assert!(optional_regex.matches(british), "colour should match colou?r");
+
+        // Test one or more quantifier
+        let one_plus_pattern: ByteArray = "a+";
+        let mut one_plus_regex = RegexTrait::new(one_plus_pattern);
+
+        let text: ByteArray = "a aa aaa";
+        let matches = one_plus_regex.find_all(text);
+        assert!(matches.len() == 3, "Should find 3 sequences of a's");
+
+        // Check lengths of matches
+        let (start1, end1) = *matches.at(0);
+        let (start2, end2) = *matches.at(1);
+        let (start3, end3) = *matches.at(2);
+
+        assert!(end1 - start1 == 1, "First sequence should be 1 character");
+        assert!(end2 - start2 == 2, "Second sequence should be 2 characters");
+        assert!(end3 - start3 == 3, "Third sequence should be 3 characters");
+    }
+}