Skip to content

Commit d5c6d6a

Browse files
feat: add recursive regex pattern matching for metadata filtering (#914)
1 parent 61dc55e commit d5c6d6a

File tree

6 files changed

+244
-5
lines changed

6 files changed

+244
-5
lines changed

docs/v2/filters/select.mdx

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,22 @@ assets = ["asset17jd78wukhtrnmjh3fngzasxm8rck0l2r4hhyyt"]
9292
datum = "datum1httkxyxp8x0dlpdt3k6cwng5pxj3j"
9393
```
9494

95+
## Metadata Filtering
96+
97+
Match any transaction that carries a particular metadata label
98+
99+
```toml
100+
predicate = "#674"
101+
```
102+
103+
Match transactions whose metadata text matches a regex pattern. The search recurses through arrays and maps (both map keys and map values); only text metadatums are tested, so int and bytes values never match.
104+
105+
```toml
106+
[filters.predicate.match.metadata]
107+
label = 674
108+
109+
[filters.predicate.match.metadata.value.text]
110+
regex = "(?i)hello.*world" # Case-insensitive
111+
```
112+
113+
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Metadata Regex Filter Example
2+
3+
Filter transactions by metadata content using regex patterns.
4+
5+
## Configuration
6+
7+
```toml
8+
[[filters]]
9+
type = "Select"
10+
skip_uncertain = false
11+
12+
[filters.predicate.match.metadata]
13+
label = 674
14+
15+
[filters.predicate.match.metadata.value.text]
16+
regex = "testing regex"
17+
```
18+
19+
## Running
20+
21+
```bash
22+
oura daemon --config ./daemon.toml
23+
```
24+
25+
## Features
26+
27+
- **Recursive search**: Automatically searches text values nested inside arrays and maps (map keys included)
28+
- **Flexible patterns**: Use standard regex syntax; a pattern matches anywhere in the text unless anchored with `^` / `$`
29+
- **Optional label**: Omit the `label` field to search across all metadata labels
30+
31+
## Common Patterns
32+
33+
```toml
34+
# Alternatives: set only one `regex` key per filter
regex = "(?i)keyword" # Case-insensitive
35+
regex = "^MyApp:" # Starts with
36+
regex = "payment|donation" # Multiple keywords
37+
```
38+
39+
## See Also
40+
41+
- [Select Filter Documentation](../../docs/v2/filters/select.mdx)
42+
- [CIP-20 Specification](https://cips.cardano.org/cips/cip20/)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
[chain]
2+
type = "preprod"
3+
4+
[source]
5+
type = "N2N"
6+
peers = ["preprod-node.world.dev.cardano.org:30000"]
7+
8+
[intersect]
9+
type = "Tip"
10+
11+
[[filters]]
12+
type = "SplitBlock"
13+
14+
[[filters]]
15+
type = "ParseCbor"
16+
17+
[[filters]]
18+
type = "Select"
19+
skip_uncertain = false
20+
21+
[filters.predicate.match.metadata]
22+
label = 674
23+
24+
[filters.predicate.match.metadata.value.text]
25+
regex = "Hello World"
26+
27+
[sink]
28+
type = "Stdout"

src/filters/select/eval/metadata.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use super::*;
22

33
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
4+
#[serde(rename_all = "lowercase")]
45
pub enum MetadatumPattern {
56
Text(TextPattern),
67
Int(NumericPattern<i64>),
@@ -120,4 +121,67 @@ mod tests {
120121
));
121122
assert_eq!(positives, Vec::<usize>::new());
122123
}
124+
125+
/// Tests regex pattern construction and basic matching.
///
/// Builds a `MetadataPattern` for label 674 that wraps a `TextPattern::Regex`, then checks the
/// wrapped regex against a few strings.
///
/// NOTE(review): the assertions call `is_match` on the inner `Regex` directly, so they exercise
/// the `regex` crate rather than `TextPattern::is_match`.
126+
#[test]
127+
fn regex_text_value_match() {
128+
use regex::Regex;
129+
130+
let pattern = MetadataPattern {
131+
label: Some(674),
132+
value: Some(MetadatumPattern::Text(TextPattern::Regex(
133+
Regex::new(r"testing regex").unwrap(),
134+
))),
135+
};
136+
137+
// Both fields were set to `Some(..)` just above; these only confirm construction.
assert!(pattern.label.is_some());
138+
assert!(pattern.value.is_some());
139+
140+
if let Some(MetadatumPattern::Text(TextPattern::Regex(regex))) = &pattern.value {
141+
assert!(regex.is_match("testing regex"));
142+
// The pattern has no anchors, so a match anywhere inside the subject counts.
assert!(regex.is_match("this contains testing regex inside"));
143+
assert!(!regex.is_match("no match here"));
144+
} else {
145+
panic!("Expected Text(Regex) pattern");
146+
}
147+
}
148+
149+
/// Tests regex pattern matching against different metadatum types.
///
/// Covers a matching text value, a non-matching text value, and two non-text values (int and
/// bytes), which are expected to be `Negative`.
///
/// NOTE(review): nested `Array` / `Map` metadatums are not exercised by this test.
150+
#[test]
151+
fn regex_text_value_matches_metadatum() {
152+
use pallas::interop::utxorpc::spec::cardano::metadatum;
153+
use regex::Regex;
154+
155+
let text_pattern = TextPattern::Regex(Regex::new(r"Hello World").unwrap());
156+
157+
// Case 1: text metadatum whose content matches the pattern.
let text_metadatum = Metadatum {
158+
metadatum: metadatum::Metadatum::Text("Hello World".to_string()).into(),
159+
};
160+
assert_eq!(
161+
text_pattern.is_match(&text_metadatum),
162+
MatchOutcome::Positive
163+
);
164+
165+
// Case 2: text metadatum that does not contain the pattern.
let no_match = Metadatum {
166+
metadatum: metadatum::Metadatum::Text("Goodbye".to_string()).into(),
167+
};
168+
assert_eq!(text_pattern.is_match(&no_match), MatchOutcome::Negative);
169+
170+
// Case 3: an int metadatum is never a text match.
let int_metadatum = Metadatum {
171+
metadatum: metadatum::Metadatum::Int(42).into(),
172+
};
173+
assert_eq!(
174+
text_pattern.is_match(&int_metadatum),
175+
MatchOutcome::Negative
176+
);
177+
178+
// Case 4: a bytes metadatum is expected to be `Negative` as well.
let bytes_metadatum = Metadatum {
179+
metadatum: metadatum::Metadatum::Bytes(vec![0xFF, 0xFE, 0xFD].into()).into(),
180+
};
181+
assert_eq!(
182+
text_pattern.is_match(&bytes_metadatum),
183+
MatchOutcome::Negative
184+
);
185+
}
186+
123187
}

src/filters/select/eval/mod.rs

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,16 +194,29 @@ impl PatternOf<u64> for CoinPattern {
194194
}
195195
}
196196

197-
/// Pattern for textual values: either an exact string or a regular expression.
///
/// With `rename_all = "lowercase"` the variants are (de)serialized as `exact` and `regex`.
/// `PartialEq` is no longer derived here; it is implemented by hand for this type.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
197+
#[derive(Serialize, Deserialize, Clone, Debug)]
198+
#[serde(rename_all = "lowercase")]
198199
pub enum TextPattern {
199200
/// Matches when the subject equals this string.
Exact(String),
200-
// TODO: Regex
201+
/// Matches when the regex finds a match in the subject; (de)serialized as the pattern
/// string through `serde_ext::regex_pattern`.
#[serde(with = "serde_ext::regex_pattern")]
202+
Regex(regex::Regex),
203+
}
204+
205+
/// Manual equality for `TextPattern`.
///
/// Two `Regex` patterns are equal when their source strings (`as_str`) are identical, so
/// regexes that accept the same inputs but are spelled differently compare as unequal.
/// Patterns of different variants are never equal.
impl PartialEq for TextPattern {
206+
fn eq(&self, other: &Self) -> bool {
207+
match (self, other) {
208+
(TextPattern::Exact(a), TextPattern::Exact(b)) => a.eq(b),
209+
// Compare the pattern source text rather than the compiled regex.
(TextPattern::Regex(a), TextPattern::Regex(b)) => a.as_str() == b.as_str(),
210+
// Mixed variants (Exact vs Regex) never compare equal.
_ => false,
211+
}
212+
}
201213
}
202214

203215
/// Matches a `TextPattern` against a string slice.
///
/// `Exact` requires the whole subject to equal the stored string. `Regex` uses
/// `Regex::is_match`, which reports a match found anywhere in the subject; anchor the pattern
/// with `^` / `$` to require a full-string match.
impl PatternOf<&str> for TextPattern {
204216
fn is_match(&self, subject: &str) -> MatchOutcome {
205217
match self {
206-
TextPattern::Exact(x) => MatchOutcome::if_equal(x.as_str(), subject),
218+
// Whole-string equality.
TextPattern::Exact(x) => MatchOutcome::if_true(x.eq(subject)),
219+
// Unanchored search within `subject`.
TextPattern::Regex(x) => MatchOutcome::if_true(x.is_match(subject)),
207220
}
208221
}
209222
}
@@ -221,9 +234,17 @@ impl PatternOf<&[u8]> for TextPattern {
221234

222235
impl PatternOf<&Metadatum> for TextPattern {
223236
fn is_match(&self, subject: &Metadatum) -> MatchOutcome {
237+
use pallas::interop::utxorpc::spec::cardano::metadatum::Metadatum as M;
238+
224239
match subject.metadatum.as_ref() {
225-
Some(pallas::interop::utxorpc::spec::cardano::metadatum::Metadatum::Text(subject)) => {
226-
self.is_match(subject.as_str())
240+
Some(M::Text(text)) => self.is_match(text.as_str()),
241+
Some(M::Array(array)) => self.is_any_match(array.items.iter()),
242+
Some(M::Map(map)) => {
243+
let key_matches =
244+
self.is_any_match(map.pairs.iter().filter_map(|p| p.key.as_ref()));
245+
let value_matches =
246+
self.is_any_match(map.pairs.iter().filter_map(|p| p.value.as_ref()));
247+
key_matches + value_matches
227248
}
228249
_ => MatchOutcome::Negative,
229250
}
@@ -642,6 +663,48 @@ mod tests {
642663
assert!(matches!(pattern, Pattern::Metadata(..)));
643664
}
644665

666+
/// Tests PartialEq implementation for TextPattern.
///
/// Checks that regexes with the same source compare equal, regexes with different sources do
/// not, `Exact` compares by string, and an `Exact` never equals a `Regex` even when both were
/// built from the same text.
667+
#[test]
668+
fn text_pattern_equality() {
669+
use regex::Regex;
670+
671+
let pattern1 = TextPattern::Regex(Regex::new(r"test").unwrap());
672+
let pattern2 = TextPattern::Regex(Regex::new(r"test").unwrap());
673+
let pattern3 = TextPattern::Regex(Regex::new(r"different").unwrap());
674+
let pattern4 = TextPattern::Exact("test".to_string());
675+
let pattern5 = TextPattern::Exact("test".to_string());
676+
677+
assert_eq!(pattern1, pattern2);
678+
assert_ne!(pattern1, pattern3);
679+
assert_eq!(pattern4, pattern5);
680+
// Same text "test", different variants: must not be equal.
assert_ne!(pattern1, pattern4);
681+
}
682+
683+
/// Tests that `TextPattern::Exact` matches only when the whole subject equals the pattern.
#[test]
684+
fn text_pattern_exact_match() {
685+
let pattern = TextPattern::Exact("hello".to_string());
686+
687+
assert_eq!(pattern.is_match("hello"), MatchOutcome::Positive);
688+
// A subject that merely starts with the pattern is not an exact match.
assert_eq!(pattern.is_match("hello world"), MatchOutcome::Negative);
689+
}
690+
691+
/// Tests TextPattern matching against UTF-8 and invalid byte slices.
///
/// Valid UTF-8 input is expected to be `Positive` / `Negative` according to the regex, while
/// bytes that are not valid UTF-8 are expected to yield `Uncertain`.
/// NOTE(review): the `&[u8]` implementation under test is not part of the visible hunk.
692+
#[test]
693+
fn text_pattern_matches_utf8_bytes() {
694+
use regex::Regex;
695+
696+
let pattern = TextPattern::Regex(Regex::new(r"hello").unwrap());
697+
698+
// Valid UTF-8 containing the pattern.
let utf8_bytes = b"hello world";
699+
assert_eq!(pattern.is_match(&utf8_bytes[..]), MatchOutcome::Positive);
700+
701+
// Valid UTF-8 without the pattern.
let utf8_no_match = b"goodbye";
702+
assert_eq!(pattern.is_match(&utf8_no_match[..]), MatchOutcome::Negative);
703+
704+
// 0xFF 0xFE 0xFD is not valid UTF-8; the expected outcome is `Uncertain`, not `Negative`.
let invalid_utf8 = vec![0xFF, 0xFE, 0xFD];
705+
assert_eq!(pattern.is_match(&invalid_utf8[..]), MatchOutcome::Uncertain);
706+
}
707+
645708
#[test]
646709
fn deser_predicate() {
647710
serde_json::from_str::<StringOrStruct<Predicate>>("\"addr1qx2fxv2umyhttkxyxp8x0dlpdt3k6cwng5pxj3jhsydzer3n0d3vllmyqwsx5wktcd8cc3sq835lu7drv2xwl2wywfgse35a3x\"").unwrap();

src/filters/select/eval/serde_ext.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,26 @@ pub trait FromBech32: Sized {
106106
.ok_or_else(|| anyhow::anyhow!("bech32 hrp '{}' is not compatible for this type", hrp))
107107
}
108108
}
109+
110+
/// Serde serialization/deserialization helpers for regex patterns.
///
/// Exposes the `serialize` / `deserialize` function pair expected by
/// `#[serde(with = "...")]` on a `regex::Regex` field. The regex is represented as its
/// pattern string in the serialized form.
111+
pub mod regex_pattern {
112+
use regex::Regex;
113+
use serde::{Deserialize, Deserializer, Serializer};
114+
115+
/// Serializes a Regex as its string representation.
///
/// Writes `regex.as_str()`, i.e. the pattern source text, as a string.
116+
pub fn serialize<S>(regex: &Regex, serializer: S) -> Result<S::Ok, S::Error>
117+
where
118+
S: Serializer,
119+
{
120+
serializer.serialize_str(regex.as_str())
121+
}
122+
123+
/// Deserializes a string into a Regex.
///
/// Reads an owned `String` and compiles it with `Regex::new`. If the pattern fails to
/// compile, the regex error is converted into a custom serde deserialization error.
124+
pub fn deserialize<'de, D>(deserializer: D) -> Result<Regex, D::Error>
125+
where
126+
D: Deserializer<'de>,
127+
{
128+
let s = String::deserialize(deserializer)?;
129+
// Surface an invalid pattern as a deserialization error instead of panicking.
Regex::new(&s).map_err(serde::de::Error::custom)
130+
}
131+
}

0 commit comments

Comments
 (0)