Skip to content

Commit 33ce087

Browse files
committed
feat: add std regex builtins
Upstream issue: google/jsonnet#1039
1 parent 777cdf5 commit 33ce087

File tree

5 files changed

+232
-0
lines changed

5 files changed

+232
-0
lines changed

Cargo.lock

Lines changed: 44 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/jrsonnet-evaluator/src/typed/conversions.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,22 @@ impl Typed for String {
223223
}
224224
}
225225

226+
impl Typed for StrValue {
227+
const TYPE: &'static ComplexValType = &ComplexValType::Simple(ValType::Str);
228+
229+
fn into_untyped(value: Self) -> Result<Val> {
230+
Ok(Val::Str(value))
231+
}
232+
233+
fn from_untyped(value: Val) -> Result<Self> {
234+
<Self as Typed>::TYPE.check(&value)?;
235+
match value {
236+
Val::Str(s) => Ok(s),
237+
_ => unreachable!(),
238+
}
239+
}
240+
}
241+
226242
impl Typed for char {
227243
const TYPE: &'static ComplexValType = &ComplexValType::Char;
228244

crates/jrsonnet-stdlib/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ serde_json = "1.0"
4242
serde_yaml_with_quirks = "0.8.24"
4343

4444
num-bigint = { version = "0.4.3", optional = true }
45+
regex = "1.8.4"
46+
lru = "0.10.0"
47+
rustc-hash = "1.1.0"
4548

4649
[build-dependencies]
4750
jrsonnet-parser.workspace = true

crates/jrsonnet-stdlib/src/lib.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ mod sets;
4444
pub use sets::*;
4545
mod compat;
4646
pub use compat::*;
47+
mod regex;
48+
pub use crate::regex::*;
4749

4850
pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
4951
let mut builder = ObjValueBuilder::new();
@@ -154,6 +156,8 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
154156
// Sets
155157
("setMember", builtin_set_member::INST),
156158
("setInter", builtin_set_inter::INST),
159+
// Regex
160+
("regexQuoteMeta", builtin_regex_quote_meta::INST),
157161
// Compat
158162
("__compare", builtin___compare::INST),
159163
]
@@ -187,6 +191,37 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
187191
.value(Val::Func(FuncVal::builtin(builtin_trace { settings })))
188192
.expect("no conflict");
189193

194+
// Regex
195+
let regex_cache = RegexCache::default();
196+
builder
197+
.member("regexFullMatch".into())
198+
.hide()
199+
.value(Val::Func(FuncVal::builtin(builtin_regex_full_match {
200+
cache: regex_cache.clone(),
201+
})))
202+
.expect("no conflict");
203+
builder
204+
.member("regexPartialMatch".into())
205+
.hide()
206+
.value(Val::Func(FuncVal::builtin(builtin_regex_partial_match {
207+
cache: regex_cache.clone(),
208+
})))
209+
.expect("no conflict");
210+
builder
211+
.member("regexReplace".into())
212+
.hide()
213+
.value(Val::Func(FuncVal::builtin(builtin_regex_replace {
214+
cache: regex_cache.clone(),
215+
})))
216+
.expect("no conflict");
217+
builder
218+
.member("regexGlobalReplace".into())
219+
.hide()
220+
.value(Val::Func(FuncVal::builtin(builtin_regex_global_replace {
221+
cache: regex_cache.clone(),
222+
})))
223+
.expect("no conflict");
224+
190225
builder
191226
.member("id".into())
192227
.hide()

crates/jrsonnet-stdlib/src/regex.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
use std::{cell::RefCell, hash::BuildHasherDefault, num::NonZeroUsize, rc::Rc};
2+
3+
use ::regex::Regex;
4+
use jrsonnet_evaluator::{
5+
error::{ErrorKind::*, Result},
6+
val::StrValue,
7+
IStr, ObjValueBuilder, Val,
8+
};
9+
use jrsonnet_macros::builtin;
10+
use lru::LruCache;
11+
use rustc_hash::FxHasher;
12+
13+
pub struct RegexCacheInner {
14+
cache: RefCell<LruCache<IStr, Rc<Regex>, BuildHasherDefault<FxHasher>>>,
15+
}
16+
impl Default for RegexCacheInner {
17+
fn default() -> Self {
18+
Self {
19+
cache: RefCell::new(LruCache::with_hasher(
20+
NonZeroUsize::new(20).unwrap(),
21+
BuildHasherDefault::default(),
22+
)),
23+
}
24+
}
25+
}
26+
/// Reference-counted handle to a [`RegexCacheInner`]; every clone of the
/// handle refers to the same underlying cache.
pub type RegexCache = Rc<RegexCacheInner>;
27+
impl RegexCacheInner {
28+
fn parse(&self, pattern: IStr) -> Result<Rc<Regex>> {
29+
let mut cache = self.cache.borrow_mut();
30+
if let Some(found) = cache.get(&pattern) {
31+
return Ok(found.clone());
32+
}
33+
let regex = Regex::new(&pattern)
34+
.map_err(|e| RuntimeError(format!("regex parse failed: {e}").into()))?;
35+
let regex = Rc::new(regex);
36+
cache.push(pattern, regex.clone());
37+
Ok(regex)
38+
}
39+
}
40+
41+
pub fn regex_match_inner(regex: &Regex, str: String) -> Result<Val> {
42+
let mut out = ObjValueBuilder::with_capacity(3);
43+
44+
let mut captures = Vec::with_capacity(regex.captures_len());
45+
let mut named_captures = ObjValueBuilder::with_capacity(regex.capture_names().len());
46+
47+
let Some(captured) = regex.captures(&str) else {
48+
return Ok(Val::Null)
49+
};
50+
51+
for ele in captured.iter().skip(1) {
52+
if let Some(ele) = ele {
53+
captures.push(Val::Str(StrValue::Flat(ele.as_str().into())))
54+
} else {
55+
captures.push(Val::Str(StrValue::Flat(IStr::empty())))
56+
}
57+
}
58+
for (i, name) in regex
59+
.capture_names()
60+
.skip(1)
61+
.enumerate()
62+
.flat_map(|(i, v)| Some((i, v?)))
63+
{
64+
let capture = captures[i].clone();
65+
named_captures.member(name.into()).value(capture)?;
66+
}
67+
68+
out.member("string".into())
69+
.value_unchecked(Val::Str(captured.get(0).unwrap().as_str().into()));
70+
out.member("captures".into())
71+
.value_unchecked(Val::Arr(captures.into()));
72+
out.member("namedCaptures".into())
73+
.value_unchecked(Val::Obj(named_captures.build()));
74+
75+
Ok(Val::Obj(out.build()))
76+
}
77+
78+
#[builtin(fields(
79+
cache: RegexCache,
80+
))]
81+
pub fn builtin_regex_partial_match(
82+
this: &builtin_regex_partial_match,
83+
pattern: IStr,
84+
str: String,
85+
) -> Result<Val> {
86+
let regex = this.cache.parse(pattern)?;
87+
regex_match_inner(&regex, str)
88+
}
89+
90+
#[builtin(fields(
91+
cache: RegexCache,
92+
))]
93+
pub fn builtin_regex_full_match(
94+
this: &builtin_regex_full_match,
95+
pattern: StrValue,
96+
str: String,
97+
) -> Result<Val> {
98+
let pattern = format!("^{pattern}$").into();
99+
let regex = this.cache.parse(pattern)?;
100+
regex_match_inner(&regex, str)
101+
}
102+
103+
#[builtin]
104+
pub fn builtin_regex_quote_meta(pattern: String) -> String {
105+
regex::escape(&pattern)
106+
}
107+
108+
#[builtin(fields(
109+
cache: RegexCache,
110+
))]
111+
pub fn builtin_regex_replace(
112+
this: &builtin_regex_replace,
113+
str: String,
114+
pattern: IStr,
115+
to: String,
116+
) -> Result<String> {
117+
let regex = this.cache.parse(pattern)?;
118+
let replaced = regex.replace(&str, to);
119+
Ok(replaced.to_string())
120+
}
121+
122+
#[builtin(fields(
123+
cache: RegexCache,
124+
))]
125+
pub fn builtin_regex_global_replace(
126+
this: &builtin_regex_global_replace,
127+
str: String,
128+
pattern: IStr,
129+
to: String,
130+
) -> Result<String> {
131+
let regex = this.cache.parse(pattern)?;
132+
let replaced = regex.replace_all(&str, to);
133+
Ok(replaced.to_string())
134+
}

0 commit comments

Comments
 (0)