Skip to content

Commit fc5ee24

Browse files
ZJIT: Compile toregexp (ruby#14200)
`toregexp` is fairly similar to `concatstrings`, so this commit extracts a helper for pushing and popping operands on the native stack. There's probably an opportunity to move some of this into `lir` (e.g. Alan suggested a `push_many` that could use STP on ARM to push 2 at a time), but I might save that for another day.
1 parent 6281806 commit fc5ee24

File tree

7 files changed

+146
-14
lines changed

7 files changed

+146
-14
lines changed

internal/re.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,9 @@ int rb_match_count(VALUE match);
2525
VALUE rb_reg_new_ary(VALUE ary, int options);
2626
VALUE rb_reg_last_defined(VALUE match);
2727

28+
#define ARG_REG_OPTION_MASK \
29+
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
30+
#define ARG_ENCODING_FIXED 16
31+
#define ARG_ENCODING_NONE 32
32+
2833
#endif /* INTERNAL_RE_H */

re.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -290,11 +290,6 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
290290

291291
#define KCODE_FIXED FL_USER4
292292

293-
#define ARG_REG_OPTION_MASK \
294-
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
295-
#define ARG_ENCODING_FIXED 16
296-
#define ARG_ENCODING_NONE 32
297-
298293
static int
299294
char_to_option(int c)
300295
{

test/ruby/test_zjit.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,6 +1847,14 @@ def test = "#{}"
18471847
}, insns: [:concatstrings]
18481848
end
18491849

1850+
def test_regexp_interpolation
1851+
assert_compiles '/123/', %q{
1852+
def test = /#{1}#{2}#{3}/
1853+
1854+
test
1855+
}, insns: [:toregexp]
1856+
end
1857+
18501858
private
18511859

18521860
# Assert that every method call in `test_script` can be compiled by ZJIT

zjit/bindgen/src/main.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,13 @@ fn main() {
259259

260260
// From internal/re.h
261261
.allowlist_function("rb_reg_new_ary")
262+
.allowlist_var("ARG_ENCODING_FIXED")
263+
.allowlist_var("ARG_ENCODING_NONE")
264+
265+
// From include/ruby/onigmo.h
266+
.allowlist_var("ONIG_OPTION_IGNORECASE")
267+
.allowlist_var("ONIG_OPTION_EXTEND")
268+
.allowlist_var("ONIG_OPTION_MULTILINE")
262269

263270
// `ruby_value_type` is a C enum and this stops it from
264271
// prefixing all the members with the name of the type

zjit/src/codegen.rs

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
346346
Insn::StringConcat { strings, .. } if strings.is_empty() => return None,
347347
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)),
348348
Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)),
349+
Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)),
349350
Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"),
350351
Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment
351352
Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)),
@@ -1595,36 +1596,58 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
15951596
})
15961597
}
15971598

1598-
fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd {
1599-
gen_prepare_non_leaf_call(jit, asm, state);
1599+
fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd {
1600+
let n = opnds.len();
16001601

16011602
// Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR
16021603
// At this point, frame_setup(&[], jit.c_stack_slots) has been called,
16031604
// which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack
16041605
let frame_size = aligned_stack_bytes(jit.c_stack_slots);
1605-
let n = strings.len();
16061606
let allocation_size = aligned_stack_bytes(n);
16071607

1608-
asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n);
1608+
asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n);
16091609
asm.sub_into(NATIVE_STACK_PTR, allocation_size.into());
16101610

16111611
// Calculate the total offset from NATIVE_BASE_PTR to our buffer
16121612
let total_offset_from_base = (frame_size + allocation_size) as i32;
16131613

1614-
for (idx, &string_opnd) in strings.iter().enumerate() {
1614+
for (idx, &opnd) in opnds.iter().enumerate() {
16151615
let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32);
16161616
asm.mov(
16171617
Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset),
1618-
string_opnd
1618+
opnd
16191619
);
16201620
}
16211621

1622-
let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base));
1623-
1624-
let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr);
1622+
asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base))
1623+
}
16251624

1625+
fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) {
16261626
asm_comment!(asm, "restore C stack pointer");
1627+
let allocation_size = aligned_stack_bytes(opnds.len());
16271628
asm.add_into(NATIVE_STACK_PTR, allocation_size.into());
1629+
}
1630+
1631+
fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec<Opnd>, state: &FrameState) -> Opnd {
1632+
gen_prepare_non_leaf_call(jit, asm, state);
1633+
1634+
let first_opnd_ptr = gen_push_opnds(jit, asm, &values);
1635+
1636+
let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr);
1637+
let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into());
1638+
asm_ccall!(asm, rb_ary_clear, tmp_ary);
1639+
1640+
gen_pop_opnds(asm, &values);
1641+
1642+
result
1643+
}
1644+
1645+
fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Opnd {
1646+
gen_prepare_non_leaf_call(jit, asm, state);
1647+
1648+
let first_string_ptr = gen_push_opnds(jit, asm, &strings);
1649+
let result = asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr);
1650+
gen_pop_opnds(asm, &strings);
16281651

16291652
result
16301653
}

zjit/src/cruby_bindings.inc.rs

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

zjit/src/hir.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,9 @@ pub enum Insn {
473473
StringIntern { val: InsnId, state: InsnId },
474474
StringConcat { strings: Vec<InsnId>, state: InsnId },
475475

476+
/// Combine `values` (the operands popped off the stack) into a regexp, using the regexp option bits in `opt`
477+
ToRegexp { opt: usize, values: Vec<InsnId>, state: InsnId },
478+
476479
/// Put special object (VMCORE, CBASE, etc.) based on value_type
477480
PutSpecialObject { value_type: SpecialObjectType },
478481

@@ -668,6 +671,14 @@ pub struct InsnPrinter<'a> {
668671
ptr_map: &'a PtrPrintMap,
669672
}
670673

674+
static REGEXP_FLAGS: &[(u32, &str)] = &[
675+
(ONIG_OPTION_MULTILINE, "MULTILINE"),
676+
(ONIG_OPTION_IGNORECASE, "IGNORECASE"),
677+
(ONIG_OPTION_EXTEND, "EXTENDED"),
678+
(ARG_ENCODING_FIXED, "FIXEDENCODING"),
679+
(ARG_ENCODING_NONE, "NOENCODING"),
680+
];
681+
671682
impl<'a> std::fmt::Display for InsnPrinter<'a> {
672683
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
673684
match &self.inner {
@@ -716,6 +727,28 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
716727

717728
Ok(())
718729
}
730+
Insn::ToRegexp { values, opt, .. } => {
731+
write!(f, "ToRegexp")?;
732+
let mut prefix = " ";
733+
for value in values {
734+
write!(f, "{prefix}{value}")?;
735+
prefix = ", ";
736+
}
737+
738+
let opt = *opt as u32;
739+
if opt != 0 {
740+
write!(f, ", ")?;
741+
let mut sep = "";
742+
for (flag, name) in REGEXP_FLAGS {
743+
if opt & flag != 0 {
744+
write!(f, "{sep}{name}")?;
745+
sep = "|";
746+
}
747+
}
748+
}
749+
750+
Ok(())
751+
}
719752
Insn::Test { val } => { write!(f, "Test {val}") }
720753
Insn::IsNil { val } => { write!(f, "IsNil {val}") }
721754
Insn::Jump(target) => { write!(f, "Jump {target}") }
@@ -1179,6 +1212,7 @@ impl Function {
11791212
&StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
11801213
&StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) },
11811214
&StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
1215+
&ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state },
11821216
&Test { val } => Test { val: find!(val) },
11831217
&IsNil { val } => IsNil { val: find!(val) },
11841218
&Jump(ref target) => Jump(find_branch_edge!(target)),
@@ -1305,6 +1339,7 @@ impl Function {
13051339
Insn::StringCopy { .. } => types::StringExact,
13061340
Insn::StringIntern { .. } => types::Symbol,
13071341
Insn::StringConcat { .. } => types::StringExact,
1342+
Insn::ToRegexp { .. } => types::RegexpExact,
13081343
Insn::NewArray { .. } => types::ArrayExact,
13091344
Insn::ArrayDup { .. } => types::ArrayExact,
13101345
Insn::NewHash { .. } => types::HashExact,
@@ -1939,6 +1974,10 @@ impl Function {
19391974
worklist.extend(strings);
19401975
worklist.push_back(state);
19411976
}
1977+
&Insn::ToRegexp { ref values, state, .. } => {
1978+
worklist.extend(values);
1979+
worklist.push_back(state);
1980+
}
19421981
| &Insn::Return { val }
19431982
| &Insn::Throw { val, .. }
19441983
| &Insn::Test { val }
@@ -2863,6 +2902,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
28632902
let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id });
28642903
state.stack_push(insn_id);
28652904
}
2905+
YARVINSN_toregexp => {
2906+
// First arg contains the option bits (multiline, extended, ignorecase, and the encoding flags) used to create the regexp; second arg is the number of stack values to combine
2907+
let opt = get_arg(pc, 0).as_usize();
2908+
let count = get_arg(pc, 1).as_usize();
2909+
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
2910+
let values = state.stack_pop_n(count)?;
2911+
let insn_id = fun.push_insn(block, Insn::ToRegexp { opt, values, state: exit_id });
2912+
state.stack_push(insn_id);
2913+
}
28662914
YARVINSN_newarray => {
28672915
let count = get_arg(pc, 0).as_usize();
28682916
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
@@ -5330,6 +5378,47 @@ mod tests {
53305378
"#]]);
53315379
}
53325380

5381+
#[test]
5382+
fn test_toregexp() {
5383+
eval(r##"
5384+
def test = /#{1}#{2}#{3}/
5385+
"##);
5386+
assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
5387+
fn test@<compiled>:2:
5388+
bb0(v0:BasicObject):
5389+
v2:Fixnum[1] = Const Value(1)
5390+
v4:BasicObject = ObjToString v2
5391+
v6:String = AnyToString v2, str: v4
5392+
v7:Fixnum[2] = Const Value(2)
5393+
v9:BasicObject = ObjToString v7
5394+
v11:String = AnyToString v7, str: v9
5395+
v12:Fixnum[3] = Const Value(3)
5396+
v14:BasicObject = ObjToString v12
5397+
v16:String = AnyToString v12, str: v14
5398+
v18:RegexpExact = ToRegexp v6, v11, v16
5399+
Return v18
5400+
"#]]);
5401+
}
5402+
5403+
#[test]
5404+
fn test_toregexp_with_options() {
5405+
eval(r##"
5406+
def test = /#{1}#{2}/mixn
5407+
"##);
5408+
assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
5409+
fn test@<compiled>:2:
5410+
bb0(v0:BasicObject):
5411+
v2:Fixnum[1] = Const Value(1)
5412+
v4:BasicObject = ObjToString v2
5413+
v6:String = AnyToString v2, str: v4
5414+
v7:Fixnum[2] = Const Value(2)
5415+
v9:BasicObject = ObjToString v7
5416+
v11:String = AnyToString v7, str: v9
5417+
v13:RegexpExact = ToRegexp v6, v11, MULTILINE|IGNORECASE|EXTENDED|NOENCODING
5418+
Return v13
5419+
"#]]);
5420+
}
5421+
53335422
#[test]
53345423
fn throw() {
53355424
eval("

0 commit comments

Comments
 (0)