Skip to content

Commit b6527cf

Browse files
authored
pulley: Add some macro-instructions related to bounds-checks (bytecodealliance#9943)
* pulley: Add some macro-instructions related to bounds-checks

  This commit starts down the path of optimizing wasm loads/stores in Pulley with macro-instructions. It's expected that these instructions are so common that it's worth putting them in the 1-byte namespace of opcodes. Locally this gets a 10% speedup on the sightglass bz2 benchmark.

* Remove incorrect lowering rule
1 parent ef1ec37 commit b6527cf

File tree

5 files changed

+281
-0
lines changed

5 files changed

+281
-0
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,33 @@
119119
(rule (lower (trapnz cond code))
120120
(side_effect (pulley_trap_if (lower_cond cond) code)))
121121

122+
;; Special-cases for bounds-checks-related traps emitted for wasm loads/stores.
123+
;; Each of these translates to a single "xbc" (x-register bounds check)
124+
;; instruction
125+
;; 32-bit bound, no offset: `trapnz (a >u b)` where `b` is an `$I32` value is
;; emitted as `pulley_xbc32_bound32_trap a b 0 code`.
(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
126+
a
127+
b @ (value_type $I32))
128+
code))
129+
(side_effect (pulley_xbc32_bound32_trap a b 0 code)))
130+
131+
;; 64-bit bound, no offset: the left-hand side is a `uextend` of an `$I32`
;; value and `b` is `$I64`. The un-extended `a` is passed to
;; `pulley_xbc32_bound64_trap`, so no separate extend is requested by this rule.
(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
132+
(uextend a @ (value_type $I32))
133+
b @ (value_type $I64))
134+
code))
135+
(side_effect (pulley_xbc32_bound64_trap a b 0 code)))
136+
137+
;; 32-bit bound with a constant subtracted: matches `a >u (isub b c)` where
;; `b` is `$I32` and `c` is accepted by `u8_from_iconst` (presumably an
;; `iconst` that fits in a `u8` — confirm against the extractor). The constant
;; is folded into the instruction's offset operand. Declared at priority 2,
;; above the priority-1 rules that pass an offset of 0.
(rule 2 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
138+
a
139+
(isub b @ (value_type $I32) (u8_from_iconst c)))
140+
code))
141+
(side_effect (pulley_xbc32_bound32_trap a b c code)))
142+
143+
;; 64-bit bound with a constant subtracted: matches
;; `(uextend a) >u (isub b c)` where `a` is `$I32`, `b` is `$I64` and `c` is
;; accepted by `u8_from_iconst` (presumably an `iconst` that fits in a `u8` —
;; confirm against the extractor). Both the extend and the subtraction are
;; folded into `pulley_xbc32_bound64_trap`. Declared at priority 2, above the
;; priority-1 rules that pass an offset of 0.
(rule 2 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan)
144+
(uextend a @ (value_type $I32))
145+
(isub b @ (value_type $I64) (u8_from_iconst c)))
146+
code))
147+
(side_effect (pulley_xbc32_bound64_trap a b c code)))
148+
122149
;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123150

124151
(rule (lower (get_stack_pointer))

pulley/src/interp.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2371,6 +2371,26 @@ impl OpVisitor for Interpreter<'_> {
23712371
self.state[dst].set_i64(a.wrapping_abs());
23722372
ControlFlow::Continue(())
23732373
}
2374+
2375+
/// Bounds-checks a 32-bit address against a 64-bit bound.
///
/// Reads `bound` as a `u64` and the low 32 bits of `addr` zero-extended to
/// `u64`, then traps if `addr > bound - off` (unsigned comparison).
/// Returns `ControlFlow::Continue(())` when the check passes.
fn xbc32_bound64_trap(&mut self, addr: XReg, bound: XReg, off: u8) -> ControlFlow<Done> {
2376+
let bound = self.state[bound].get_u64();
2377+
let addr = u64::from(self.state[addr].get_u32());
2378+
// The subtraction wraps on underflow instead of panicking.
// NOTE(review): if `bound < off` the right-hand side wraps to a value larger
// than any zero-extended 32-bit address, so the check never traps; presumably
// the compiler only emits this instruction when `bound >= off` — confirm.
if addr > bound.wrapping_sub(u64::from(off)) {
2379+
self.done_trap::<crate::XBc32Bound64Trap>()
2380+
} else {
2381+
ControlFlow::Continue(())
2382+
}
2383+
}
2384+
2385+
/// Bounds-checks a 32-bit address against a 32-bit bound.
///
/// Reads the low 32 bits of both `bound` and `addr`, then traps if
/// `addr > bound - off` (unsigned 32-bit comparison).
/// Returns `ControlFlow::Continue(())` when the check passes.
fn xbc32_bound32_trap(&mut self, addr: XReg, bound: XReg, off: u8) -> ControlFlow<Done> {
2386+
let bound = self.state[bound].get_u32();
2387+
let addr = self.state[addr].get_u32();
2388+
// The subtraction wraps on underflow instead of panicking.
// NOTE(review): if `bound < off` the right-hand side wraps to a large value
// and most addresses pass the check; presumably the compiler only emits
// this instruction when `bound >= off` — confirm.
if addr > bound.wrapping_sub(u32::from(off)) {
2389+
self.done_trap::<crate::XBc32Bound32Trap>()
2390+
} else {
2391+
ControlFlow::Continue(())
2392+
}
2393+
}
23742394
}
23752395

23762396
impl ExtendedOpVisitor for Interpreter<'_> {

pulley/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,12 @@ macro_rules! for_each_op {
580580
xselect32 = XSelect32 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
581581
/// `dst = low32(cond) ? if_nonzero : if_zero`
582582
xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg };
583+
584+
/// `trapif(zext(low32(addr)) > bound - off)` (unsigned)
585+
xbc32_bound64_trap = XBc32Bound64Trap { addr: XReg, bound: XReg, off: u8 };
586+
587+
/// `trapif(zext(low32(addr)) > low32(bound) - off)` (unsigned)
588+
xbc32_bound32_trap = XBc32Bound32Trap { addr: XReg, bound: XReg, off: u8 };
583589
}
584590
};
585591
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
;;! target = "pulley32"
2+
;;! test = "compile"
3+
4+
;; One load per access width (8, 16, 32 and 64 bits) from a single memory,
;; first with no static offset and then with `offset=32`, so the expected
;; output below shows the bounds check emitted for each combination.
(module
5+
(memory 1)
6+
7+
(func $load8 (param i32) (result i32)
8+
(i32.load8_u (local.get 0)))
9+
10+
(func $load16 (param i32) (result i32)
11+
(i32.load16_u (local.get 0)))
12+
13+
(func $load32 (param i32) (result i32)
14+
(i32.load (local.get 0)))
15+
16+
(func $load64 (param i32) (result i64)
17+
(i64.load (local.get 0)))
18+
19+
(func $load8_offset (param i32) (result i32)
20+
(i32.load8_u offset=32 (local.get 0)))
21+
22+
(func $load16_offset (param i32) (result i32)
23+
(i32.load16_u offset=32 (local.get 0)))
24+
25+
(func $load32_offset (param i32) (result i32)
26+
(i32.load offset=32 (local.get 0)))
27+
28+
(func $load64_offset (param i32) (result i64)
29+
(i64.load offset=32 (local.get 0)))
30+
)
31+
;; wasm[0]::function[0]::load8:
32+
;; push_frame
33+
;; xload32le_offset8 x6, x0, 52
34+
;; br_if_xulteq32 x6, x2, 0x14 // target = 0x19
35+
;; c: xload32le_offset8 x7, x0, 48
36+
;; xadd32 x7, x7, x2
37+
;; xload8_u32_offset8 x0, x7, 0
38+
;; pop_frame
39+
;; ret
40+
;; 19: trap
41+
;;
42+
;; wasm[0]::function[1]::load16:
43+
;; push_frame
44+
;; xload32le_offset8 x6, x0, 52
45+
;; xbc32_bound32_trap x2, x6, 2
46+
;; xload32le_offset8 x7, x0, 48
47+
;; xadd32 x7, x7, x2
48+
;; xload16le_u32_offset8 x0, x7, 0
49+
;; pop_frame
50+
;; ret
51+
;;
52+
;; wasm[0]::function[2]::load32:
53+
;; push_frame
54+
;; xload32le_offset8 x6, x0, 52
55+
;; xbc32_bound32_trap x2, x6, 4
56+
;; xload32le_offset8 x7, x0, 48
57+
;; xadd32 x7, x7, x2
58+
;; xload32le_offset8 x0, x7, 0
59+
;; pop_frame
60+
;; ret
61+
;;
62+
;; wasm[0]::function[3]::load64:
63+
;; push_frame
64+
;; xload32le_offset8 x6, x0, 52
65+
;; xbc32_bound32_trap x2, x6, 8
66+
;; xload32le_offset8 x7, x0, 48
67+
;; xadd32 x7, x7, x2
68+
;; xload64le_offset8 x0, x7, 0
69+
;; pop_frame
70+
;; ret
71+
;;
72+
;; wasm[0]::function[4]::load8_offset:
73+
;; push_frame
74+
;; xload32le_offset8 x6, x0, 52
75+
;; xbc32_bound32_trap x2, x6, 33
76+
;; xload32le_offset8 x7, x0, 48
77+
;; xadd32 x7, x7, x2
78+
;; xload8_u32_offset8 x0, x7, 32
79+
;; pop_frame
80+
;; ret
81+
;;
82+
;; wasm[0]::function[5]::load16_offset:
83+
;; push_frame
84+
;; xload32le_offset8 x6, x0, 52
85+
;; xbc32_bound32_trap x2, x6, 34
86+
;; xload32le_offset8 x7, x0, 48
87+
;; xadd32 x7, x7, x2
88+
;; xload16le_u32_offset8 x0, x7, 32
89+
;; pop_frame
90+
;; ret
91+
;;
92+
;; wasm[0]::function[6]::load32_offset:
93+
;; push_frame
94+
;; xload32le_offset8 x6, x0, 52
95+
;; xbc32_bound32_trap x2, x6, 36
96+
;; xload32le_offset8 x7, x0, 48
97+
;; xadd32 x7, x7, x2
98+
;; xload32le_offset8 x0, x7, 32
99+
;; pop_frame
100+
;; ret
101+
;;
102+
;; wasm[0]::function[7]::load64_offset:
103+
;; push_frame
104+
;; xload32le_offset8 x6, x0, 52
105+
;; xbc32_bound32_trap x2, x6, 40
106+
;; xload32le_offset8 x7, x0, 48
107+
;; xadd32 x7, x7, x2
108+
;; xload64le_offset8 x0, x7, 32
109+
;; pop_frame
110+
;; ret
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
;;! target = "pulley64"
2+
;;! test = "compile"
3+
4+
;; One load per access width (8, 16, 32 and 64 bits) from a single memory,
;; first with no static offset and then with `offset=32`, so the expected
;; output below shows the bounds check emitted for each combination.
(module
5+
(memory 1)
6+
7+
(func $load8 (param i32) (result i32)
8+
(i32.load8_u (local.get 0)))
9+
10+
(func $load16 (param i32) (result i32)
11+
(i32.load16_u (local.get 0)))
12+
13+
(func $load32 (param i32) (result i32)
14+
(i32.load (local.get 0)))
15+
16+
(func $load64 (param i32) (result i64)
17+
(i64.load (local.get 0)))
18+
19+
(func $load8_offset (param i32) (result i32)
20+
(i32.load8_u offset=32 (local.get 0)))
21+
22+
(func $load16_offset (param i32) (result i32)
23+
(i32.load16_u offset=32 (local.get 0)))
24+
25+
(func $load32_offset (param i32) (result i32)
26+
(i32.load offset=32 (local.get 0)))
27+
28+
(func $load64_offset (param i32) (result i64)
29+
(i64.load offset=32 (local.get 0)))
30+
)
31+
;; wasm[0]::function[0]::load8:
32+
;; push_frame
33+
;; xload64le_offset8 x8, x0, 104
34+
;; zext32 x7, x2
35+
;; br_if_xulteq64 x8, x7, 0x14 // target = 0x1c
36+
;; f: xload64le_offset8 x8, x0, 96
37+
;; xadd64 x8, x8, x7
38+
;; xload8_u32_offset8 x0, x8, 0
39+
;; pop_frame
40+
;; ret
41+
;; 1c: trap
42+
;;
43+
;; wasm[0]::function[1]::load16:
44+
;; push_frame
45+
;; xload64le_offset8 x8, x0, 104
46+
;; zext32 x7, x2
47+
;; xbc32_bound64_trap x2, x8, 2
48+
;; xload64le_offset8 x8, x0, 96
49+
;; xadd64 x8, x8, x7
50+
;; xload16le_u32_offset8 x0, x8, 0
51+
;; pop_frame
52+
;; ret
53+
;;
54+
;; wasm[0]::function[2]::load32:
55+
;; push_frame
56+
;; xload64le_offset8 x8, x0, 104
57+
;; zext32 x7, x2
58+
;; xbc32_bound64_trap x2, x8, 4
59+
;; xload64le_offset8 x8, x0, 96
60+
;; xadd64 x8, x8, x7
61+
;; xload32le_offset8 x0, x8, 0
62+
;; pop_frame
63+
;; ret
64+
;;
65+
;; wasm[0]::function[3]::load64:
66+
;; push_frame
67+
;; xload64le_offset8 x8, x0, 104
68+
;; zext32 x7, x2
69+
;; xbc32_bound64_trap x2, x8, 8
70+
;; xload64le_offset8 x8, x0, 96
71+
;; xadd64 x8, x8, x7
72+
;; xload64le_offset8 x0, x8, 0
73+
;; pop_frame
74+
;; ret
75+
;;
76+
;; wasm[0]::function[4]::load8_offset:
77+
;; push_frame
78+
;; xload64le_offset8 x8, x0, 104
79+
;; zext32 x7, x2
80+
;; xbc32_bound64_trap x2, x8, 33
81+
;; xload64le_offset8 x8, x0, 96
82+
;; xadd64 x8, x8, x7
83+
;; xload8_u32_offset8 x0, x8, 32
84+
;; pop_frame
85+
;; ret
86+
;;
87+
;; wasm[0]::function[5]::load16_offset:
88+
;; push_frame
89+
;; xload64le_offset8 x8, x0, 104
90+
;; zext32 x7, x2
91+
;; xbc32_bound64_trap x2, x8, 34
92+
;; xload64le_offset8 x8, x0, 96
93+
;; xadd64 x8, x8, x7
94+
;; xload16le_u32_offset8 x0, x8, 32
95+
;; pop_frame
96+
;; ret
97+
;;
98+
;; wasm[0]::function[6]::load32_offset:
99+
;; push_frame
100+
;; xload64le_offset8 x8, x0, 104
101+
;; zext32 x7, x2
102+
;; xbc32_bound64_trap x2, x8, 36
103+
;; xload64le_offset8 x8, x0, 96
104+
;; xadd64 x8, x8, x7
105+
;; xload32le_offset8 x0, x8, 32
106+
;; pop_frame
107+
;; ret
108+
;;
109+
;; wasm[0]::function[7]::load64_offset:
110+
;; push_frame
111+
;; xload64le_offset8 x8, x0, 104
112+
;; zext32 x7, x2
113+
;; xbc32_bound64_trap x2, x8, 40
114+
;; xload64le_offset8 x8, x0, 96
115+
;; xadd64 x8, x8, x7
116+
;; xload64le_offset8 x0, x8, 32
117+
;; pop_frame
118+
;; ret

0 commit comments

Comments
 (0)