Skip to content

Commit 48fe3bb

Browse files
authored
Winch: Add abs SIMD instructions for x86 using AVX (#10202)
* Winch: Add abs SIMD instructions for x86 using AVX
* Add _simd_load.wast to unsupported if no AVX
1 parent ac76c09 commit 48fe3bb

File tree

14 files changed

+436
-15
lines changed

14 files changed

+436
-15
lines changed

crates/wast-util/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,6 @@ impl WastTest {
442442
"spec_testsuite/simd_i32x4_extmul_i16x8.wast",
443443
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
444444
"spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
445-
"spec_testsuite/simd_i64x2_arith2.wast",
446445
"spec_testsuite/simd_i64x2_extmul_i32x4.wast",
447446
"spec_testsuite/simd_i8x16_arith2.wast",
448447
"spec_testsuite/simd_lane.wast",
@@ -474,6 +473,7 @@ impl WastTest {
474473
"spec_testsuite/simd_f64x2_cmp.wast",
475474
"spec_testsuite/simd_i16x8_cmp.wast",
476475
"spec_testsuite/simd_i32x4_cmp.wast",
476+
"spec_testsuite/simd_i64x2_arith2.wast",
477477
"spec_testsuite/simd_i64x2_cmp.wast",
478478
"spec_testsuite/simd_i8x16_cmp.wast",
479479
"spec_testsuite/simd_int_to_int_extend.wast",
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(f32x4.abs (v128.const f32x4 0 1 2 3))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x49
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpcmpeqd %xmm15, %xmm15, %xmm15
24+
;; vpsrld $1, %xmm15, %xmm15
25+
;; vandps %xmm0, %xmm15, %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 49: ud2
30+
;; 4b: addb %al, (%rax)
31+
;; 4d: addb %al, (%rax)
32+
;; 4f: addb %al, (%rax)
33+
;; 51: addb %al, (%rax)
34+
;; 53: addb %al, (%rax)
35+
;; 55: addb %al, 0x3f(%rax)
36+
;; 5b: addb %al, (%rax)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(f64x2.abs (v128.const f64x2 0 1))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x49
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpcmpeqq %xmm15, %xmm15, %xmm15
24+
;; vpsrlq $1, %xmm15, %xmm15
25+
;; vandpd %xmm0, %xmm15, %xmm0
26+
;; addq $0x10, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 49: ud2
30+
;; 4b: addb %al, (%rax)
31+
;; 4d: addb %al, (%rax)
32+
;; 4f: addb %al, (%rax)
33+
;; 51: addb %al, (%rax)
34+
;; 53: addb %al, (%rax)
35+
;; 55: addb %al, (%rax)
36+
;; 57: addb %al, (%rax)
37+
;; 59: addb %al, (%rax)
38+
;; 5b: addb %al, (%rax)
39+
;; 5d: addb %dh, %al
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i16x8.abs (v128.const i16x8 0 1 2 3 4 5 6 7))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpabsw %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
33+
;; 4b: addb %al, (%rax)
34+
;; 4d: addb %al, (%rax)
35+
;; 4f: addb %al, (%rax)
36+
;; 51: addb %al, (%rcx)
37+
;; 53: addb %al, (%rdx)
38+
;; 55: addb %al, (%rbx)
39+
;; 57: addb %al, (%rax, %rax)
40+
;; 5a: addl $0x7000600, %eax
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i32x4.abs (v128.const i32x4 0 1 2 3))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpabsd %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
33+
;; 4b: addb %al, (%rax)
34+
;; 4d: addb %al, (%rax)
35+
;; 4f: addb %al, (%rax)
36+
;; 51: addb %al, (%rax)
37+
;; 53: addb %al, (%rcx)
38+
;; 55: addb %al, (%rax)
39+
;; 57: addb %al, (%rdx)
40+
;; 59: addb %al, (%rax)
41+
;; 5b: addb %al, (%rbx)
42+
;; 5d: addb %al, (%rax)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i64x2.abs (v128.const i64x2 0 1))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x4e
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpsrad $0x1f, %xmm0, %xmm15
24+
;; vpshufd $0xf5, %xmm15, %xmm15
25+
;; vpxor %xmm0, %xmm15, %xmm0
26+
;; vpsubq %xmm15, %xmm0, %xmm0
27+
;; addq $0x10, %rsp
28+
;; popq %rbp
29+
;; retq
30+
;; 4e: ud2
31+
;; 50: addb %al, (%rax)
32+
;; 52: addb %al, (%rax)
33+
;; 54: addb %al, (%rax)
34+
;; 56: addb %al, (%rax)
35+
;; 58: addl %eax, (%rax)
36+
;; 5a: addb %al, (%rax)
37+
;; 5c: addb %al, (%rax)
38+
;; 5e: addb %al, (%rax)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i8x16.abs (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movdqu 0x1c(%rip), %xmm0
23+
;; vpabsb %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
33+
;; 4b: addb %al, (%rax)
34+
;; 4d: addb %al, (%rax)
35+
;; 4f: addb %al, (%rax)
36+
;; 51: addl %eax, (%rdx)
37+
;; 53: addl 0x9080706(, %rax), %eax
38+
;; 5a: orb (%rbx), %cl
39+
;; 5c: orb $0xd, %al

tests/misc_testsuite/winch/_simd_load.wast

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,13 @@
8686
;; )
8787
;; (assert_return (invoke "as-f32x4.mul-operand") (v128.const f32x4 256 2 3.6 -2))
8888

89-
;; (module (memory 1)
90-
;; (data (offset (i32.const 0)) "\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff") ;; 1111 ...
91-
;; (func (export "as-f32x4.abs-operand") (result v128)
92-
;; (f32x4.abs (v128.load (i32.const 0)))
93-
;; )
94-
;; )
95-
;; (assert_return (invoke "as-f32x4.abs-operand") (v128.const i32x4 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff)) ;; 1111 -> 0111
89+
(module (memory 1)
90+
(data (offset (i32.const 0)) "\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff\ff") ;; 1111 ...
91+
(func (export "as-f32x4.abs-operand") (result v128)
92+
(f32x4.abs (v128.load (i32.const 0)))
93+
)
94+
)
95+
(assert_return (invoke "as-f32x4.abs-operand") (v128.const i32x4 0x7fffffff 0x7fffffff 0x7fffffff 0x7fffffff)) ;; 1111 -> 0111
9696

9797
;; (module (memory 1)
9898
;; (data (offset (i32.const 0)) "\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA\AA")

tests/misc_testsuite/winch/_simd_splat.wast

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,8 @@
273273
(f64x2.eq (f64x2.splat (local.get 0)) (f64x2.splat (local.get 1))))
274274

275275
;; Floating-point sign bit operations
276-
;; (func (export "as-f32x4_abs-operand") (param f32) (result v128)
277-
;; (f32x4.abs (f32x4.splat (local.get 0))))
276+
(func (export "as-f32x4_abs-operand") (param f32) (result v128)
277+
(f32x4.abs (f32x4.splat (local.get 0))))
278278

279279
;; Floating-point min
280280
;; (func (export "as-f32x4_min-operands") (param f32 f32) (result v128)
@@ -336,7 +336,7 @@
336336
(assert_return (invoke "as-i32x4_eq-operands2" (i64.const 1) (i64.const 2)) (v128.const i64x2 0xffffffff00000000 0xffffffff00000000))
337337
(assert_return (invoke "as-f64x2_eq-operands" (f64.const +0.0) (f64.const -0.0)) (v128.const i64x2 -1 -1))
338338

339-
;; (assert_return (invoke "as-f32x4_abs-operand" (f32.const -1.125)) (v128.const f32x4 1.125 1.125 1.125 1.125))
339+
(assert_return (invoke "as-f32x4_abs-operand" (f32.const -1.125)) (v128.const f32x4 1.125 1.125 1.125 1.125))
340340
;; (assert_return (invoke "as-f32x4_min-operands" (f32.const 0.25) (f32.const 1e-38)) (v128.const f32x4 1e-38 1e-38 1e-38 1e-38))
341341
;; (assert_return (invoke "as-f32x4_div-operands" (f32.const 1.0) (f32.const 8.0)) (v128.const f32x4 0.125 0.125 0.125 0.125))
342342

winch/codegen/src/isa/aarch64/masm.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ use crate::{
1616
CalleeKind, DivKind, Extend, ExtendKind, ExtractLaneKind, FloatCmpKind, HandleOverflowKind,
1717
Imm as I, IntCmpKind, LoadKind, MacroAssembler as Masm, MulWideKind, OperandSize, RegImm,
1818
RemKind, ReplaceLaneKind, RmwOp, RoundingMode, SPOffset, ShiftKind, SplatKind, StackSlot,
19-
StoreKind, TrapCode, TruncKind, V128ConvertKind, V128ExtendKind, V128NarrowKind,
20-
VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS, UNTRUSTED_FLAGS,
19+
StoreKind, TrapCode, TruncKind, V128AbsKind, V128ConvertKind, V128ExtendKind,
20+
V128NarrowKind, VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS,
21+
UNTRUSTED_FLAGS,
2122
},
2223
stack::TypedReg,
2324
};
@@ -1158,6 +1159,10 @@ impl Masm for MacroAssembler {
11581159
Err(anyhow!(CodeGenError::unimplemented_masm_instruction()))
11591160
}
11601161

1162+
/// Placeholder for the vector absolute-value operation in the aarch64
/// `MacroAssembler`.
///
/// None of the arguments are used (hence the leading underscores); the
/// method unconditionally returns early with
/// `CodeGenError::unimplemented_masm_instruction()`.
///
/// # Errors
///
/// Always returns an error; no code is emitted.
fn v128_abs(&mut self, _src: Reg, _dst: WritableReg, _kind: V128AbsKind) -> Result<()> {
1163+
bail!(CodeGenError::unimplemented_masm_instruction())
1164+
}
1165+
11611166
fn v128_neg(&mut self, _op: WritableReg, _size: OperandSize) -> Result<()> {
11621167
Err(anyhow!(CodeGenError::unimplemented_masm_instruction()))
11631168
}

0 commit comments

Comments
 (0)