-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[WebAssembly] [Codegen] Add patterns for relaxed dot #163266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
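@llvm/pr-subscribers-backend-webassembly Author: Jasmine Tang (badumbatish) Changes: The pattern I added for `relaxed dot` is similar to the one for normal dot in #151775. For `relaxed dot add`, I noticed that in the proposal the dot portion of the implementation is similar to `relaxed dot`, so I think we can add a pattern where a `relaxed dot` followed by an extadd pairwise and an add is selected as `relaxed dot add`. One current obstacle is that I don't think there is any pattern to singly create an extadd pairwise from other instructions, so the `relaxed dot add` pattern would not cover a wide range of instructions. Related to #55932. Full diff: https://github.com/llvm/llvm-project/pull/163266.diff 2 Files Affected: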
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78bce68c3..34b8167ee9c07 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1711,6 +1711,26 @@ defm RELAXED_DOT :
"i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
"i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>;
+def : Pat<
+ (v8i16 (add
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 0), (i32 1), (i32 4), (i32 5),
+ (i32 8), (i32 9), (i32 12), (i32 13),
+ (i32 16), (i32 17), (i32 20), (i32 21),
+ (i32 24), (i32 25), (i32 28), (i32 29)),
+ (wasm_shuffle
+ (v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
+ (v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
+ (i32 2), (i32 3), (i32 6), (i32 7),
+ (i32 10), (i32 11), (i32 14), (i32 15),
+ (i32 18), (i32 19), (i32 22), (i32 23),
+ (i32 26), (i32 27), (i32 30), (i32 31)))
+ ),
+ (v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs))
+>;
+
defm RELAXED_DOT_ADD :
RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc),
(outs), (ins),
@@ -1719,6 +1739,13 @@ defm RELAXED_DOT_ADD :
"i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
"i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>;
+def : Pat<
+ (v4i32 (add
+ (v4i32 (int_wasm_extadd_pairwise_signed
+ (v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))),
+ (v4i32 V128:$acc))),
+ (v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc)))
+ >;
//===----------------------------------------------------------------------===//
// Relaxed BFloat16 dot product
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll
new file mode 100644
index 0000000000000..9716cbe077080
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s
+
+target triple = "wasm32"
+; relaxed_dot stands for i16x8.relaxed_dot_i8x16_i7x16_s, as named in the .td file
+; relaxed_dot_add stands for i32x4.relaxed_dot_i8x16_i7x16_add_s, as named in the .td file
+
+define <8 x i16> @relaxed_dot_sext_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_sext_1:
+; CHECK: .functype relaxed_dot_sext_1 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
+; CHECK-NEXT: return $pop0
+ %sext1 = sext <16 x i8> %a to <16 x i16>
+ %sext2 = sext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %sext1, %sext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+}
+
+
+define <8 x i16> @relaxed_dot_sext_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_sext_2:
+; CHECK: .functype relaxed_dot_sext_2 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
+; CHECK-NEXT: return $pop0
+ %sext1 = sext <16 x i8> %a to <16 x i16>
+ %sext2 = sext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %sext1, %sext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle2, %shuffle1
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @relaxed_dot_sext_self(<16 x i8> %v) {
+; CHECK-LABEL: relaxed_dot_sext_self:
+; CHECK: .functype relaxed_dot_sext_self (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $0
+; CHECK-NEXT: return $pop0
+ %sext = sext <16 x i8> %v to <16 x i16>
+ %mul = mul <16 x i16> %sext, %sext
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @relaxed_dot_add_from_relaxed_dot(<16 x i8> %a, <16 x i8> %b, <4 x i32> %c) {
+; CHECK-LABEL: relaxed_dot_add_from_relaxed_dot:
+; CHECK: .functype relaxed_dot_add_from_relaxed_dot (v128, v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32x4.relaxed_dot_i8x16_i7x16_add_s $push0=, $0, $1, $2
+; CHECK-NEXT: return $pop0
+ %relaxed_dot_call = call <8 x i16> @llvm.wasm.relaxed.dot.i8x16.i7x16.signed(<16 x i8> %a, <16 x i8> %b)
+ %sext = call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> %relaxed_dot_call)
+ %res = add <4 x i32> %sext, %c
+ ret <4 x i32> %res
+}
+
+; INFO: Negative test: the operands are zero-extended, so relaxed_dot (signed) must not be selected.
+define <8 x i16> @relaxed_dot_zext(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_zext:
+; CHECK: .functype relaxed_dot_zext (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.extmul_low_i8x16_u $push6=, $0, $1
+; CHECK-NEXT: local.tee $push5=, $2=, $pop6
+; CHECK-NEXT: i16x8.extmul_high_i8x16_u $push4=, $0, $1
+; CHECK-NEXT: local.tee $push3=, $1=, $pop4
+; CHECK-NEXT: i8x16.shuffle $push1=, $pop5, $pop3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: i8x16.shuffle $push0=, $2, $1, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
+; CHECK-NEXT: i16x8.add $push2=, $pop1, $pop0
+; CHECK-NEXT: return $pop2
+ %zext1 = zext <16 x i8> %a to <16 x i16>
+ %zext2 = zext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %zext1, %zext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+
+}
+
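+; INFO: Negative test: the shuffles take the low/high halves rather than the even/odd lanes, so relaxed_dot must not be selected.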
+define <8 x i16> @relaxed_dot_wrong_shuffle(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: relaxed_dot_wrong_shuffle:
+; CHECK: .functype relaxed_dot_wrong_shuffle (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i16x8.extmul_low_i8x16_s $push1=, $0, $1
+; CHECK-NEXT: i16x8.extmul_high_i8x16_s $push0=, $0, $1
+; CHECK-NEXT: i16x8.add $push2=, $pop1, $pop0
+; CHECK-NEXT: return $pop2
+ %sext1 = sext <16 x i8> %a to <16 x i16>
+ %sext2 = sext <16 x i8> %b to <16 x i16>
+ %mul = mul <16 x i16> %sext1, %sext2
+ %shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %res = add <8 x i16> %shuffle1, %shuffle2
+ ret <8 x i16> %res
+}
|
"i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs", | ||
"i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>; | ||
|
||
def : Pat< |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess we can also support patterns where rhs is zext?
EDIT: Maybe this is a bad idea, as this is likely to cause undesirable behaviour.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've also just noticed that the current implementation of these instructions is hard-coded for i7/i8 inputs, but it needs to be parameterized to support i15/i16.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
> I've also just noticed that the current implementation of these instructions are hard-coded for i7/i8 inputs but they need to be parameterized to support i15/i16.
I've added parameterization to the pattern; let me know if it works.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. And sorry, I should have deleted that comment after my rethink. If we're only supporting sext i8, then we don't need the parameterization, plus the instruction definition still only uses i8x16_i7x16, and I don't think we'll support i16/i15.
Sorry I got confused, these instructions are weird! We'll go with your original implementation.
The pattern I added for `relaxed dot` is similar to the one for normal dot in #151775.
For `relaxed dot add`, I noticed that in the proposal the dot portion of the implementation is similar to `relaxed dot`, so I think we can add a pattern where, after we do a `relaxed dot` and an extadd pairwise followed by an add, we can select `relaxed dot add` instead.
One current obstacle is that I don't think there is any pattern to singly create an extadd pairwise from other instructions, so the `relaxed dot add` pattern would not cover a wide range of instructions.
Related to #55932.