Signed v16i8 -> v4i32 partial-reduce multiply-accumulate on WebAssembly SIMD.

Replaces the extend_low/high_s + DOT expansion of partial_reduce_smla with
EXTMUL_{LOW,HIGH}_S_I16x8 feeding extadd_pairwise_s_I32x4, the same shape the
unsigned (partial_reduce_umla) pattern below it already uses, and updates the
MAX-BANDWIDTH FileCheck lines of i32_mac_s8 to the new instruction sequence.

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index eedfdb309d289..29713b5d236da 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1583,11 +1583,9 @@ def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v8i16 V128:$lhs),
 // MLA: v16i8 -> v4i32
 def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs),
                                       (v16i8 V128:$rhs))),
-          (ADD_I32x4 (ADD_I32x4 (DOT (extend_low_s_I16x8 $lhs),
-                                     (extend_low_s_I16x8 $rhs)),
-                                (DOT (extend_high_s_I16x8 $lhs),
-                                     (extend_high_s_I16x8 $rhs))),
-                     $acc)>;
+          (ADD_I32x4 (ADD_I32x4 (extadd_pairwise_s_I32x4 (EXTMUL_LOW_S_I16x8 $lhs, $rhs)),
+                                (extadd_pairwise_s_I32x4 (EXTMUL_HIGH_S_I16x8 $lhs, $rhs))),
+                     $acc)>;
 def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v16i8 V128:$lhs),
                                       (v16i8 V128:$rhs))),
           (ADD_I32x4 (ADD_I32x4 (extadd_pairwise_u_I32x4 (EXTMUL_LOW_U_I16x8 $lhs, $rhs)),
diff --git a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
index 314e1b4fc69a1..91cd3dd1ca4e7 100644
--- a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
+++ b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
@@ -14,13 +14,11 @@ define hidden i32 @i32_mac_s8(ptr nocapture noundef readonly %a, ptr nocapture n
 ; CHECK: i32x4.add
 
 ; MAX-BANDWIDTH: v128.load
-; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
 ; MAX-BANDWIDTH: v128.load
-; MAX-BANDWIDTH: i16x8.extend_low_i8x16_s
-; MAX-BANDWIDTH: i32x4.dot_i16x8_s
-; MAX-BANDWIDTH: i16x8.extend_high_i8x16_s
-; MAX-BANDWIDTH: i16x8.extend_high_i8x16_s
-; MAX-BANDWIDTH: i32x4.dot_i16x8_s
+; MAX-BANDWIDTH: i16x8.extmul_low_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
+; MAX-BANDWIDTH: i16x8.extmul_high_i8x16_s
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_s
 ; MAX-BANDWIDTH: i32x4.add
 ; MAX-BANDWIDTH: i32x4.add
 