
Commit e44646b

[WebAssembly] Lower ANY_EXTEND_VECTOR_INREG (#167529)
Treat it in the same manner as zero_extend_vector_inreg and generate an extend_low_u when possible; this helps prevent expensive shuffles from being generated instead. computeKnownBitsForTargetNode has also been updated to report the known zero bits of extend_low_u.
1 parent dcab4cb commit e44646b
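
For context, the heart of the change is a small opcode mapping: the bits an any-extend leaves in the widened half of each lane are undefined, so it is always legal to zero them, i.e. to take the same extend_low_u path that zero_extend_vector_inreg already takes. The sketch below models that decision in standalone C++; the enum and function names are illustrative stand-ins, not LLVM API.

#include <cstdio>

// Hypothetical stand-ins for the ISD and WebAssemblyISD opcodes involved.
enum class ExtOp { AnyExtendVectorInReg, ZeroExtendVectorInReg, SignExtendVectorInReg };
enum class WasmExt { ExtendLowU, ExtendLowS };

// Models the switch in LowerEXTEND_VECTOR_INREG: an any-extend's high bits
// may hold anything, so zero-filling them via extend_low_u is a valid choice.
WasmExt selectExtend(ExtOp Op) {
  switch (Op) {
  case ExtOp::AnyExtendVectorInReg: // fall through: zeroing undefined bits is fine
  case ExtOp::ZeroExtendVectorInReg:
    return WasmExt::ExtendLowU;
  case ExtOp::SignExtendVectorInReg:
    return WasmExt::ExtendLowS;
  }
  return WasmExt::ExtendLowU; // unreachable for well-formed inputs
}

int main() {
  // An any-extend and a zero-extend lower to the same instruction.
  std::printf("%s\n", selectExtend(ExtOp::AnyExtendVectorInReg) ==
                              selectExtend(ExtOp::ZeroExtendVectorInReg)
                          ? "same lowering"
                          : "different lowering");
}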

File tree

3 files changed: +42 -25 lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 26 additions & 1 deletion

@@ -319,6 +319,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
 
   // Support vector extending
   for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
+    setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom);
     setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
   }
@@ -1136,7 +1137,27 @@ void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
     }
     break;
   }
-
+  case WebAssemblyISD::EXTEND_LOW_U:
+  case WebAssemblyISD::EXTEND_HIGH_U: {
+    // We know the high half of each destination vector element will be zero.
+    SDValue SrcOp = Op.getOperand(0);
+    EVT VT = SrcOp.getSimpleValueType();
+    unsigned BitWidth = Known.getBitWidth();
+    if (VT == MVT::v8i8 || VT == MVT::v16i8) {
+      assert(BitWidth >= 8 && "Unexpected width!");
+      APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
+      Known.Zero |= Mask;
+    } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
+      assert(BitWidth >= 16 && "Unexpected width!");
+      APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+      Known.Zero |= Mask;
+    } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
+      assert(BitWidth >= 32 && "Unexpected width!");
+      APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 32);
+      Known.Zero |= Mask;
+    }
+    break;
+  }
   // For 128-bit addition if the upper bits are all zero then it's known that
   // the upper bits of the result will have all bits guaranteed zero except the
   // first.
@@ -1705,6 +1726,7 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
     return LowerSIGN_EXTEND_INREG(Op, DAG);
   case ISD::ZERO_EXTEND_VECTOR_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
+  case ISD::ANY_EXTEND_VECTOR_INREG:
     return LowerEXTEND_VECTOR_INREG(Op, DAG);
   case ISD::BUILD_VECTOR:
     return LowerBUILD_VECTOR(Op, DAG);
@@ -2299,6 +2321,9 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
 
   unsigned Ext;
   switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("unexpected opcode");
+  case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG:
     Ext = WebAssemblyISD::EXTEND_LOW_U;
     break;
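
The computeKnownBitsForTargetNode hunk above records that the widened half of every lane produced by EXTEND_LOW_U / EXTEND_HIGH_U is zero. A quick standalone check of that fact for the 8-to-16-bit case, in plain C++ with no LLVM types (the 0xFF00 mask corresponds to APInt::getHighBitsSet(16, 16 - 8) in the diff):

#include <cassert>
#include <cstdint>

int main() {
  // i16x8.extend_low_i8x16_u widens each unsigned 8-bit lane to 16 bits;
  // whatever the input lane holds, the new high byte is always zero.
  for (unsigned v = 0; v <= 0xFF; ++v) {
    uint16_t lane = static_cast<uint8_t>(v); // unsigned widening of one lane
    assert((lane & 0xFF00u) == 0);           // high half is known zero
  }
  return 0;
}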

llvm/test/CodeGen/WebAssembly/simd-arith.ll

Lines changed: 14 additions & 22 deletions

@@ -1997,38 +1997,30 @@ define void @avgr_undef_shuffle_lanes(ptr %res, <8 x i8> %a, <8 x i8> %b, <8 x i
 ; SIMD128: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> ()
 ; SIMD128-NEXT: # %bb.0:
 ; SIMD128-NEXT: i8x16.avgr_u $push1=, $1, $2
-; SIMD128-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-NEXT: local.tee $push11=, $2=, $pop12
+; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push8=, $pop1
+; SIMD128-NEXT: local.tee $push7=, $2=, $pop8
 ; SIMD128-NEXT: i8x16.avgr_u $push0=, $3, $4
-; SIMD128-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-NEXT: local.tee $push9=, $4=, $pop10
-; SIMD128-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
-; SIMD128-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 255, 255
-; SIMD128-NEXT: local.tee $push7=, $3=, $pop8
-; SIMD128-NEXT: v128.and $push5=, $pop4, $pop7
+; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push6=, $pop0
+; SIMD128-NEXT: local.tee $push5=, $4=, $pop6
+; SIMD128-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
 ; SIMD128-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31
-; SIMD128-NEXT: v128.and $push3=, $pop2, $3
-; SIMD128-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3
-; SIMD128-NEXT: v128.store 0($0):p2align=0, $pop6
+; SIMD128-NEXT: i8x16.narrow_i16x8_u $push4=, $pop3, $pop2
+; SIMD128-NEXT: v128.store 0($0):p2align=0, $pop4
 ; SIMD128-NEXT: return
 ;
 ; SIMD128-FAST-LABEL: avgr_undef_shuffle_lanes:
 ; SIMD128-FAST: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> ()
 ; SIMD128-FAST-NEXT: # %bb.0:
 ; SIMD128-FAST-NEXT: i8x16.avgr_u $push1=, $1, $2
-; SIMD128-FAST-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-FAST-NEXT: local.tee $push11=, $2=, $pop12
+; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push8=, $pop1
+; SIMD128-FAST-NEXT: local.tee $push7=, $2=, $pop8
 ; SIMD128-FAST-NEXT: i8x16.avgr_u $push0=, $3, $4
-; SIMD128-FAST-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
-; SIMD128-FAST-NEXT: local.tee $push9=, $4=, $pop10
-; SIMD128-FAST-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
-; SIMD128-FAST-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 255, 255
-; SIMD128-FAST-NEXT: local.tee $push7=, $3=, $pop8
-; SIMD128-FAST-NEXT: v128.and $push5=, $pop4, $pop7
+; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push6=, $pop0
+; SIMD128-FAST-NEXT: local.tee $push5=, $4=, $pop6
+; SIMD128-FAST-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
 ; SIMD128-FAST-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31
-; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $3
-; SIMD128-FAST-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3
-; SIMD128-FAST-NEXT: v128.store 0($0):p2align=0, $pop6
+; SIMD128-FAST-NEXT: i8x16.narrow_i16x8_u $push4=, $pop3, $pop2
+; SIMD128-FAST-NEXT: v128.store 0($0):p2align=0, $pop4
 ; SIMD128-FAST-NEXT: return
 ;
 ; NO-SIMD128-LABEL: avgr_undef_shuffle_lanes:

llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll

Lines changed: 2 additions & 2 deletions

@@ -276,7 +276,7 @@ define i1 @test_any_v8i8(<8 x i8> %x) {
 ; CHECK-LABEL: test_any_v8i8:
 ; CHECK: .functype test_any_v8i8 (v128) -> (i32)
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
+; CHECK-NEXT: i16x8.extend_low_i8x16_u $push0=, $0
 ; CHECK-NEXT: i32.const $push1=, 15
 ; CHECK-NEXT: i16x8.shl $push2=, $pop0, $pop1
 ; CHECK-NEXT: i32.const $push5=, 15
@@ -292,7 +292,7 @@ define i1 @test_all_v8i8(<8 x i8> %x) {
 ; CHECK-LABEL: test_all_v8i8:
 ; CHECK: .functype test_all_v8i8 (v128) -> (i32)
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
+; CHECK-NEXT: i16x8.extend_low_i8x16_u $push0=, $0
 ; CHECK-NEXT: i32.const $push1=, 15
 ; CHECK-NEXT: i16x8.shl $push2=, $pop0, $pop1
 ; CHECK-NEXT: i32.const $push5=, 15
