Skip to content

Commit d32793c

Browse files
authored
Revert "[WebAssembly] Combine i128 to v16i8 for setcc & expand memcmp for 16 byte loads with simd128" (#153360)
Reverts #149461. The first test with memcmp in `test/neon/test_neon_wasm_simd.cpp` in the Emscripten test suite has failed. This PR applies a revert so I can take a closer look at it. Test case link: https://github.com/emscripten-core/emscripten/blob/main/test/neon/test_neon_wasm_simd.cpp. Compile option: `em++ test_neon_wasm_simd.cpp -O2 -mfpu=neon -msimd128 -o something.js`. Original comment report: #149461 (comment).
1 parent 48bfaa4 commit d32793c

File tree

4 files changed

+18
-150
lines changed

4 files changed

+18
-150
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 2 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3383,65 +3383,15 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
33833383
return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
33843384
}
33853385

3386-
/// Try to convert a i128 comparison to a v16i8 comparison before type
3387-
/// legalization splits it up into chunks
3388-
static SDValue
3389-
combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3390-
const WebAssemblySubtarget *Subtarget) {
3391-
3392-
SDLoc DL(N);
3393-
SDValue X = N->getOperand(0);
3394-
SDValue Y = N->getOperand(1);
3395-
EVT VT = N->getValueType(0);
3396-
EVT OpVT = X.getValueType();
3397-
3398-
SelectionDAG &DAG = DCI.DAG;
3399-
if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute(
3400-
Attribute::NoImplicitFloat))
3401-
return SDValue();
3402-
3403-
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3404-
// We're looking for an oversized integer equality comparison with SIMD
3405-
if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3406-
!Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3407-
return SDValue();
3408-
3409-
// Don't perform this combine if constructing the vector will be expensive.
3410-
auto IsVectorBitCastCheap = [](SDValue X) {
3411-
X = peekThroughBitcasts(X);
3412-
return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3413-
};
3414-
3415-
if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3416-
return SDValue();
3417-
3418-
SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3419-
SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3420-
SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3421-
3422-
SDValue Intr =
3423-
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3424-
{DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3425-
: Intrinsic::wasm_anytrue,
3426-
DL, MVT::i32),
3427-
Cmp});
3428-
3429-
return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32), CC);
3430-
}
3431-
34323386
static SDValue performSETCCCombine(SDNode *N,
3433-
TargetLowering::DAGCombinerInfo &DCI,
3434-
const WebAssemblySubtarget *Subtarget) {
3387+
TargetLowering::DAGCombinerInfo &DCI) {
34353388
if (!DCI.isBeforeLegalize())
34363389
return SDValue();
34373390

34383391
EVT VT = N->getValueType(0);
34393392
if (!VT.isScalarInteger())
34403393
return SDValue();
34413394

3442-
if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3443-
return V;
3444-
34453395
SDValue LHS = N->getOperand(0);
34463396
if (LHS->getOpcode() != ISD::BITCAST)
34473397
return SDValue();
@@ -3621,7 +3571,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
36213571
case ISD::BITCAST:
36223572
return performBitcastCombine(N, DCI);
36233573
case ISD::SETCC:
3624-
return performSETCCCombine(N, DCI, Subtarget);
3574+
return performSETCCCombine(N, DCI);
36253575
case ISD::VECTOR_SHUFFLE:
36263576
return performVECTOR_SHUFFLECombine(N, DCI);
36273577
case ISD::SIGN_EXTEND:

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,7 @@ WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
147147

148148
Options.AllowOverlappingLoads = true;
149149

150-
if (ST->hasSIMD128())
151-
Options.LoadSizes.push_back(16);
150+
// TODO: Teach WebAssembly backend about load v128.
152151

153152
Options.LoadSizes.append({8, 4, 2, 1});
154153
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

llvm/test/CodeGen/WebAssembly/memcmp-expand.ll

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
2+
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
33

44
target triple = "wasm32-unknown-unknown"
55

@@ -127,16 +127,24 @@ define i1 @memcmp_expand_8(ptr %a, ptr %b) {
127127
ret i1 %res
128128
}
129129

130+
; TODO: Should be using a single load i64x2 or equivalent in bitsizes
130131
define i1 @memcmp_expand_16(ptr %a, ptr %b) {
131132
; CHECK-LABEL: memcmp_expand_16:
132133
; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32)
133134
; CHECK-NEXT: # %bb.0:
134-
; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0
135-
; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0
136-
; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0
137-
; CHECK-NEXT: i8x16.all_true $push3=, $pop2
138-
; CHECK-NEXT: i32.eqz $push4=, $pop3
139-
; CHECK-NEXT: return $pop4
135+
; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0
136+
; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0
137+
; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6
138+
; CHECK-NEXT: i32.const $push0=, 8
139+
; CHECK-NEXT: i32.add $push3=, $0, $pop0
140+
; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0
141+
; CHECK-NEXT: i32.const $push11=, 8
142+
; CHECK-NEXT: i32.add $push1=, $1, $pop11
143+
; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0
144+
; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2
145+
; CHECK-NEXT: i64.or $push9=, $pop8, $pop5
146+
; CHECK-NEXT: i64.eqz $push10=, $pop9
147+
; CHECK-NEXT: return $pop10
140148
%cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16)
141149
%res = icmp eq i32 %cmp_16, 0
142150
ret i1 %res

llvm/test/CodeGen/WebAssembly/simd-setcc.ll

Lines changed: 0 additions & 89 deletions
This file was deleted.

0 commit comments

Comments
 (0)