From e89afdc35c91d899e5c2752a68de3d356447ea8e Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Fri, 11 Jul 2025 13:04:46 -0700 Subject: [PATCH 1/7] Precommit test for memcmp expansion --- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 llvm/test/CodeGen/WebAssembly/memcmp-expand.ll diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll new file mode 100644 index 0000000000000..d11cdbcc4fe0f --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare i32 @memcmp(ptr, ptr, i32) + +define i1 @memcmp_expand_3(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_3: +; CHECK: .functype memcmp_expand_3 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 3 +; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 +; CHECK-NEXT: i32.eqz $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %cmp_3 = call i32 @memcmp(ptr %a, ptr %b, i32 3) + %res = icmp eq i32 %cmp_3, 0 + ret i1 %res +} + +define i1 @memcmp_expand_5(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_5: +; CHECK: .functype memcmp_expand_5 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 5 +; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 +; CHECK-NEXT: i32.eqz $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %cmp_5 = call i32 @memcmp(ptr %a, ptr %b, i32 5) + %res = icmp eq i32 %cmp_5, 0 + ret i1 %res +} + +define i1 @memcmp_expand_7(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_7: +; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 7 +; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 +; CHECK-NEXT: i32.eqz $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7) + %res = icmp eq i32 %cmp_7, 0 + ret i1 %res +} + +define i1 @memcmp_expand_2(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_2: +; CHECK: .functype memcmp_expand_2 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push1=, 0($0):p2align=0 +; CHECK-NEXT: i32.load16_u $push0=, 0($1):p2align=0 +; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2) + %res = icmp eq i32 %cmp_2, 0 + ret i1 %res +} + + +define i1 @memcmp_expand_8(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_8: +; CHECK: .functype memcmp_expand_8 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push1=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push0=, 0($1):p2align=0 +; CHECK-NEXT: i64.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_8 = call i32 @memcmp(ptr %a, ptr %b, i32 8) + %res = icmp eq i32 %cmp_8, 0 + ret i1 %res +} + + +define i1 @memcmp_expand_16(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_16: +; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 16 +; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 +; CHECK-NEXT: i32.eqz $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) + %res = icmp eq i32 %cmp_16, 0 + ret i1 %res +} + + + + From d5103f62e28173fff2c720a64bbe32a7db9cdd22 Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Fri, 11 Jul 2025 14:13:42 -0700 Subject: [PATCH 2/7] [WASM] Expand memcmp for small size --- .../WebAssembly/WebAssemblyISelLowering.cpp | 4 + .../WebAssemblyTargetTransformInfo.cpp | 10 +++ .../WebAssemblyTargetTransformInfo.h | 4 + .../test/CodeGen/WebAssembly/memcmp-expand.ll | 88 +++++++++++++++---- 4 files changed, 90 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index bf2e04caa0a61..a91a58db2a422 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -46,6 +46,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( : TargetLowering(TM), Subtarget(&STI) { auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; + // Set the load count for memcmp expand optimization + MaxLoadsPerMemcmp = 3; + MaxLoadsPerMemcmpOptSize = 2; + // Booleans always contain 0 or 1. setBooleanContents(ZeroOrOneBooleanContent); // Except in SIMD vectors diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 4f159996e4c6c..3686fce33f3ca 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -141,6 +141,16 @@ InstructionCost WebAssemblyTTIImpl::getCastInstrCost( return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } +WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions +WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + // INFO: I'm not sure what determines this, setting 2 conservatively + Options.NumLoadsPerBlock = 2; + Options.LoadSizes.append({8, 4, 2, 1}); + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + return Options; +} + InstructionCost WebAssemblyTTIImpl::getMemoryOpCost( unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index d83b8d1f45dbd..c915eeb07d4fd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -73,6 +73,10 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const override; + + TTI::MemCmpExpansionOptions + enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override; + InstructionCost getMemoryOpCost( unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index d11cdbcc4fe0f..64476e3b0b844 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -9,10 +9,21 @@ define i1 @memcmp_expand_3(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_3: ; CHECK: .functype memcmp_expand_3 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const $push0=, 3 -; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 -; CHECK-NEXT: i32.eqz $push2=, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: i32.load16_u $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load16_u $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 2 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i32.const $push13=, 2 +; CHECK-NEXT: i32.add $push1=, $1, $pop13 +; CHECK-NEXT: i32.load8_u $push2=, 0($pop1) +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.const $push10=, 65535 +; CHECK-NEXT: i32.and $push11=, $pop9, $pop10 +; CHECK-NEXT: i32.eqz $push12=, $pop11 +; CHECK-NEXT: return $pop12 %cmp_3 = call i32 @memcmp(ptr %a, ptr %b, i32 3) %res = icmp eq i32 %cmp_3, 0 ret i1 %res @@ -22,10 +33,19 @@ define i1 @memcmp_expand_5(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_5: ; CHECK: .functype memcmp_expand_5 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const $push0=, 5 -; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 -; CHECK-NEXT: i32.eqz $push2=, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 4 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i32.const $push11=, 4 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i32.load8_u $push2=, 0($pop1) +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 %cmp_5 = call i32 @memcmp(ptr %a, ptr %b, i32 5) %res = icmp eq i32 %cmp_5, 0 ret i1 %res @@ -35,10 +55,37 @@ define i1 @memcmp_expand_7(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_7: ; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const $push0=, 7 -; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 -; CHECK-NEXT: i32.eqz $push2=, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 4 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load16_u $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push17=, 4 +; CHECK-NEXT: i32.add $push1=, $1, $pop17 +; CHECK-NEXT: i32.load16_u $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: br_if 0, $pop9 # 0: down to label1 +; CHECK-NEXT: # %bb.1: # %loadbb1 +; CHECK-NEXT: i32.const $2=, 0 +; CHECK-NEXT: i32.const $push10=, 6 +; CHECK-NEXT: i32.add $push13=, $0, $pop10 +; CHECK-NEXT: i32.load8_u $push14=, 0($pop13) +; CHECK-NEXT: i32.const $push18=, 6 +; CHECK-NEXT: i32.add $push11=, $1, $pop18 +; CHECK-NEXT: i32.load8_u $push12=, 0($pop11) +; CHECK-NEXT: i32.eq $push15=, $pop14, $pop12 +; CHECK-NEXT: br_if 1, $pop15 # 1: down to label0 +; CHECK-NEXT: .LBB2_2: # %res_block +; CHECK-NEXT: end_block # label1: +; CHECK-NEXT: i32.const $2=, 1 +; CHECK-NEXT: .LBB2_3: # %endblock +; CHECK-NEXT: end_block # label0: +; CHECK-NEXT: i32.eqz $push16=, $2 +; CHECK-NEXT: return $pop16 %cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7) %res = icmp eq i32 %cmp_7, 0 ret i1 %res @@ -76,10 +123,19 @@ define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const $push0=, 16 -; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 -; CHECK-NEXT: i32.eqz $push2=, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 8 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i64.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 ret i1 %res From 3dd1cd75e9a2aa1426df3b4614c0b92a0eb55468 Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Sun, 13 Jul 2025 06:59:56 -0700 Subject: [PATCH 3/7] Add more test for comparison against 8/4 options --- llvm/test/CodeGen/WebAssembly/memcmp-expand.ll | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 64476e3b0b844..36a0a178353e3 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -91,6 +91,21 @@ define i1 @memcmp_expand_7(ptr %a, ptr %b) { ret i1 %res } +; INFO: Negative test +; Should not expand even with simd128 +define i1 @memcmp_expand_129(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_129: +; CHECK: .functype memcmp_expand_129 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 129 +; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 +; CHECK-NEXT: i32.eqz $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %cmp_129 = call i32 @memcmp(ptr %a, ptr %b, i32 129) + %res = icmp eq i32 %cmp_129, 0 + ret i1 %res +} + define i1 @memcmp_expand_2(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_2: ; CHECK: .functype memcmp_expand_2 (i32, i32) -> (i32) @@ -119,6 +134,7 @@ define i1 @memcmp_expand_8(ptr %a, ptr %b) { } +; TODO: Should be using a single load i64x2 or equivalent in bitsizes define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) From 94074145ac1167e0c0de83c5c4a199dab1a294fe Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Sun, 13 Jul 2025 07:00:45 -0700 Subject: [PATCH 4/7] Change to 8/4 load option with new NumLoadPerBlock --- .../WebAssembly/WebAssemblyISelLowering.cpp | 4 +- .../WebAssemblyTargetTransformInfo.cpp | 8 ++- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 54 ++++++------------- 3 files changed, 25 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a91a58db2a422..7ed87e110d57f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -47,8 +47,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; // Set the load count for memcmp expand optimization - MaxLoadsPerMemcmp = 3; - MaxLoadsPerMemcmpOptSize = 2; + MaxLoadsPerMemcmp = 8; + MaxLoadsPerMemcmpOptSize = 4; // Booleans always contain 0 or 1. setBooleanContents(ZeroOrOneBooleanContent); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 3686fce33f3ca..2180882753024 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -145,9 +145,15 @@ WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { TTI::MemCmpExpansionOptions Options; // INFO: I'm not sure what determines this, setting 2 conservatively - Options.NumLoadsPerBlock = 2; + Options.AllowOverlappingLoads = true; + + if (ST->hasSIMD128()) + Options.LoadSizes.push_back(16); Options.LoadSizes.append({8, 4, 2, 1}); + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + Options.NumLoadsPerBlock = Options.MaxNumLoads; + return Options; } diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 36a0a178353e3..4d77456be22b5 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -55,37 +55,19 @@ define i1 @memcmp_expand_7(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_7: ; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: block -; CHECK-NEXT: block ; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0 ; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0 ; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 -; CHECK-NEXT: i32.const $push0=, 4 +; CHECK-NEXT: i32.const $push0=, 3 ; CHECK-NEXT: i32.add $push3=, $0, $pop0 -; CHECK-NEXT: i32.load16_u $push4=, 0($pop3):p2align=0 -; CHECK-NEXT: i32.const $push17=, 4 -; CHECK-NEXT: i32.add $push1=, $1, $pop17 -; CHECK-NEXT: i32.load16_u $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i32.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 3 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i32.load $push2=, 0($pop1):p2align=0 ; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 ; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 -; CHECK-NEXT: br_if 0, $pop9 # 0: down to label1 -; CHECK-NEXT: # %bb.1: # %loadbb1 -; CHECK-NEXT: i32.const $2=, 0 -; CHECK-NEXT: i32.const $push10=, 6 -; CHECK-NEXT: i32.add $push13=, $0, $pop10 -; CHECK-NEXT: i32.load8_u $push14=, 0($pop13) -; CHECK-NEXT: i32.const $push18=, 6 -; CHECK-NEXT: i32.add $push11=, $1, $pop18 -; CHECK-NEXT: i32.load8_u $push12=, 0($pop11) -; CHECK-NEXT: i32.eq $push15=, $pop14, $pop12 -; CHECK-NEXT: br_if 1, $pop15 # 1: down to label0 -; CHECK-NEXT: .LBB2_2: # %res_block -; CHECK-NEXT: end_block # label1: -; CHECK-NEXT: i32.const $2=, 1 -; CHECK-NEXT: .LBB2_3: # %endblock -; CHECK-NEXT: end_block # label0: -; CHECK-NEXT: i32.eqz $push16=, $2 -; CHECK-NEXT: return $pop16 +; CHECK-NEXT: i32.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 %cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7) %res = icmp eq i32 %cmp_7, 0 ret i1 %res @@ -139,19 +121,15 @@ define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 -; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 -; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 -; CHECK-NEXT: i32.const $push0=, 8 -; CHECK-NEXT: i32.add $push3=, $0, $pop0 -; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 -; CHECK-NEXT: i32.const $push11=, 8 -; CHECK-NEXT: i32.add $push1=, $1, $pop11 -; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 -; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 -; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 -; CHECK-NEXT: i64.eqz $push10=, $pop9 -; CHECK-NEXT: return $pop10 +; CHECK-NEXT: i64.load $push4=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push3=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop3 +; CHECK-NEXT: i64.load $push1=, 8($0):p2align=0 +; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 +; CHECK-NEXT: i64.xor $push2=, $pop1, $pop0 +; CHECK-NEXT: i64.or $push6=, $pop5, $pop2 +; CHECK-NEXT: i64.eqz $push7=, $pop6 +; CHECK-NEXT: return $pop7 %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 ret i1 %res From f5e27a84c07bbe0523a028ac4704a438f40a14b0 Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Wed, 16 Jul 2025 18:35:57 -0700 Subject: [PATCH 5/7] Added TODO for simd128 memcmp expansion --- .../WebAssemblyTargetTransformInfo.cpp | 9 ++++---- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 22 +++++++++++-------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 2180882753024..2af2196e7e884 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -144,13 +144,14 @@ InstructionCost WebAssemblyTTIImpl::getCastInstrCost( WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { TTI::MemCmpExpansionOptions Options; - // INFO: I'm not sure what determines this, setting 2 conservatively + Options.AllowOverlappingLoads = true; - if (ST->hasSIMD128()) - Options.LoadSizes.push_back(16); + // TODO: Teach WebAssembly backend about load v128. + // if (ST->hasSIMD128()) + // Options.LoadSizes.push_back(16); + Options.LoadSizes.append({8, 4, 2, 1}); - Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); Options.NumLoadsPerBlock = Options.MaxNumLoads; diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 4d77456be22b5..42466670ac519 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -121,15 +121,19 @@ define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i64.load $push4=, 0($0):p2align=0 -; CHECK-NEXT: i64.load $push3=, 0($1):p2align=0 -; CHECK-NEXT: i64.xor $push5=, $pop4, $pop3 -; CHECK-NEXT: i64.load $push1=, 8($0):p2align=0 -; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 -; CHECK-NEXT: i64.xor $push2=, $pop1, $pop0 -; CHECK-NEXT: i64.or $push6=, $pop5, $pop2 -; CHECK-NEXT: i64.eqz $push7=, $pop6 -; CHECK-NEXT: return $pop7 +; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 8 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i64.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 ret i1 %res From 3509d9b75b5f6203d6c1526040fa1c271df18dbb Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Wed, 16 Jul 2025 19:16:05 -0700 Subject: [PATCH 6/7] Fix formatting issue --- llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 2af2196e7e884..5e1b2fdcbf39c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -150,7 +150,7 @@ WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { // TODO: Teach WebAssembly backend about load v128. // if (ST->hasSIMD128()) // Options.LoadSizes.push_back(16); - + Options.LoadSizes.append({8, 4, 2, 1}); Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); Options.NumLoadsPerBlock = Options.MaxNumLoads; From 8a8c1533cf3534330f91582a6e94f019c531bb6a Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Sat, 19 Jul 2025 19:49:44 -0700 Subject: [PATCH 7/7] Addresses nit in PR --- .../WebAssemblyTargetTransformInfo.cpp | 2 -- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 19 +++++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 5e1b2fdcbf39c..52e706514226b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -148,8 +148,6 @@ WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.AllowOverlappingLoads = true; // TODO: Teach WebAssembly backend about load v128. - // if (ST->hasSIMD128()) - // Options.LoadSizes.push_back(16); Options.LoadSizes.append({8, 4, 2, 1}); Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 42466670ac519..8030438645f82 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -101,6 +101,18 @@ define i1 @memcmp_expand_2(ptr %a, ptr %b) { ret i1 %res } +define i1 @memcmp_expand_2_align(ptr align(2) %a, ptr align(2) %b) { +; CHECK-LABEL: memcmp_expand_2_align: +; CHECK: .functype memcmp_expand_2_align (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push1=, 0($0) +; CHECK-NEXT: i32.load16_u $push0=, 0($1) +; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2) + %res = icmp eq i32 %cmp_2, 0 + ret i1 %res +} define i1 @memcmp_expand_8(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_8: @@ -115,7 +127,6 @@ define i1 @memcmp_expand_8(ptr %a, ptr %b) { ret i1 %res } - ; TODO: Should be using a single load i64x2 or equivalent in bitsizes define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: @@ -138,7 +149,3 @@ define i1 @memcmp_expand_16(ptr %a, ptr %b) { %res = icmp eq i32 %cmp_16, 0 ret i1 %res } - - - -