From d0771dbc1997978d5f1d884803bb7bf13a0c4c57 Mon Sep 17 00:00:00 2001 From: zhijian Date: Fri, 12 Sep 2025 20:47:34 +0000 Subject: [PATCH 1/3] add test case for memcmp_fixsize for powerpc --- llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll | 98 +++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll diff --git a/llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll new file mode 100644 index 0000000000000..62048f8f8dfc6 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P8 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P10 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P8 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P10 + +define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) { +; CHECK-AIX64-32-P8-LABEL: cmpeq16: +; CHECK-AIX64-32-P8: # %bb.0: # %entry +; CHECK-AIX64-32-P8-NEXT: ld r5, 0(r3) +; CHECK-AIX64-32-P8-NEXT: ld r6, 0(r4) +; CHECK-AIX64-32-P8-NEXT: cmpld r5, r6 +; CHECK-AIX64-32-P8-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX64-32-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX64-32-P8-NEXT: ld r5, 8(r3) +; CHECK-AIX64-32-P8-NEXT: ld r4, 8(r4) +; CHECK-AIX64-32-P8-NEXT: li r3, 0 +; CHECK-AIX64-32-P8-NEXT: cmpld r5, r4 +; CHECK-AIX64-32-P8-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX64-32-P8-NEXT: L..BB0_2: # %res_block +; CHECK-AIX64-32-P8-NEXT: li r3, 1 +; CHECK-AIX64-32-P8-NEXT: L..BB0_3: # %endblock +; CHECK-AIX64-32-P8-NEXT: cntlzw r3, r3 +; CHECK-AIX64-32-P8-NEXT: srwi r3, r3, 5 +; CHECK-AIX64-32-P8-NEXT: blr +; +; CHECK-AIX64-32-P10-LABEL: cmpeq16: +; CHECK-AIX64-32-P10: # %bb.0: # %entry +; CHECK-AIX64-32-P10-NEXT: ld r5, 0(r3) +; CHECK-AIX64-32-P10-NEXT: ld r6, 0(r4) +; CHECK-AIX64-32-P10-NEXT: cmpld r5, r6 +; CHECK-AIX64-32-P10-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX64-32-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX64-32-P10-NEXT: ld r5, 8(r3) +; CHECK-AIX64-32-P10-NEXT: ld r4, 8(r4) +; CHECK-AIX64-32-P10-NEXT: li r3, 0 +; CHECK-AIX64-32-P10-NEXT: cmpld r5, r4 +; CHECK-AIX64-32-P10-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX64-32-P10-NEXT: L..BB0_2: # %res_block +; CHECK-AIX64-32-P10-NEXT: li r3, 1 +; CHECK-AIX64-32-P10-NEXT: L..BB0_3: # %endblock +; CHECK-AIX64-32-P10-NEXT: cntlzw r3, r3 +; CHECK-AIX64-32-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-AIX64-32-P10-NEXT: blr +; +; CHECK-LINUX64-P8-LABEL: cmpeq16: +; CHECK-LINUX64-P8: # %bb.0: # %entry +; CHECK-LINUX64-P8-NEXT: ld r5, 0(r3) +; CHECK-LINUX64-P8-NEXT: ld r6, 0(r4) +; CHECK-LINUX64-P8-NEXT: cmpld r5, r6 +; CHECK-LINUX64-P8-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX64-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX64-P8-NEXT: ld r5, 8(r3) +; CHECK-LINUX64-P8-NEXT: ld r4, 8(r4) +; CHECK-LINUX64-P8-NEXT: li r3, 0 +; CHECK-LINUX64-P8-NEXT: cmpld r5, r4 +; CHECK-LINUX64-P8-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX64-P8-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX64-P8-NEXT: li r3, 1 +; CHECK-LINUX64-P8-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX64-P8-NEXT: cntlzw r3, r3 +; CHECK-LINUX64-P8-NEXT: srwi r3, r3, 5 +; CHECK-LINUX64-P8-NEXT: blr +; +; CHECK-LINUX64-P10-LABEL: cmpeq16: +; CHECK-LINUX64-P10: # %bb.0: # %entry +; CHECK-LINUX64-P10-NEXT: ld r5, 0(r3) +; CHECK-LINUX64-P10-NEXT: ld r6, 0(r4) +; CHECK-LINUX64-P10-NEXT: cmpld r5, r6 +; CHECK-LINUX64-P10-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX64-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX64-P10-NEXT: ld r5, 8(r3) +; CHECK-LINUX64-P10-NEXT: ld r4, 8(r4) +; CHECK-LINUX64-P10-NEXT: li r3, 0 +; CHECK-LINUX64-P10-NEXT: cmpld r5, r4 +; CHECK-LINUX64-P10-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX64-P10-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX64-P10-NEXT: li r3, 1 +; CHECK-LINUX64-P10-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX64-P10-NEXT: cntlzw r3, r3 +; CHECK-LINUX64-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-LINUX64-P10-NEXT: blr +entry: + %bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i64 16) + %cmp = icmp eq i32 %bcmp, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +declare signext i32 @bcmp(ptr captures(none), ptr captures(none), i64) + From 3a0aa01557998e41a3c85de3ad2b25c243709c94 Mon Sep 17 00:00:00 2001 From: zhijian Date: Fri, 12 Sep 2025 21:42:25 +0000 Subject: [PATCH 2/3] add 32-bit test case --- llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll | 130 ++++++++++++++++++ ...{memcmp_fixsize.ll => memcmp64_fixsize.ll} | 0 2 files changed, 130 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll rename llvm/test/CodeGen/PowerPC/{memcmp_fixsize.ll => memcmp64_fixsize.ll} (100%) diff --git a/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll new file mode 100644 index 0000000000000..20eb54b7f721b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P8 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P10 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P8 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P10 + +define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) { +; CHECK-AIX32-P8-LABEL: cmpeq16: +; CHECK-AIX32-P8: # %bb.0: # %entry +; CHECK-AIX32-P8-NEXT: lwz r5, 4(r3) +; CHECK-AIX32-P8-NEXT: lwz r6, 0(r3) +; CHECK-AIX32-P8-NEXT: lwz r7, 4(r4) +; CHECK-AIX32-P8-NEXT: lwz r8, 0(r4) +; CHECK-AIX32-P8-NEXT: xor r6, r6, r8 +; CHECK-AIX32-P8-NEXT: xor r5, r5, r7 +; CHECK-AIX32-P8-NEXT: or. r5, r5, r6 +; CHECK-AIX32-P8-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX32-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX32-P8-NEXT: lwz r5, 12(r3) +; CHECK-AIX32-P8-NEXT: lwz r3, 8(r3) +; CHECK-AIX32-P8-NEXT: lwz r6, 12(r4) +; CHECK-AIX32-P8-NEXT: lwz r4, 8(r4) +; CHECK-AIX32-P8-NEXT: xor r3, r3, r4 +; CHECK-AIX32-P8-NEXT: xor r4, r5, r6 +; CHECK-AIX32-P8-NEXT: or. r3, r4, r3 +; CHECK-AIX32-P8-NEXT: li r3, 0 +; CHECK-AIX32-P8-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX32-P8-NEXT: L..BB0_2: # %res_block +; CHECK-AIX32-P8-NEXT: li r3, 1 +; CHECK-AIX32-P8-NEXT: L..BB0_3: # %endblock +; CHECK-AIX32-P8-NEXT: cntlzw r3, r3 +; CHECK-AIX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-AIX32-P8-NEXT: blr +; +; CHECK-AIX32-P10-LABEL: cmpeq16: +; CHECK-AIX32-P10: # %bb.0: # %entry +; CHECK-AIX32-P10-NEXT: lwz r5, 4(r3) +; CHECK-AIX32-P10-NEXT: lwz r6, 0(r3) +; CHECK-AIX32-P10-NEXT: lwz r7, 4(r4) +; CHECK-AIX32-P10-NEXT: xor r5, r5, r7 +; CHECK-AIX32-P10-NEXT: lwz r8, 0(r4) +; CHECK-AIX32-P10-NEXT: xor r6, r6, r8 +; CHECK-AIX32-P10-NEXT: or. r5, r5, r6 +; CHECK-AIX32-P10-NEXT: bne cr0, L..BB0_2 +; CHECK-AIX32-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-AIX32-P10-NEXT: lwz r5, 12(r3) +; CHECK-AIX32-P10-NEXT: lwz r3, 8(r3) +; CHECK-AIX32-P10-NEXT: lwz r6, 12(r4) +; CHECK-AIX32-P10-NEXT: lwz r4, 8(r4) +; CHECK-AIX32-P10-NEXT: xor r3, r3, r4 +; CHECK-AIX32-P10-NEXT: xor r4, r5, r6 +; CHECK-AIX32-P10-NEXT: or. r3, r4, r3 +; CHECK-AIX32-P10-NEXT: li r3, 0 +; CHECK-AIX32-P10-NEXT: beq cr0, L..BB0_3 +; CHECK-AIX32-P10-NEXT: L..BB0_2: # %res_block +; CHECK-AIX32-P10-NEXT: li r3, 1 +; CHECK-AIX32-P10-NEXT: L..BB0_3: # %endblock +; CHECK-AIX32-P10-NEXT: cntlzw r3, r3 +; CHECK-AIX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-AIX32-P10-NEXT: blr +; +; CHECK-LINUX32-P8-LABEL: cmpeq16: +; CHECK-LINUX32-P8: # %bb.0: # %entry +; CHECK-LINUX32-P8-NEXT: lwz r5, 0(r3) +; CHECK-LINUX32-P8-NEXT: lwz r6, 4(r3) +; CHECK-LINUX32-P8-NEXT: lwz r7, 0(r4) +; CHECK-LINUX32-P8-NEXT: lwz r8, 4(r4) +; CHECK-LINUX32-P8-NEXT: xor r6, r6, r8 +; CHECK-LINUX32-P8-NEXT: xor r5, r5, r7 +; CHECK-LINUX32-P8-NEXT: or. r5, r5, r6 +; CHECK-LINUX32-P8-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX32-P8-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX32-P8-NEXT: lwz r5, 8(r3) +; CHECK-LINUX32-P8-NEXT: lwz r3, 12(r3) +; CHECK-LINUX32-P8-NEXT: lwz r6, 8(r4) +; CHECK-LINUX32-P8-NEXT: lwz r4, 12(r4) +; CHECK-LINUX32-P8-NEXT: xor r3, r3, r4 +; CHECK-LINUX32-P8-NEXT: xor r4, r5, r6 +; CHECK-LINUX32-P8-NEXT: or. r3, r4, r3 +; CHECK-LINUX32-P8-NEXT: li r3, 0 +; CHECK-LINUX32-P8-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX32-P8-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX32-P8-NEXT: li r3, 1 +; CHECK-LINUX32-P8-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX32-P8-NEXT: cntlzw r3, r3 +; CHECK-LINUX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-LINUX32-P8-NEXT: blr +; +; CHECK-LINUX32-P10-LABEL: cmpeq16: +; CHECK-LINUX32-P10: # %bb.0: # %entry +; CHECK-LINUX32-P10-NEXT: lwz r5, 0(r3) +; CHECK-LINUX32-P10-NEXT: lwz r6, 4(r3) +; CHECK-LINUX32-P10-NEXT: lwz r7, 0(r4) +; CHECK-LINUX32-P10-NEXT: xor r5, r5, r7 +; CHECK-LINUX32-P10-NEXT: lwz r8, 4(r4) +; CHECK-LINUX32-P10-NEXT: xor r6, r6, r8 +; CHECK-LINUX32-P10-NEXT: or. r5, r5, r6 +; CHECK-LINUX32-P10-NEXT: bne cr0, .LBB0_2 +; CHECK-LINUX32-P10-NEXT: # %bb.1: # %loadbb1 +; CHECK-LINUX32-P10-NEXT: lwz r5, 8(r3) +; CHECK-LINUX32-P10-NEXT: lwz r3, 12(r3) +; CHECK-LINUX32-P10-NEXT: lwz r6, 8(r4) +; CHECK-LINUX32-P10-NEXT: lwz r4, 12(r4) +; CHECK-LINUX32-P10-NEXT: xor r3, r3, r4 +; CHECK-LINUX32-P10-NEXT: xor r4, r5, r6 +; CHECK-LINUX32-P10-NEXT: or. r3, r4, r3 +; CHECK-LINUX32-P10-NEXT: li r3, 0 +; CHECK-LINUX32-P10-NEXT: beq cr0, .LBB0_3 +; CHECK-LINUX32-P10-NEXT: .LBB0_2: # %res_block +; CHECK-LINUX32-P10-NEXT: li r3, 1 +; CHECK-LINUX32-P10-NEXT: .LBB0_3: # %endblock +; CHECK-LINUX32-P10-NEXT: cntlzw r3, r3 +; CHECK-LINUX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31 +; CHECK-LINUX32-P10-NEXT: blr +entry: + %bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i32 16) + %cmp = icmp eq i32 %bcmp, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +declare signext i32 @bcmp(ptr captures(none), ptr captures(none), i32) + diff --git a/llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll similarity index 100% rename from llvm/test/CodeGen/PowerPC/memcmp_fixsize.ll rename to llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll From ee1488a8f20ab92b15acecd422b5e6ae6ed7595b Mon Sep 17 00:00:00 2001 From: zhijian Date: Mon, 15 Sep 2025 13:56:48 +0000 Subject: [PATCH 3/3] remove -verify-machineinstrs --- llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll | 8 ++++---- llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll index 20eb54b7f721b..f5483ad2a7c3f 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P10 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P10 define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) { diff --git a/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll b/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll index 62048f8f8dfc6..216b7638642d4 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P10 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P10 define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) {