Skip to content

Commit 95e7351

Browse files
diggerlin authored and github-actions[bot] committed
Automerge: [PowerPC][NFC] Pre-commit test case: Implement a more efficient memcmp in cases where the length is known (#158367)
The newly added test case will be used to verify a more efficient memcmp in cases where the length is known.
2 parents 379fd9d + 4bf0001 commit 95e7351

File tree

2 files changed

+228
-0
lines changed

2 files changed

+228
-0
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \
3+
; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P8
4+
5+
; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix < %s | \
6+
; RUN: FileCheck %s --check-prefix=CHECK-AIX32-P10
7+
8+
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \
9+
; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P8
10+
11+
; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpcle-unknown-linux-gnu < %s | \
12+
; RUN: FileCheck %s --check-prefix=CHECK-LINUX32-P10
13+
14+
; cmpeq16: returns 1 when the 16 bytes at %a and %b are equal and 0 otherwise
; (bcmp result compared against 0, then zero-extended to i32).
;
; The assertions below show that, for every RUN configuration in this file, the
; bcmp call is expanded inline: each pointer is read with four 4-byte lwz
; loads (offsets 0/4 in the entry block, 8/12 in %loadbb1) and the words are
; combined with xor and a record-form or. The 0/1 result is then produced by
; cntlzw followed by rlwinm. No call to bcmp appears in the checked output.
; NOTE(review): per the commit description this is a pre-commit baseline for a
; planned, more efficient fixed-length expansion; regenerate the assertions
; with utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) {
15+
; CHECK-AIX32-P8-LABEL: cmpeq16:
16+
; CHECK-AIX32-P8: # %bb.0: # %entry
17+
; CHECK-AIX32-P8-NEXT: lwz r5, 4(r3)
18+
; CHECK-AIX32-P8-NEXT: lwz r6, 0(r3)
19+
; CHECK-AIX32-P8-NEXT: lwz r7, 4(r4)
20+
; CHECK-AIX32-P8-NEXT: lwz r8, 0(r4)
21+
; CHECK-AIX32-P8-NEXT: xor r6, r6, r8
22+
; CHECK-AIX32-P8-NEXT: xor r5, r5, r7
23+
; CHECK-AIX32-P8-NEXT: or. r5, r5, r6
24+
; CHECK-AIX32-P8-NEXT: bne cr0, L..BB0_2
25+
; CHECK-AIX32-P8-NEXT: # %bb.1: # %loadbb1
26+
; CHECK-AIX32-P8-NEXT: lwz r5, 12(r3)
27+
; CHECK-AIX32-P8-NEXT: lwz r3, 8(r3)
28+
; CHECK-AIX32-P8-NEXT: lwz r6, 12(r4)
29+
; CHECK-AIX32-P8-NEXT: lwz r4, 8(r4)
30+
; CHECK-AIX32-P8-NEXT: xor r3, r3, r4
31+
; CHECK-AIX32-P8-NEXT: xor r4, r5, r6
32+
; CHECK-AIX32-P8-NEXT: or. r3, r4, r3
33+
; CHECK-AIX32-P8-NEXT: li r3, 0
34+
; CHECK-AIX32-P8-NEXT: beq cr0, L..BB0_3
35+
; CHECK-AIX32-P8-NEXT: L..BB0_2: # %res_block
36+
; CHECK-AIX32-P8-NEXT: li r3, 1
37+
; CHECK-AIX32-P8-NEXT: L..BB0_3: # %endblock
38+
; CHECK-AIX32-P8-NEXT: cntlzw r3, r3
39+
; CHECK-AIX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31
40+
; CHECK-AIX32-P8-NEXT: blr
41+
;
42+
; CHECK-AIX32-P10-LABEL: cmpeq16:
43+
; CHECK-AIX32-P10: # %bb.0: # %entry
44+
; CHECK-AIX32-P10-NEXT: lwz r5, 4(r3)
45+
; CHECK-AIX32-P10-NEXT: lwz r6, 0(r3)
46+
; CHECK-AIX32-P10-NEXT: lwz r7, 4(r4)
47+
; CHECK-AIX32-P10-NEXT: xor r5, r5, r7
48+
; CHECK-AIX32-P10-NEXT: lwz r8, 0(r4)
49+
; CHECK-AIX32-P10-NEXT: xor r6, r6, r8
50+
; CHECK-AIX32-P10-NEXT: or. r5, r5, r6
51+
; CHECK-AIX32-P10-NEXT: bne cr0, L..BB0_2
52+
; CHECK-AIX32-P10-NEXT: # %bb.1: # %loadbb1
53+
; CHECK-AIX32-P10-NEXT: lwz r5, 12(r3)
54+
; CHECK-AIX32-P10-NEXT: lwz r3, 8(r3)
55+
; CHECK-AIX32-P10-NEXT: lwz r6, 12(r4)
56+
; CHECK-AIX32-P10-NEXT: lwz r4, 8(r4)
57+
; CHECK-AIX32-P10-NEXT: xor r3, r3, r4
58+
; CHECK-AIX32-P10-NEXT: xor r4, r5, r6
59+
; CHECK-AIX32-P10-NEXT: or. r3, r4, r3
60+
; CHECK-AIX32-P10-NEXT: li r3, 0
61+
; CHECK-AIX32-P10-NEXT: beq cr0, L..BB0_3
62+
; CHECK-AIX32-P10-NEXT: L..BB0_2: # %res_block
63+
; CHECK-AIX32-P10-NEXT: li r3, 1
64+
; CHECK-AIX32-P10-NEXT: L..BB0_3: # %endblock
65+
; CHECK-AIX32-P10-NEXT: cntlzw r3, r3
66+
; CHECK-AIX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
67+
; CHECK-AIX32-P10-NEXT: blr
68+
;
69+
; CHECK-LINUX32-P8-LABEL: cmpeq16:
70+
; CHECK-LINUX32-P8: # %bb.0: # %entry
71+
; CHECK-LINUX32-P8-NEXT: lwz r5, 0(r3)
72+
; CHECK-LINUX32-P8-NEXT: lwz r6, 4(r3)
73+
; CHECK-LINUX32-P8-NEXT: lwz r7, 0(r4)
74+
; CHECK-LINUX32-P8-NEXT: lwz r8, 4(r4)
75+
; CHECK-LINUX32-P8-NEXT: xor r6, r6, r8
76+
; CHECK-LINUX32-P8-NEXT: xor r5, r5, r7
77+
; CHECK-LINUX32-P8-NEXT: or. r5, r5, r6
78+
; CHECK-LINUX32-P8-NEXT: bne cr0, .LBB0_2
79+
; CHECK-LINUX32-P8-NEXT: # %bb.1: # %loadbb1
80+
; CHECK-LINUX32-P8-NEXT: lwz r5, 8(r3)
81+
; CHECK-LINUX32-P8-NEXT: lwz r3, 12(r3)
82+
; CHECK-LINUX32-P8-NEXT: lwz r6, 8(r4)
83+
; CHECK-LINUX32-P8-NEXT: lwz r4, 12(r4)
84+
; CHECK-LINUX32-P8-NEXT: xor r3, r3, r4
85+
; CHECK-LINUX32-P8-NEXT: xor r4, r5, r6
86+
; CHECK-LINUX32-P8-NEXT: or. r3, r4, r3
87+
; CHECK-LINUX32-P8-NEXT: li r3, 0
88+
; CHECK-LINUX32-P8-NEXT: beq cr0, .LBB0_3
89+
; CHECK-LINUX32-P8-NEXT: .LBB0_2: # %res_block
90+
; CHECK-LINUX32-P8-NEXT: li r3, 1
91+
; CHECK-LINUX32-P8-NEXT: .LBB0_3: # %endblock
92+
; CHECK-LINUX32-P8-NEXT: cntlzw r3, r3
93+
; CHECK-LINUX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31
94+
; CHECK-LINUX32-P8-NEXT: blr
95+
;
96+
; CHECK-LINUX32-P10-LABEL: cmpeq16:
97+
; CHECK-LINUX32-P10: # %bb.0: # %entry
98+
; CHECK-LINUX32-P10-NEXT: lwz r5, 0(r3)
99+
; CHECK-LINUX32-P10-NEXT: lwz r6, 4(r3)
100+
; CHECK-LINUX32-P10-NEXT: lwz r7, 0(r4)
101+
; CHECK-LINUX32-P10-NEXT: xor r5, r5, r7
102+
; CHECK-LINUX32-P10-NEXT: lwz r8, 4(r4)
103+
; CHECK-LINUX32-P10-NEXT: xor r6, r6, r8
104+
; CHECK-LINUX32-P10-NEXT: or. r5, r5, r6
105+
; CHECK-LINUX32-P10-NEXT: bne cr0, .LBB0_2
106+
; CHECK-LINUX32-P10-NEXT: # %bb.1: # %loadbb1
107+
; CHECK-LINUX32-P10-NEXT: lwz r5, 8(r3)
108+
; CHECK-LINUX32-P10-NEXT: lwz r3, 12(r3)
109+
; CHECK-LINUX32-P10-NEXT: lwz r6, 8(r4)
110+
; CHECK-LINUX32-P10-NEXT: lwz r4, 12(r4)
111+
; CHECK-LINUX32-P10-NEXT: xor r3, r3, r4
112+
; CHECK-LINUX32-P10-NEXT: xor r4, r5, r6
113+
; CHECK-LINUX32-P10-NEXT: or. r3, r4, r3
114+
; CHECK-LINUX32-P10-NEXT: li r3, 0
115+
; CHECK-LINUX32-P10-NEXT: beq cr0, .LBB0_3
116+
; CHECK-LINUX32-P10-NEXT: .LBB0_2: # %res_block
117+
; CHECK-LINUX32-P10-NEXT: li r3, 1
118+
; CHECK-LINUX32-P10-NEXT: .LBB0_3: # %endblock
119+
; CHECK-LINUX32-P10-NEXT: cntlzw r3, r3
120+
; CHECK-LINUX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
121+
; CHECK-LINUX32-P10-NEXT: blr
122+
entry:
123+
; Fixed-size comparison: the length is the constant 16 (an i32 in this file)
; and both pointers are marked dereferenceable(16).
%bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i32 16)
124+
%cmp = icmp eq i32 %bcmp, 0
125+
%conv = zext i1 %cmp to i32
126+
ret i32 %conv
127+
}
128+
129+
; External bcmp declaration; the length parameter is i32 in this file,
; matching the `i32 16` length passed by the call in @cmpeq16.
declare signext i32 @bcmp(ptr captures(none), ptr captures(none), i32)
130+
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \
3+
; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P8
4+
5+
; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix < %s | \
6+
; RUN: FileCheck %s --check-prefix=CHECK-AIX64-32-P10
7+
8+
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \
9+
; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P8
10+
11+
; RUN: llc -mcpu=pwr10 -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s | \
12+
; RUN: FileCheck %s --check-prefix=CHECK-LINUX64-P10
13+
14+
; cmpeq16: returns 1 when the 16 bytes at %a and %b are equal and 0 otherwise
; (bcmp result compared against 0, then zero-extended to i32).
;
; The assertions below show that, for every RUN configuration in this file, the
; bcmp call is expanded inline into two 8-byte ld/cmpld comparisons (offset 0
; in the entry block, offset 8 in %loadbb1). The 0/1 result is then produced
; by cntlzw followed by srwi (pwr8 output) or rlwinm (pwr10 output). No call
; to bcmp appears in the checked output.
; NOTE(review): per the commit description this is a pre-commit baseline for a
; planned, more efficient fixed-length expansion; regenerate the assertions
; with utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) {
15+
; CHECK-AIX64-32-P8-LABEL: cmpeq16:
16+
; CHECK-AIX64-32-P8: # %bb.0: # %entry
17+
; CHECK-AIX64-32-P8-NEXT: ld r5, 0(r3)
18+
; CHECK-AIX64-32-P8-NEXT: ld r6, 0(r4)
19+
; CHECK-AIX64-32-P8-NEXT: cmpld r5, r6
20+
; CHECK-AIX64-32-P8-NEXT: bne cr0, L..BB0_2
21+
; CHECK-AIX64-32-P8-NEXT: # %bb.1: # %loadbb1
22+
; CHECK-AIX64-32-P8-NEXT: ld r5, 8(r3)
23+
; CHECK-AIX64-32-P8-NEXT: ld r4, 8(r4)
24+
; CHECK-AIX64-32-P8-NEXT: li r3, 0
25+
; CHECK-AIX64-32-P8-NEXT: cmpld r5, r4
26+
; CHECK-AIX64-32-P8-NEXT: beq cr0, L..BB0_3
27+
; CHECK-AIX64-32-P8-NEXT: L..BB0_2: # %res_block
28+
; CHECK-AIX64-32-P8-NEXT: li r3, 1
29+
; CHECK-AIX64-32-P8-NEXT: L..BB0_3: # %endblock
30+
; CHECK-AIX64-32-P8-NEXT: cntlzw r3, r3
31+
; CHECK-AIX64-32-P8-NEXT: srwi r3, r3, 5
32+
; CHECK-AIX64-32-P8-NEXT: blr
33+
;
34+
; CHECK-AIX64-32-P10-LABEL: cmpeq16:
35+
; CHECK-AIX64-32-P10: # %bb.0: # %entry
36+
; CHECK-AIX64-32-P10-NEXT: ld r5, 0(r3)
37+
; CHECK-AIX64-32-P10-NEXT: ld r6, 0(r4)
38+
; CHECK-AIX64-32-P10-NEXT: cmpld r5, r6
39+
; CHECK-AIX64-32-P10-NEXT: bne cr0, L..BB0_2
40+
; CHECK-AIX64-32-P10-NEXT: # %bb.1: # %loadbb1
41+
; CHECK-AIX64-32-P10-NEXT: ld r5, 8(r3)
42+
; CHECK-AIX64-32-P10-NEXT: ld r4, 8(r4)
43+
; CHECK-AIX64-32-P10-NEXT: li r3, 0
44+
; CHECK-AIX64-32-P10-NEXT: cmpld r5, r4
45+
; CHECK-AIX64-32-P10-NEXT: beq cr0, L..BB0_3
46+
; CHECK-AIX64-32-P10-NEXT: L..BB0_2: # %res_block
47+
; CHECK-AIX64-32-P10-NEXT: li r3, 1
48+
; CHECK-AIX64-32-P10-NEXT: L..BB0_3: # %endblock
49+
; CHECK-AIX64-32-P10-NEXT: cntlzw r3, r3
50+
; CHECK-AIX64-32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
51+
; CHECK-AIX64-32-P10-NEXT: blr
52+
;
53+
; CHECK-LINUX64-P8-LABEL: cmpeq16:
54+
; CHECK-LINUX64-P8: # %bb.0: # %entry
55+
; CHECK-LINUX64-P8-NEXT: ld r5, 0(r3)
56+
; CHECK-LINUX64-P8-NEXT: ld r6, 0(r4)
57+
; CHECK-LINUX64-P8-NEXT: cmpld r5, r6
58+
; CHECK-LINUX64-P8-NEXT: bne cr0, .LBB0_2
59+
; CHECK-LINUX64-P8-NEXT: # %bb.1: # %loadbb1
60+
; CHECK-LINUX64-P8-NEXT: ld r5, 8(r3)
61+
; CHECK-LINUX64-P8-NEXT: ld r4, 8(r4)
62+
; CHECK-LINUX64-P8-NEXT: li r3, 0
63+
; CHECK-LINUX64-P8-NEXT: cmpld r5, r4
64+
; CHECK-LINUX64-P8-NEXT: beq cr0, .LBB0_3
65+
; CHECK-LINUX64-P8-NEXT: .LBB0_2: # %res_block
66+
; CHECK-LINUX64-P8-NEXT: li r3, 1
67+
; CHECK-LINUX64-P8-NEXT: .LBB0_3: # %endblock
68+
; CHECK-LINUX64-P8-NEXT: cntlzw r3, r3
69+
; CHECK-LINUX64-P8-NEXT: srwi r3, r3, 5
70+
; CHECK-LINUX64-P8-NEXT: blr
71+
;
72+
; CHECK-LINUX64-P10-LABEL: cmpeq16:
73+
; CHECK-LINUX64-P10: # %bb.0: # %entry
74+
; CHECK-LINUX64-P10-NEXT: ld r5, 0(r3)
75+
; CHECK-LINUX64-P10-NEXT: ld r6, 0(r4)
76+
; CHECK-LINUX64-P10-NEXT: cmpld r5, r6
77+
; CHECK-LINUX64-P10-NEXT: bne cr0, .LBB0_2
78+
; CHECK-LINUX64-P10-NEXT: # %bb.1: # %loadbb1
79+
; CHECK-LINUX64-P10-NEXT: ld r5, 8(r3)
80+
; CHECK-LINUX64-P10-NEXT: ld r4, 8(r4)
81+
; CHECK-LINUX64-P10-NEXT: li r3, 0
82+
; CHECK-LINUX64-P10-NEXT: cmpld r5, r4
83+
; CHECK-LINUX64-P10-NEXT: beq cr0, .LBB0_3
84+
; CHECK-LINUX64-P10-NEXT: .LBB0_2: # %res_block
85+
; CHECK-LINUX64-P10-NEXT: li r3, 1
86+
; CHECK-LINUX64-P10-NEXT: .LBB0_3: # %endblock
87+
; CHECK-LINUX64-P10-NEXT: cntlzw r3, r3
88+
; CHECK-LINUX64-P10-NEXT: rlwinm r3, r3, 27, 31, 31
89+
; CHECK-LINUX64-P10-NEXT: blr
90+
entry:
91+
; Fixed-size comparison: the length is the constant 16 (an i64 in this file)
; and both pointers are marked dereferenceable(16).
%bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i64 16)
92+
%cmp = icmp eq i32 %bcmp, 0
93+
%conv = zext i1 %cmp to i32
94+
ret i32 %conv
95+
}
96+
97+
; External bcmp declaration; the length parameter is i64 in this file,
; matching the `i64 16` length passed by the call in @cmpeq16.
declare signext i32 @bcmp(ptr captures(none), ptr captures(none), i64)
98+

0 commit comments

Comments
 (0)