Skip to content

Commit 716dddc

Browse files
committed
remove hasSideEffects for XXSPLTIB
1 parent 7eaaa4e commit 716dddc

File tree

3 files changed

+99
-6
lines changed

3 files changed

+99
-6
lines changed

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,8 +1659,6 @@ let Predicates = [HasVSX, HasP9Vector] in {
16591659
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
16601660

16611661
// Vector Splat Immediate Byte
1662-
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
1663-
let hasSideEffects = 1 in
16641662
def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
16651663
"xxspltib $XT, $IMM8", IIC_VecPerm, []>;
16661664

llvm/test/CodeGen/PowerPC/memset-tail.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -380,9 +380,9 @@ define dso_local void @memsetTailV1B2(ptr nocapture noundef writeonly %p) local_
380380
;
381381
; P9-BE-LABEL: memsetTailV1B2:
382382
; P9-BE: # %bb.0: # %entry
383+
; P9-BE-NEXT: xxspltib 0, 15
383384
; P9-BE-NEXT: li 4, 3855
384385
; P9-BE-NEXT: sth 4, 16(3)
385-
; P9-BE-NEXT: xxspltib 0, 15
386386
; P9-BE-NEXT: stxv 0, 0(3)
387387
; P9-BE-NEXT: blr
388388
;
@@ -433,9 +433,9 @@ define dso_local void @memsetTailV1B1(ptr nocapture noundef writeonly %p) local_
433433
;
434434
; P9-BE-LABEL: memsetTailV1B1:
435435
; P9-BE: # %bb.0: # %entry
436+
; P9-BE-NEXT: xxspltib 0, 15
436437
; P9-BE-NEXT: li 4, 15
437438
; P9-BE-NEXT: stb 4, 16(3)
438-
; P9-BE-NEXT: xxspltib 0, 15
439439
; P9-BE-NEXT: stxv 0, 0(3)
440440
; P9-BE-NEXT: blr
441441
;
@@ -861,9 +861,9 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
861861
;
862862
; P9-BE-LABEL: memset2TailV1B2:
863863
; P9-BE: # %bb.0: # %entry
864+
; P9-BE-NEXT: xxspltib 0, 165
864865
; P9-BE-NEXT: li 4, -23131
865866
; P9-BE-NEXT: sth 4, 16(3)
866-
; P9-BE-NEXT: xxspltib 0, 165
867867
; P9-BE-NEXT: stxv 0, 0(3)
868868
; P9-BE-NEXT: blr
869869
;
@@ -917,9 +917,9 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
917917
;
918918
; P9-BE-LABEL: memset2TailV1B1:
919919
; P9-BE: # %bb.0: # %entry
920+
; P9-BE-NEXT: xxspltib 0, 165
920921
; P9-BE-NEXT: li 4, -91
921922
; P9-BE-NEXT: stb 4, 16(3)
922-
; P9-BE-NEXT: xxspltib 0, 165
923923
; P9-BE-NEXT: stxv 0, 0(3)
924924
; P9-BE-NEXT: blr
925925
;
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
;; Test that `xxspltib` is hoisted out of the loop.
2+
3+
; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff \
4+
; RUN: %s -o - 2>&1 | FileCheck %s
5+
6+
define void @_Z3fooPfS_Pi(ptr noalias nocapture noundef writeonly %_a, ptr noalias nocapture noundef readonly %In_a, ptr noalias nocapture noundef readonly %n) local_unnamed_addr #0 {
7+
entry:
8+
%0 = load i32, ptr %n, align 4
9+
%cmp9 = icmp sgt i32 %0, 0
10+
br i1 %cmp9, label %for.body.preheader, label %for.cond.cleanup
11+
12+
for.body.preheader:
13+
%wide.trip.count = zext nneg i32 %0 to i64
14+
%xtraiter = and i64 %wide.trip.count, 1
15+
%1 = icmp eq i32 %0, 1
16+
br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
17+
18+
for.body.preheader.new:
19+
%unroll_iter = and i64 %wide.trip.count, 2147483646
20+
br label %for.body
21+
22+
for.cond.cleanup.loopexit.unr-lcssa:
23+
%indvars.iv.unr = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next.1, %for.body ]
24+
%lcmp.mod.not = icmp eq i64 %xtraiter, 0
25+
br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil
26+
27+
for.body.epil:
28+
%arrayidx.epil = getelementptr inbounds nuw float, ptr %In_a, i64 %indvars.iv.unr
29+
%2 = load float, ptr %arrayidx.epil, align 4
30+
%vecins.i.epil = insertelement <4 x float> poison, float %2, i64 0
31+
%3 = bitcast <4 x float> %vecins.i.epil to <16 x i8>
32+
%and1.i.epil = and <16 x i8> %3, <i8 6, i8 6, i8 6, i8 6, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
33+
%4 = bitcast <16 x i8> %and1.i.epil to <4 x float>
34+
%vecext.i.epil = extractelement <4 x float> %4, i64 0
35+
%arrayidx5.epil = getelementptr inbounds nuw float, ptr %_a, i64 %indvars.iv.unr
36+
store float %vecext.i.epil, ptr %arrayidx5.epil, align 4
37+
br label %for.cond.cleanup
38+
39+
for.cond.cleanup:
40+
ret void
41+
42+
for.body:
43+
%indvars.iv = phi i64 [ 0, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ]
44+
%niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.1, %for.body ]
45+
%arrayidx = getelementptr inbounds nuw float, ptr %In_a, i64 %indvars.iv
46+
%5 = load float, ptr %arrayidx, align 4
47+
%vecins.i = insertelement <4 x float> poison, float %5, i64 0
48+
%6 = bitcast <4 x float> %vecins.i to <16 x i8>
49+
%and1.i = and <16 x i8> %6, <i8 6, i8 6, i8 6, i8 6, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
50+
%7 = bitcast <16 x i8> %and1.i to <4 x float>
51+
%vecext.i = extractelement <4 x float> %7, i64 0
52+
%arrayidx5 = getelementptr inbounds nuw float, ptr %_a, i64 %indvars.iv
53+
store float %vecext.i, ptr %arrayidx5, align 4
54+
%indvars.iv.next = or disjoint i64 %indvars.iv, 1
55+
%arrayidx.1 = getelementptr inbounds nuw float, ptr %In_a, i64 %indvars.iv.next
56+
%8 = load float, ptr %arrayidx.1, align 4
57+
%vecins.i.1 = insertelement <4 x float> poison, float %8, i64 0
58+
%9 = bitcast <4 x float> %vecins.i.1 to <16 x i8>
59+
%and1.i.1 = and <16 x i8> %9, <i8 6, i8 6, i8 6, i8 6, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
60+
%10 = bitcast <16 x i8> %and1.i.1 to <4 x float>
61+
%vecext.i.1 = extractelement <4 x float> %10, i64 0
62+
%arrayidx5.1 = getelementptr inbounds nuw float, ptr %_a, i64 %indvars.iv.next
63+
store float %vecext.i.1, ptr %arrayidx5.1, align 4
64+
%indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
65+
%niter.next.1 = add i64 %niter, 2
66+
%niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
67+
br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
68+
}
69+
70+
attributes #0 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr10" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+mma,+paired-vector-memops,+pcrelative-memops,+power10-vector,+power8-vector,+power9-vector,+prefix-instrs,+quadword-atomics,+vsx,-aix-shared-lib-tls-model-opt,-aix-small-local-dynamic-tls,-aix-small-local-exec-tls,-htm,-privileged,-rop-protect,-spe" }
71+
72+
; CHECK: ._Z3fooPfS_Pi:
73+
; CHECK-NEXT: # %bb.0: # %entry
74+
; CHECK-NEXT: lwz 5, 0(5)
75+
; CHECK-NEXT: cmpwi 5, 1
76+
; CHECK-NEXT: bltlr 0
77+
; CHECK-NEXT: # %bb.1: # %for.body.preheader
78+
; CHECK-NEXT: li 6, 0
79+
; CHECK-NEXT: cmplwi 5, 1
80+
; CHECK-NEXT: beq 0, L..BB0_4
81+
; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
82+
; CHECK-NEXT: rlwinm 6, 5, 0, 1, 30
83+
; CHECK-NEXT: xxspltib 0, 6
84+
; CHECK-NEXT: addi 9, 4, -8
85+
; CHECK-NEXT: addi 7, 3, -8
86+
; CHECK-NEXT: li 8, 8
87+
; CHECK-NEXT: li 10, 12
88+
; CHECK-NEXT: li 11, 4
89+
; CHECK-NEXT: addi 6, 6, -2
90+
; CHECK-NEXT: rldicl 6, 6, 63, 1
91+
; CHECK-NEXT: addi 6, 6, 1
92+
; CHECK-NEXT: mtctr 6
93+
; CHECK-NEXT: li 6, 0
94+
; CHECK-NEXT: .align 4
95+
; CHECK-NEXT: L..BB0_3:

0 commit comments

Comments
 (0)