Commit 7cc8408

mgudim authored and mahesh-attarde committed
[SLPVectorizer][NFC] A test for widening constant strided loads. (llvm#160552)
Precommit a test.
1 parent d0348d2 commit 7cc8408
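
For context, the new test loads sixteen i8 values in four contiguous 4-byte groups placed 100 bytes apart (offsets 0..3, 100..103, 200..203, 300..303) and stores them contiguously. The TODO comment added in the diff below records the vectorized form SLPVectorizer should eventually produce: one strided load widened to four i32 elements. A condensed sketch of that target IR follows; the function name @widen_sketch and the dropped %stride argument are illustrative, not part of the test:

; One widened strided load: four 4-byte groups, 100 bytes apart.
define void @widen_sketch(ptr %pl, ptr %ps) {
  %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
  %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
  %wide = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
  %bytes = bitcast <4 x i32> %wide to <16 x i8>
  store <16 x i8> %bytes, ptr %gep_s0, align 1
  ret void
}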

1 file changed
llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll

Lines changed: 105 additions & 13 deletions
@@ -527,23 +527,14 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
   ret void
 }

-; TODO: We want to generate this code:
-; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
-; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
-; %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
-; %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
-; store <16 x i8> %bitcast_, ptr %gep_s0, align 1
-; ret void
-; }
-define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+define void @constant_stride_masked_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_masked_no_reordering(
 ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
 ; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
 ; CHECK-NEXT: [[TMP1:%.*]] = call <28 x i8> @llvm.masked.load.v28i8.p0(ptr [[GEP_L0]], i32 1, <28 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <28 x i8> poison)
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
-; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
 ; CHECK-NEXT: ret void
 ;
   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
@@ -617,6 +608,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
   ret void
 }

+; TODO: We want to generate this code:
+; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) #0 {
+; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+; %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
+; %2 = bitcast <4 x i32> %1 to <16 x i8>
+; store <16 x i8> %2, ptr %gep_s0, align 1
+; ret void
+; }
+define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
+; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
+; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
+; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
+; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT: ret void
+;
+  %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+  %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
+  %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
+  %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
+  %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 100
+  %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 101
+  %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 102
+  %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 103
+  %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 200
+  %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 201
+  %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 202
+  %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 203
+  %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 300
+  %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 301
+  %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 302
+  %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 303
+
+  %load0 = load i8, ptr %gep_l0 , align 1
+  %load1 = load i8, ptr %gep_l1 , align 1
+  %load2 = load i8, ptr %gep_l2 , align 1
+  %load3 = load i8, ptr %gep_l3 , align 1
+  %load4 = load i8, ptr %gep_l4 , align 1
+  %load5 = load i8, ptr %gep_l5 , align 1
+  %load6 = load i8, ptr %gep_l6 , align 1
+  %load7 = load i8, ptr %gep_l7 , align 1
+  %load8 = load i8, ptr %gep_l8 , align 1
+  %load9 = load i8, ptr %gep_l9 , align 1
+  %load10 = load i8, ptr %gep_l10, align 1
+  %load11 = load i8, ptr %gep_l11, align 1
+  %load12 = load i8, ptr %gep_l12, align 1
+  %load13 = load i8, ptr %gep_l13, align 1
+  %load14 = load i8, ptr %gep_l14, align 1
+  %load15 = load i8, ptr %gep_l15, align 1
+
+  %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+  %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
+  %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
+  %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
+  %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
+  %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
+  %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
+  %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
+  %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
+  %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
+  %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
+  %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
+  %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
+  %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
+  %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
+  %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15
+
+  store i8 %load0, ptr %gep_s0, align 1
+  store i8 %load1, ptr %gep_s1, align 1
+  store i8 %load2, ptr %gep_s2, align 1
+  store i8 %load3, ptr %gep_s3, align 1
+  store i8 %load4, ptr %gep_s4, align 1
+  store i8 %load5, ptr %gep_s5, align 1
+  store i8 %load6, ptr %gep_s6, align 1
+  store i8 %load7, ptr %gep_s7, align 1
+  store i8 %load8, ptr %gep_s8, align 1
+  store i8 %load9, ptr %gep_s9, align 1
+  store i8 %load10, ptr %gep_s10, align 1
+  store i8 %load11, ptr %gep_s11, align 1
+  store i8 %load12, ptr %gep_s12, align 1
+  store i8 %load13, ptr %gep_s13, align 1
+  store i8 %load14, ptr %gep_s14, align 1
+  store i8 %load15, ptr %gep_s15, align 1
+
+  ret void
+}
 ; TODO: We want to generate this code:
 ; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
 ; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
