@@ -527,23 +527,14 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
527527 ret void
528528}
529529
530- ; TODO: We want to generate this code:
531- ; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
532- ; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
533- ; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
534- ; %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
535- ; %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
536- ; store <16 x i8> %bitcast_, ptr %gep_s0, align 1
537- ; ret void
538- ; }
539- define void @constant_stride_widen_no_reordering (ptr %pl , i64 %stride , ptr %ps ) {
540- ; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
530+ define void @constant_stride_masked_no_reordering (ptr %pl , i64 %stride , ptr %ps ) {
531+ ; CHECK-LABEL: define void @constant_stride_masked_no_reordering(
541532; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
542533; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
543534; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
544535; CHECK-NEXT: [[TMP1:%.*]] = call <28 x i8> @llvm.masked.load.v28i8.p0(ptr [[GEP_L0]], i32 1, <28 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <28 x i8> poison)
545- ; CHECK-NEXT: [[TMP8 :%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
546- ; CHECK-NEXT: store <16 x i8> [[TMP8 ]], ptr [[GEP_S0]], align 1
536+ ; CHECK-NEXT: [[TMP2 :%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
537+ ; CHECK-NEXT: store <16 x i8> [[TMP2 ]], ptr [[GEP_S0]], align 1
547538; CHECK-NEXT: ret void
548539;
549540 %gep_l0 = getelementptr inbounds i8 , ptr %pl , i64 0
@@ -617,6 +608,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
617608 ret void
618609}
619610
611+ ; TODO: The 16 byte loads below form four 4-byte runs whose starts are a constant 100 bytes apart (%stride is unused); the CHECK lines currently expect four <4 x i8> loads merged by shuffles, but we want to generate this code:
612+ ; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) #0 {
613+ ; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
614+ ; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
615+ ; %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
616+ ; %2 = bitcast <4 x i32> %1 to <16 x i8>
617+ ; store <16 x i8> %2, ptr %gep_s0, align 1
618+ ; ret void
619+ ; }
620+ define void @constant_stride_widen_no_reordering (ptr %pl , i64 %stride , ptr %ps ) {
621+ ; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
622+ ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
623+ ; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
624+ ; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
625+ ; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
626+ ; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
627+ ; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
628+ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
629+ ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
630+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
631+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
632+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
633+ ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
634+ ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
635+ ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
636+ ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
637+ ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
638+ ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
639+ ; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
640+ ; CHECK-NEXT: ret void
641+ ;
642+ %gep_l0 = getelementptr inbounds i8 , ptr %pl , i64 0
643+ %gep_l1 = getelementptr inbounds i8 , ptr %pl , i64 1
644+ %gep_l2 = getelementptr inbounds i8 , ptr %pl , i64 2
645+ %gep_l3 = getelementptr inbounds i8 , ptr %pl , i64 3
646+ %gep_l4 = getelementptr inbounds i8 , ptr %pl , i64 100
647+ %gep_l5 = getelementptr inbounds i8 , ptr %pl , i64 101
648+ %gep_l6 = getelementptr inbounds i8 , ptr %pl , i64 102
649+ %gep_l7 = getelementptr inbounds i8 , ptr %pl , i64 103
650+ %gep_l8 = getelementptr inbounds i8 , ptr %pl , i64 200
651+ %gep_l9 = getelementptr inbounds i8 , ptr %pl , i64 201
652+ %gep_l10 = getelementptr inbounds i8 , ptr %pl , i64 202
653+ %gep_l11 = getelementptr inbounds i8 , ptr %pl , i64 203
654+ %gep_l12 = getelementptr inbounds i8 , ptr %pl , i64 300
655+ %gep_l13 = getelementptr inbounds i8 , ptr %pl , i64 301
656+ %gep_l14 = getelementptr inbounds i8 , ptr %pl , i64 302
657+ %gep_l15 = getelementptr inbounds i8 , ptr %pl , i64 303
658+ ; Scalar byte loads from the source addresses above: runs of 4 consecutive bytes at offsets 0, 100, 200 and 300 from %pl.
659+ %load0 = load i8 , ptr %gep_l0 , align 1
660+ %load1 = load i8 , ptr %gep_l1 , align 1
661+ %load2 = load i8 , ptr %gep_l2 , align 1
662+ %load3 = load i8 , ptr %gep_l3 , align 1
663+ %load4 = load i8 , ptr %gep_l4 , align 1
664+ %load5 = load i8 , ptr %gep_l5 , align 1
665+ %load6 = load i8 , ptr %gep_l6 , align 1
666+ %load7 = load i8 , ptr %gep_l7 , align 1
667+ %load8 = load i8 , ptr %gep_l8 , align 1
668+ %load9 = load i8 , ptr %gep_l9 , align 1
669+ %load10 = load i8 , ptr %gep_l10 , align 1
670+ %load11 = load i8 , ptr %gep_l11 , align 1
671+ %load12 = load i8 , ptr %gep_l12 , align 1
672+ %load13 = load i8 , ptr %gep_l13 , align 1
673+ %load14 = load i8 , ptr %gep_l14 , align 1
674+ %load15 = load i8 , ptr %gep_l15 , align 1
675+ ; Destination addresses: 16 consecutive bytes at offsets 0..15 from %ps.
676+ %gep_s0 = getelementptr inbounds i8 , ptr %ps , i64 0
677+ %gep_s1 = getelementptr inbounds i8 , ptr %ps , i64 1
678+ %gep_s2 = getelementptr inbounds i8 , ptr %ps , i64 2
679+ %gep_s3 = getelementptr inbounds i8 , ptr %ps , i64 3
680+ %gep_s4 = getelementptr inbounds i8 , ptr %ps , i64 4
681+ %gep_s5 = getelementptr inbounds i8 , ptr %ps , i64 5
682+ %gep_s6 = getelementptr inbounds i8 , ptr %ps , i64 6
683+ %gep_s7 = getelementptr inbounds i8 , ptr %ps , i64 7
684+ %gep_s8 = getelementptr inbounds i8 , ptr %ps , i64 8
685+ %gep_s9 = getelementptr inbounds i8 , ptr %ps , i64 9
686+ %gep_s10 = getelementptr inbounds i8 , ptr %ps , i64 10
687+ %gep_s11 = getelementptr inbounds i8 , ptr %ps , i64 11
688+ %gep_s12 = getelementptr inbounds i8 , ptr %ps , i64 12
689+ %gep_s13 = getelementptr inbounds i8 , ptr %ps , i64 13
690+ %gep_s14 = getelementptr inbounds i8 , ptr %ps , i64 14
691+ %gep_s15 = getelementptr inbounds i8 , ptr %ps , i64 15
692+ ; Store %loadN to %gep_sN for every N, so the bytes are written in load order (no reordering).
693+ store i8 %load0 , ptr %gep_s0 , align 1
694+ store i8 %load1 , ptr %gep_s1 , align 1
695+ store i8 %load2 , ptr %gep_s2 , align 1
696+ store i8 %load3 , ptr %gep_s3 , align 1
697+ store i8 %load4 , ptr %gep_s4 , align 1
698+ store i8 %load5 , ptr %gep_s5 , align 1
699+ store i8 %load6 , ptr %gep_s6 , align 1
700+ store i8 %load7 , ptr %gep_s7 , align 1
701+ store i8 %load8 , ptr %gep_s8 , align 1
702+ store i8 %load9 , ptr %gep_s9 , align 1
703+ store i8 %load10 , ptr %gep_s10 , align 1
704+ store i8 %load11 , ptr %gep_s11 , align 1
705+ store i8 %load12 , ptr %gep_s12 , align 1
706+ store i8 %load13 , ptr %gep_s13 , align 1
707+ store i8 %load14 , ptr %gep_s14 , align 1
708+ store i8 %load15 , ptr %gep_s15 , align 1
709+
710+ ret void
711+ }
620712; TODO: We want to generate this code:
621713; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
622714; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
0 commit comments