@@ -527,23 +527,14 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
527
527
ret void
528
528
}
529
529
530
- ; TODO: We want to generate this code:
531
- ; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
532
- ; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
533
- ; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
534
- ; %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
535
- ; %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
536
- ; store <16 x i8> %bitcast_, ptr %gep_s0, align 1
537
- ; ret void
538
- ; }
539
- define void @constant_stride_widen_no_reordering (ptr %pl , i64 %stride , ptr %ps ) {
540
- ; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
530
+ define void @constant_stride_masked_no_reordering (ptr %pl , i64 %stride , ptr %ps ) {
531
+ ; CHECK-LABEL: define void @constant_stride_masked_no_reordering(
541
532
; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
542
533
; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
543
534
; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
544
535
; CHECK-NEXT: [[TMP1:%.*]] = call <28 x i8> @llvm.masked.load.v28i8.p0(ptr [[GEP_L0]], i32 1, <28 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <28 x i8> poison)
545
- ; CHECK-NEXT: [[TMP8 :%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
546
- ; CHECK-NEXT: store <16 x i8> [[TMP8 ]], ptr [[GEP_S0]], align 1
536
+ ; CHECK-NEXT: [[TMP2 :%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
537
+ ; CHECK-NEXT: store <16 x i8> [[TMP2 ]], ptr [[GEP_S0]], align 1
547
538
; CHECK-NEXT: ret void
548
539
;
549
540
%gep_l0 = getelementptr inbounds i8 , ptr %pl , i64 0
@@ -617,6 +608,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
617
608
ret void
618
609
}
619
610
611
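+ ; TODO: The expected output below still uses four <4 x i8> loads joined by shufflevectors; we want to generate this single widened strided load instead: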
612
+ ; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) #0 {
613
+ ; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
614
+ ; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
615
+ ; %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
616
+ ; %2 = bitcast <4 x i32> %1 to <16 x i8>
617
+ ; store <16 x i8> %2, ptr %gep_s0, align 1
618
+ ; ret void
619
+ ; }
620
+ define void @constant_stride_widen_no_reordering (ptr %pl , i64 %stride , ptr %ps ) { ; copies 16 bytes from %pl (four 4-byte groups at offsets 0, 100, 200, 300) to 16 consecutive bytes at %ps; %stride is not referenced in the body
621
+ ; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
622
+ ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
623
+ ; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
624
+ ; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
625
+ ; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
626
+ ; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
627
+ ; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
628
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
629
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
630
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
631
+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
632
+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
633
+ ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
634
+ ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
635
+ ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
636
+ ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
637
+ ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
638
+ ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
639
+ ; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
640
+ ; CHECK-NEXT: ret void
641
+ ;
642
+ %gep_l0 = getelementptr inbounds i8 , ptr %pl , i64 0 ; group 0: source bytes 0..3
643
+ %gep_l1 = getelementptr inbounds i8 , ptr %pl , i64 1
644
+ %gep_l2 = getelementptr inbounds i8 , ptr %pl , i64 2
645
+ %gep_l3 = getelementptr inbounds i8 , ptr %pl , i64 3
646
+ %gep_l4 = getelementptr inbounds i8 , ptr %pl , i64 100 ; group 1: source bytes 100..103
647
+ %gep_l5 = getelementptr inbounds i8 , ptr %pl , i64 101
648
+ %gep_l6 = getelementptr inbounds i8 , ptr %pl , i64 102
649
+ %gep_l7 = getelementptr inbounds i8 , ptr %pl , i64 103
650
+ %gep_l8 = getelementptr inbounds i8 , ptr %pl , i64 200 ; group 2: source bytes 200..203
651
+ %gep_l9 = getelementptr inbounds i8 , ptr %pl , i64 201
652
+ %gep_l10 = getelementptr inbounds i8 , ptr %pl , i64 202
653
+ %gep_l11 = getelementptr inbounds i8 , ptr %pl , i64 203
654
+ %gep_l12 = getelementptr inbounds i8 , ptr %pl , i64 300 ; group 3: source bytes 300..303
655
+ %gep_l13 = getelementptr inbounds i8 , ptr %pl , i64 301
656
+ %gep_l14 = getelementptr inbounds i8 , ptr %pl , i64 302
657
+ %gep_l15 = getelementptr inbounds i8 , ptr %pl , i64 303
658
+ ; Scalar i8 loads, one per source address computed above.
659
+ %load0 = load i8 , ptr %gep_l0 , align 1
660
+ %load1 = load i8 , ptr %gep_l1 , align 1
661
+ %load2 = load i8 , ptr %gep_l2 , align 1
662
+ %load3 = load i8 , ptr %gep_l3 , align 1
663
+ %load4 = load i8 , ptr %gep_l4 , align 1
664
+ %load5 = load i8 , ptr %gep_l5 , align 1
665
+ %load6 = load i8 , ptr %gep_l6 , align 1
666
+ %load7 = load i8 , ptr %gep_l7 , align 1
667
+ %load8 = load i8 , ptr %gep_l8 , align 1
668
+ %load9 = load i8 , ptr %gep_l9 , align 1
669
+ %load10 = load i8 , ptr %gep_l10 , align 1
670
+ %load11 = load i8 , ptr %gep_l11 , align 1
671
+ %load12 = load i8 , ptr %gep_l12 , align 1
672
+ %load13 = load i8 , ptr %gep_l13 , align 1
673
+ %load14 = load i8 , ptr %gep_l14 , align 1
674
+ %load15 = load i8 , ptr %gep_l15 , align 1
675
+ ; Destination addresses: 16 consecutive bytes starting at %ps.
676
+ %gep_s0 = getelementptr inbounds i8 , ptr %ps , i64 0
677
+ %gep_s1 = getelementptr inbounds i8 , ptr %ps , i64 1
678
+ %gep_s2 = getelementptr inbounds i8 , ptr %ps , i64 2
679
+ %gep_s3 = getelementptr inbounds i8 , ptr %ps , i64 3
680
+ %gep_s4 = getelementptr inbounds i8 , ptr %ps , i64 4
681
+ %gep_s5 = getelementptr inbounds i8 , ptr %ps , i64 5
682
+ %gep_s6 = getelementptr inbounds i8 , ptr %ps , i64 6
683
+ %gep_s7 = getelementptr inbounds i8 , ptr %ps , i64 7
684
+ %gep_s8 = getelementptr inbounds i8 , ptr %ps , i64 8
685
+ %gep_s9 = getelementptr inbounds i8 , ptr %ps , i64 9
686
+ %gep_s10 = getelementptr inbounds i8 , ptr %ps , i64 10
687
+ %gep_s11 = getelementptr inbounds i8 , ptr %ps , i64 11
688
+ %gep_s12 = getelementptr inbounds i8 , ptr %ps , i64 12
689
+ %gep_s13 = getelementptr inbounds i8 , ptr %ps , i64 13
690
+ %gep_s14 = getelementptr inbounds i8 , ptr %ps , i64 14
691
+ %gep_s15 = getelementptr inbounds i8 , ptr %ps , i64 15
692
+ ; Store loaded byte N to destination slot N, so source order is preserved.
693
+ store i8 %load0 , ptr %gep_s0 , align 1
694
+ store i8 %load1 , ptr %gep_s1 , align 1
695
+ store i8 %load2 , ptr %gep_s2 , align 1
696
+ store i8 %load3 , ptr %gep_s3 , align 1
697
+ store i8 %load4 , ptr %gep_s4 , align 1
698
+ store i8 %load5 , ptr %gep_s5 , align 1
699
+ store i8 %load6 , ptr %gep_s6 , align 1
700
+ store i8 %load7 , ptr %gep_s7 , align 1
701
+ store i8 %load8 , ptr %gep_s8 , align 1
702
+ store i8 %load9 , ptr %gep_s9 , align 1
703
+ store i8 %load10 , ptr %gep_s10 , align 1
704
+ store i8 %load11 , ptr %gep_s11 , align 1
705
+ store i8 %load12 , ptr %gep_s12 , align 1
706
+ store i8 %load13 , ptr %gep_s13 , align 1
707
+ store i8 %load14 , ptr %gep_s14 , align 1
708
+ store i8 %load15 , ptr %gep_s15 , align 1
709
+
710
+ ret void
711
+ }
620
712
; TODO: We want to generate this code:
621
713
; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
622
714
; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
0 commit comments