@@ -621,8 +621,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
621
621
; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8
622
622
; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
623
623
; I32-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
624
- ; I32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0
625
- ; I32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
626
624
; I32-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0
627
625
; I32-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer
628
626
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -644,14 +642,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
644
642
; I32-NEXT: [[TMP16:%.*]] = add i64 [[TMP8]], 1
645
643
; I32-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 1
646
644
; I32-NEXT: [[TMP18:%.*]] = add i64 [[TMP10]], 1
647
- ; I32-NEXT: [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0
648
- ; I32-NEXT: [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1
649
- ; I32-NEXT: [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2
650
- ; I32-NEXT: [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3
651
- ; I32-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4
652
- ; I32-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5
653
- ; I32-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6
654
- ; I32-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7
655
645
; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]]
656
646
; I32-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
657
647
; I32-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]]
@@ -677,22 +667,21 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
677
667
; I32-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6
678
668
; I32-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7
679
669
; I32-NEXT: [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer
680
- ; I32-NEXT: [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]]
681
- ; I32-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0
670
+ ; I32-NEXT: [[TMP53:%.*]] = mul i64 [[TMP11]], [[START]]
671
+ ; I32-NEXT: [[TMP55:%.*]] = mul i64 [[TMP12]], [[START]]
672
+ ; I32-NEXT: [[TMP57:%.*]] = mul i64 [[TMP13]], [[START]]
673
+ ; I32-NEXT: [[TMP59:%.*]] = mul i64 [[TMP14]], [[START]]
674
+ ; I32-NEXT: [[TMP61:%.*]] = mul i64 [[TMP15]], [[START]]
675
+ ; I32-NEXT: [[TMP63:%.*]] = mul i64 [[TMP16]], [[START]]
676
+ ; I32-NEXT: [[TMP65:%.*]] = mul i64 [[TMP17]], [[START]]
677
+ ; I32-NEXT: [[TMP67:%.*]] = mul i64 [[TMP18]], [[START]]
682
678
; I32-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]]
683
- ; I32-NEXT: [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1
684
679
; I32-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]]
685
- ; I32-NEXT: [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2
686
680
; I32-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]]
687
- ; I32-NEXT: [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3
688
681
; I32-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]]
689
- ; I32-NEXT: [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4
690
682
; I32-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]]
691
- ; I32-NEXT: [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5
692
683
; I32-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]]
693
- ; I32-NEXT: [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6
694
684
; I32-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]]
695
- ; I32-NEXT: [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7
696
685
; I32-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]]
697
686
; I32-NEXT: [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0
698
687
; I32-NEXT: [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1
@@ -774,7 +763,222 @@ exit:
774
763
ret void
775
764
}
776
765
777
- attributes #0 = { "target-cpu" ="znver3" }
766
+ define void @address_use_in_different_block (ptr noalias %dst , ptr %src.0 , ptr %src.1 , i32 %x ) #0 { ; Test input: a value loaded in %loop.header is turned into a load address in %loop.latch, i.e. in a different block.
767
+ ; I64-LABEL: define void @address_use_in_different_block(
768
+ ; I64-SAME: ptr noalias [[DST:%.*]], ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
769
+ ; I64-NEXT: [[ENTRY:.*:]]
770
+ ; I64-NEXT: [[X_POS:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
771
+ ; I64-NEXT: [[OFFSET:%.*]] = zext i32 [[X_POS]] to i64
772
+ ; I64-NEXT: br label %[[VECTOR_PH:.*]]
773
+ ; I64: [[VECTOR_PH]]:
774
+ ; I64-NEXT: br label %[[VECTOR_BODY:.*]]
775
+ ; I64: [[VECTOR_BODY]]:
776
+ ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
777
+ ; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
778
+ ; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
779
+ ; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
780
+ ; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
781
+ ; I64-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
782
+ ; I64-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
783
+ ; I64-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
784
+ ; I64-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
785
+ ; I64-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
786
+ ; I64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
787
+ ; I64-NEXT: [[TMP10:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
788
+ ; I64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
789
+ ; I64-NEXT: [[TMP12:%.*]] = mul i64 [[TMP4]], [[OFFSET]]
790
+ ; I64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP5]], [[OFFSET]]
791
+ ; I64-NEXT: [[TMP14:%.*]] = mul i64 [[TMP6]], [[OFFSET]]
792
+ ; I64-NEXT: [[TMP15:%.*]] = mul i64 [[TMP7]], [[OFFSET]]
793
+ ; I64-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP8]]
794
+ ; I64-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP9]]
795
+ ; I64-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP10]]
796
+ ; I64-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP11]]
797
+ ; I64-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP12]]
798
+ ; I64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP13]]
799
+ ; I64-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP14]]
800
+ ; I64-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP15]]
801
+ ; I64-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP16]], align 4
802
+ ; I64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP17]], align 4
803
+ ; I64-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP18]], align 4
804
+ ; I64-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP19]], align 4
805
+ ; I64-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP20]], align 4
806
+ ; I64-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP21]], align 4
807
+ ; I64-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP22]], align 4
808
+ ; I64-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP23]], align 4
809
+ ; I64-NEXT: [[TMP32:%.*]] = sext i32 [[TMP24]] to i64
810
+ ; I64-NEXT: [[TMP33:%.*]] = sext i32 [[TMP25]] to i64
811
+ ; I64-NEXT: [[TMP34:%.*]] = sext i32 [[TMP26]] to i64
812
+ ; I64-NEXT: [[TMP35:%.*]] = sext i32 [[TMP27]] to i64
813
+ ; I64-NEXT: [[TMP36:%.*]] = sext i32 [[TMP28]] to i64
814
+ ; I64-NEXT: [[TMP37:%.*]] = sext i32 [[TMP29]] to i64
815
+ ; I64-NEXT: [[TMP38:%.*]] = sext i32 [[TMP30]] to i64
816
+ ; I64-NEXT: [[TMP39:%.*]] = sext i32 [[TMP31]] to i64
817
+ ; I64-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP32]]
818
+ ; I64-NEXT: [[TMP41:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP33]]
819
+ ; I64-NEXT: [[TMP42:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP34]]
820
+ ; I64-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP35]]
821
+ ; I64-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP36]]
822
+ ; I64-NEXT: [[TMP45:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP37]]
823
+ ; I64-NEXT: [[TMP46:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP38]]
824
+ ; I64-NEXT: [[TMP47:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP39]]
825
+ ; I64-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 -8
826
+ ; I64-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[TMP41]], i64 -8
827
+ ; I64-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[TMP42]], i64 -8
828
+ ; I64-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[TMP43]], i64 -8
829
+ ; I64-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[TMP44]], i64 -8
830
+ ; I64-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[TMP45]], i64 -8
831
+ ; I64-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[TMP46]], i64 -8
832
+ ; I64-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP47]], i64 -8
833
+ ; I64-NEXT: [[TMP56:%.*]] = load double, ptr [[TMP48]], align 8
834
+ ; I64-NEXT: [[TMP57:%.*]] = load double, ptr [[TMP49]], align 8
835
+ ; I64-NEXT: [[TMP58:%.*]] = insertelement <2 x double> poison, double [[TMP56]], i32 0
836
+ ; I64-NEXT: [[TMP59:%.*]] = insertelement <2 x double> [[TMP58]], double [[TMP57]], i32 1
837
+ ; I64-NEXT: [[TMP60:%.*]] = load double, ptr [[TMP50]], align 8
838
+ ; I64-NEXT: [[TMP61:%.*]] = load double, ptr [[TMP51]], align 8
839
+ ; I64-NEXT: [[TMP62:%.*]] = insertelement <2 x double> poison, double [[TMP60]], i32 0
840
+ ; I64-NEXT: [[TMP63:%.*]] = insertelement <2 x double> [[TMP62]], double [[TMP61]], i32 1
841
+ ; I64-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP52]], align 8
842
+ ; I64-NEXT: [[TMP65:%.*]] = load double, ptr [[TMP53]], align 8
843
+ ; I64-NEXT: [[TMP66:%.*]] = insertelement <2 x double> poison, double [[TMP64]], i32 0
844
+ ; I64-NEXT: [[TMP67:%.*]] = insertelement <2 x double> [[TMP66]], double [[TMP65]], i32 1
845
+ ; I64-NEXT: [[TMP68:%.*]] = load double, ptr [[TMP54]], align 8
846
+ ; I64-NEXT: [[TMP69:%.*]] = load double, ptr [[TMP55]], align 8
847
+ ; I64-NEXT: [[TMP70:%.*]] = insertelement <2 x double> poison, double [[TMP68]], i32 0
848
+ ; I64-NEXT: [[TMP71:%.*]] = insertelement <2 x double> [[TMP70]], double [[TMP69]], i32 1
849
+ ; I64-NEXT: [[TMP72:%.*]] = fsub <2 x double> zeroinitializer, [[TMP59]]
850
+ ; I64-NEXT: [[TMP73:%.*]] = fsub <2 x double> zeroinitializer, [[TMP63]]
851
+ ; I64-NEXT: [[TMP74:%.*]] = fsub <2 x double> zeroinitializer, [[TMP67]]
852
+ ; I64-NEXT: [[TMP75:%.*]] = fsub <2 x double> zeroinitializer, [[TMP71]]
853
+ ; I64-NEXT: [[TMP76:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP8]]
854
+ ; I64-NEXT: [[TMP77:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP9]]
855
+ ; I64-NEXT: [[TMP78:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP10]]
856
+ ; I64-NEXT: [[TMP79:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP11]]
857
+ ; I64-NEXT: [[TMP80:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP12]]
858
+ ; I64-NEXT: [[TMP81:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP13]]
859
+ ; I64-NEXT: [[TMP82:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP14]]
860
+ ; I64-NEXT: [[TMP83:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP15]]
861
+ ; I64-NEXT: [[TMP84:%.*]] = extractelement <2 x double> [[TMP72]], i32 0
862
+ ; I64-NEXT: store double [[TMP84]], ptr [[TMP76]], align 8
863
+ ; I64-NEXT: [[TMP85:%.*]] = extractelement <2 x double> [[TMP72]], i32 1
864
+ ; I64-NEXT: store double [[TMP85]], ptr [[TMP77]], align 8
865
+ ; I64-NEXT: [[TMP86:%.*]] = extractelement <2 x double> [[TMP73]], i32 0
866
+ ; I64-NEXT: store double [[TMP86]], ptr [[TMP78]], align 8
867
+ ; I64-NEXT: [[TMP87:%.*]] = extractelement <2 x double> [[TMP73]], i32 1
868
+ ; I64-NEXT: store double [[TMP87]], ptr [[TMP79]], align 8
869
+ ; I64-NEXT: [[TMP88:%.*]] = extractelement <2 x double> [[TMP74]], i32 0
870
+ ; I64-NEXT: store double [[TMP88]], ptr [[TMP80]], align 8
871
+ ; I64-NEXT: [[TMP89:%.*]] = extractelement <2 x double> [[TMP74]], i32 1
872
+ ; I64-NEXT: store double [[TMP89]], ptr [[TMP81]], align 8
873
+ ; I64-NEXT: [[TMP90:%.*]] = extractelement <2 x double> [[TMP75]], i32 0
874
+ ; I64-NEXT: store double [[TMP90]], ptr [[TMP82]], align 8
875
+ ; I64-NEXT: [[TMP91:%.*]] = extractelement <2 x double> [[TMP75]], i32 1
876
+ ; I64-NEXT: store double [[TMP91]], ptr [[TMP83]], align 8
877
+ ; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
878
+ ; I64-NEXT: [[TMP92:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
879
+ ; I64-NEXT: br i1 [[TMP92]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
880
+ ; I64: [[MIDDLE_BLOCK]]:
881
+ ; I64-NEXT: br label %[[SCALAR_PH:.*]]
882
+ ; I64: [[SCALAR_PH]]:
883
+ ;
884
+ ; I32-LABEL: define void @address_use_in_different_block(
885
+ ; I32-SAME: ptr noalias [[DST:%.*]], ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
886
+ ; I32-NEXT: [[ENTRY:.*:]]
887
+ ; I32-NEXT: [[X_POS:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
888
+ ; I32-NEXT: [[OFFSET:%.*]] = zext i32 [[X_POS]] to i64
889
+ ; I32-NEXT: br label %[[VECTOR_PH:.*]]
890
+ ; I32: [[VECTOR_PH]]:
891
+ ; I32-NEXT: br label %[[VECTOR_BODY:.*]]
892
+ ; I32: [[VECTOR_BODY]]:
893
+ ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
894
+ ; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
895
+ ; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
896
+ ; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
897
+ ; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
898
+ ; I32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
899
+ ; I32-NEXT: [[TMP5:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
900
+ ; I32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
901
+ ; I32-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
902
+ ; I32-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP4]]
903
+ ; I32-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP5]]
904
+ ; I32-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP6]]
905
+ ; I32-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC_0]], i64 [[TMP7]]
906
+ ; I32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4
907
+ ; I32-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4
908
+ ; I32-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 4
909
+ ; I32-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
910
+ ; I32-NEXT: [[TMP16:%.*]] = sext i32 [[TMP12]] to i64
911
+ ; I32-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64
912
+ ; I32-NEXT: [[TMP18:%.*]] = sext i32 [[TMP14]] to i64
913
+ ; I32-NEXT: [[TMP19:%.*]] = sext i32 [[TMP15]] to i64
914
+ ; I32-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP16]]
915
+ ; I32-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP17]]
916
+ ; I32-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP18]]
917
+ ; I32-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[TMP19]]
918
+ ; I32-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP20]], i64 -8
919
+ ; I32-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 -8
920
+ ; I32-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 -8
921
+ ; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP23]], i64 -8
922
+ ; I32-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP24]], align 8
923
+ ; I32-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP25]], align 8
924
+ ; I32-NEXT: [[TMP30:%.*]] = load double, ptr [[TMP26]], align 8
925
+ ; I32-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP27]], align 8
926
+ ; I32-NEXT: [[TMP32:%.*]] = insertelement <4 x double> poison, double [[TMP28]], i32 0
927
+ ; I32-NEXT: [[TMP33:%.*]] = insertelement <4 x double> [[TMP32]], double [[TMP29]], i32 1
928
+ ; I32-NEXT: [[TMP34:%.*]] = insertelement <4 x double> [[TMP33]], double [[TMP30]], i32 2
929
+ ; I32-NEXT: [[TMP35:%.*]] = insertelement <4 x double> [[TMP34]], double [[TMP31]], i32 3
930
+ ; I32-NEXT: [[TMP36:%.*]] = fsub <4 x double> zeroinitializer, [[TMP35]]
931
+ ; I32-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]]
932
+ ; I32-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP5]]
933
+ ; I32-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP6]]
934
+ ; I32-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP7]]
935
+ ; I32-NEXT: [[TMP41:%.*]] = extractelement <4 x double> [[TMP36]], i32 0
936
+ ; I32-NEXT: store double [[TMP41]], ptr [[TMP37]], align 8
937
+ ; I32-NEXT: [[TMP42:%.*]] = extractelement <4 x double> [[TMP36]], i32 1
938
+ ; I32-NEXT: store double [[TMP42]], ptr [[TMP38]], align 8
939
+ ; I32-NEXT: [[TMP43:%.*]] = extractelement <4 x double> [[TMP36]], i32 2
940
+ ; I32-NEXT: store double [[TMP43]], ptr [[TMP39]], align 8
941
+ ; I32-NEXT: [[TMP44:%.*]] = extractelement <4 x double> [[TMP36]], i32 3
942
+ ; I32-NEXT: store double [[TMP44]], ptr [[TMP40]], align 8
943
+ ; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
944
+ ; I32-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
945
+ ; I32-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
946
+ ; I32: [[MIDDLE_BLOCK]]:
947
+ ; I32-NEXT: br label %[[SCALAR_PH:.*]]
948
+ ; I32: [[SCALAR_PH]]:
949
+ ;
950
+ entry:
951
+ %x.pos = call i32 @llvm.smax.i32 (i32 %x , i32 0 ) ; clamp %x so the stride is never negative
952
+ %offset = zext i32 %x.pos to i64 ; loop-invariant stride, widened to the i64 index type
953
+ br label %loop.header
954
+
955
+ loop.header:
956
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop.latch ] ; induction variable, starts at 0 and is advanced in %loop.latch
957
+ %7 = mul i64 %iv , %offset ; strided index, reused for the %src.0 load here and the %dst store in %loop.latch
958
+ %gep.src.0 = getelementptr i32 , ptr %src.0 , i64 %7
959
+ %l8 = load i32 , ptr %gep.src.0 , align 4 ; loaded here, but only consumed in %loop.latch where it feeds an address
960
+ %c = icmp sgt i32 %x , 0 ; loop-invariant condition, depends only on the argument %x
961
+ br i1 %c , label %loop.latch , label %then ; both successors end up in %loop.latch, directly or via %then
962
+
963
+ then:
964
+ br label %loop.latch ; empty block, it only forwards to the latch
965
+
966
+ loop.latch:
967
+ %l.ext = sext i32 %l8 to i64 ; sign-extend the value loaded in %loop.header
968
+ %gep.src.1 = getelementptr double , ptr %src.1 , i64 %l.ext ; address computed from loaded data, in a different block than the load
969
+ %13 = getelementptr i8 , ptr %gep.src.1 , i64 -8 ; step back 8 bytes from that address
970
+ %l.2 = load double , ptr %13 , align 8 ; load whose address depends on the earlier load %l8
971
+ %sub = fsub double 0 .000000e+00 , %l.2 ; 0.0 minus the loaded double
972
+ %gep.dst = getelementptr double , ptr %dst , i64 %7 ; store address uses the same strided index as the %src.0 load
973
+ store double %sub , ptr %gep.dst , align 8
974
+ %iv.next = add i64 %iv , 1
975
+ %ec = icmp eq i64 %iv , 100 ; compares %iv rather than %iv.next, so the body runs for %iv = 0 through 100
976
+ br i1 %ec , label %exit , label %loop.header
977
+
978
+ exit:
979
+ ret void
980
+ }
981
+
778
982
attributes #0 = { "target-cpu" ="znver2" }
779
983
780
984
!0 = distinct !{!0 , !1 }
0 commit comments