@@ -801,6 +801,106 @@ entry:
801801 %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
802802 ret <4 x float > %vecins.3
803803}
; Test input for the two-operand libm call @atan2f.
; The function loads two <4 x float> vectors from %a and %b, extracts every
; lane of both, calls @atan2f (with fast-math flags) once per lane pair, and
; rebuilds a <4 x float> result with an insertelement chain.
; Expectations under the CHECK prefix - the four scalar calls are replaced by
; one <4 x float> call to @vatan2f that takes both loaded vectors directly.
; Expectations under the NOACCELERATE prefix - the four scalar @atan2f calls
; and the extract/insert chains are left as written.
; NOTE(review): the RUN lines defining these prefixes are outside this hunk;
; presumably CHECK runs with the Accelerate vector library selected and
; NOACCELERATE runs without it - confirm against the top of the file.
804+ declare float @atan2f (float ,float ) readonly nounwind willreturn
805+ define <4 x float > @atan2_4x (ptr %a , ptr %b ) {
806+ ; CHECK-LABEL: @atan2_4x(
807+ ; CHECK-NEXT: entry:
808+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
809+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
810+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
811+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
812+ ;
813+ ; NOACCELERATE-LABEL: @atan2_4x(
814+ ; NOACCELERATE-NEXT: entry:
815+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
816+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
817+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
818+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
819+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atan2f(float [[VECEXT]], float [[VECEXTB]])
820+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
821+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
822+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
823+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
824+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
825+ ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
826+ ; NOACCELERATE-NEXT: [[VECEXTB_2:%.*]] = extractelement <4 x float> [[BB]], i32 2
827+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @atan2f(float [[VECEXT_2]], float [[VECEXTB_2]])
828+ ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
829+ ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
830+ ; NOACCELERATE-NEXT: [[VECEXTB_3:%.*]] = extractelement <4 x float> [[BB]], i32 3
831+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @atan2f(float [[VECEXT_3]], float [[VECEXTB_3]])
832+ ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
833+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
834+ ;
835+ entry:
836+ %0 = load <4 x float >, ptr %a , align 16
837+ %bb = load <4 x float >, ptr %b , align 16
838+ %vecext = extractelement <4 x float > %0 , i32 0
839+ %vecextb = extractelement <4 x float > %bb , i32 0
840+ %1 = tail call fast float @atan2f (float %vecext , float %vecextb )
841+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
842+ %vecext.1 = extractelement <4 x float > %0 , i32 1
843+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
844+ %2 = tail call fast float @atan2f (float %vecext.1 , float %vecextb.1 )
845+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
846+ %vecext.2 = extractelement <4 x float > %0 , i32 2
847+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
848+ %3 = tail call fast float @atan2f (float %vecext.2 , float %vecextb.2 )
849+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
850+ %vecext.3 = extractelement <4 x float > %0 , i32 3
851+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
852+ %4 = tail call fast float @atan2f (float %vecext.3 , float %vecextb.3 )
853+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
854+ ret <4 x float > %vecins.3
855+ }
; Intrinsic counterpart of the @atan2f test: the same per-lane pattern over
; two <4 x float> vectors loaded from %a and %b, but each lane calls the
; @llvm.atan2.f32 intrinsic (with fast-math flags) instead of the libm symbol.
; Expectations under the CHECK prefix - the four intrinsic calls are replaced
; by one <4 x float> call to @vatan2f that takes both loaded vectors.
; Expectations under the NOACCELERATE prefix - lanes 0 and 1 keep their scalar
; @llvm.atan2.f32 calls, while lanes 2 and 3 are gathered with shufflevector
; into <2 x float> operands, passed to a single @llvm.atan2.v2f32 call, and
; the 2-wide result is shuffled back into the upper half of the return value.
; NOTE(review): the RUN lines defining these prefixes are outside this hunk;
; presumably CHECK runs with the Accelerate vector library selected and
; NOACCELERATE runs without it - confirm against the top of the file.
856+ define <4 x float > @int_atan2_4x (ptr %a , ptr %b ) {
857+ ; CHECK-LABEL: @int_atan2_4x(
858+ ; CHECK-NEXT: entry:
859+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
860+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
861+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
862+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
863+ ;
864+ ; NOACCELERATE-LABEL: @int_atan2_4x(
865+ ; NOACCELERATE-NEXT: entry:
866+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
867+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
868+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
869+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
870+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT]], float [[VECEXTB]])
871+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
872+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
873+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
874+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
875+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
876+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
877+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
878+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
879+ ; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
880+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
881+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
882+ ;
883+ entry:
884+ %0 = load <4 x float >, ptr %a , align 16
885+ %bb = load <4 x float >, ptr %b , align 16
886+ %vecext = extractelement <4 x float > %0 , i32 0
887+ %vecextb = extractelement <4 x float > %bb , i32 0
888+ %1 = tail call fast float @llvm.atan2.f32 (float %vecext , float %vecextb )
889+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
890+ %vecext.1 = extractelement <4 x float > %0 , i32 1
891+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
892+ %2 = tail call fast float @llvm.atan2.f32 (float %vecext.1 , float %vecextb.1 )
893+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
894+ %vecext.2 = extractelement <4 x float > %0 , i32 2
895+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
896+ %3 = tail call fast float @llvm.atan2.f32 (float %vecext.2 , float %vecextb.2 )
897+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
898+ %vecext.3 = extractelement <4 x float > %0 , i32 3
899+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
900+ %4 = tail call fast float @llvm.atan2.f32 (float %vecext.3 , float %vecextb.3 )
901+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
902+ ret <4 x float > %vecins.3
903+ }
804904declare float @sinhf (float ) readonly nounwind willreturn
805905define <4 x float > @sinh_4x (ptr %a ) {
806906; CHECK-LABEL: @sinh_4x(
0 commit comments