@@ -801,6 +801,103 @@ entry:
801801 %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
802802 ret <4 x float > %vecins.3
803803}
; External scalar function atan2f(float, float) -> float. @atan2_4x below calls
; it once per vector lane.
804+ declare float @atan2f (float ,float ) readonly nounwind willreturn
; Test input: four unrolled scalar calls to @atan2f, one per lane of the two
; <4 x float> vectors loaded from %a and %b, with the results reassembled into
; a <4 x float> via insertelement.
;
; Expected output under the two check prefixes used below:
;   * CHECK prefix: the four scalar calls are replaced by a single call to
;     @vatan2f that takes both loaded vectors whole.
;   * NOACCELERATE prefix: lanes 0 and 1 remain scalar @atan2f calls, while
;     lanes 2 and 3 are combined into one @llvm.atan2.v2f32 call whose result
;     is shuffled back into the upper half of the return vector.
; NOTE(review): the RUN lines that select each prefix are outside this view;
; presumably the CHECK prefix runs with a vector math library enabled and the
; NOACCELERATE prefix runs without it. Confirm against the top of the file.
805+ define <4 x float > @atan2_4x (ptr %a , ptr %b ) {
806+ ; CHECK-LABEL: @atan2_4x(
807+ ; CHECK-NEXT: entry:
808+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
809+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
810+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
811+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
812+ ;
813+ ; NOACCELERATE-LABEL: @atan2_4x(
814+ ; NOACCELERATE-NEXT: entry:
815+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
816+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
817+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
818+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
819+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atan2f(float [[VECEXT]], float [[VECEXTB]])
820+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
821+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
822+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
823+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
824+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
825+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
826+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
827+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
828+ ; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
829+ ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
830+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
831+ ;
832+ entry:
; Load both operand vectors: %0 feeds the first atan2f argument of every lane,
; %bb feeds the second.
833+ %0 = load <4 x float >, ptr %a , align 16
834+ %bb = load <4 x float >, ptr %b , align 16
; Lane 0: extract one element from each operand, call atan2f on the pair, and
; start the result vector from poison.
835+ %vecext = extractelement <4 x float > %0 , i32 0
836+ %vecextb = extractelement <4 x float > %bb , i32 0
837+ %1 = tail call fast float @atan2f (float %vecext , float %vecextb )
838+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
; Lane 1: same pattern, inserted into the running result vector.
839+ %vecext.1 = extractelement <4 x float > %0 , i32 1
840+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
841+ %2 = tail call fast float @atan2f (float %vecext.1 , float %vecextb.1 )
842+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
; Lane 2.
843+ %vecext.2 = extractelement <4 x float > %0 , i32 2
844+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
845+ %3 = tail call fast float @atan2f (float %vecext.2 , float %vecextb.2 )
846+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
; Lane 3: fills the last element, so %vecins.3 holds all four results.
847+ %vecext.3 = extractelement <4 x float > %0 , i32 3
848+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
849+ %4 = tail call fast float @atan2f (float %vecext.3 , float %vecextb.3 )
850+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
851+ ret <4 x float > %vecins.3
852+ }
; Test input: four unrolled scalar calls to the @llvm.atan2.f32 intrinsic, one
; per lane of the two <4 x float> vectors loaded from %a and %b, with the
; results reassembled into a <4 x float> via insertelement.
;
; Expected output under the two check prefixes used below:
;   * CHECK prefix: the four intrinsic calls are replaced by a single call to
;     @vatan2f that takes both loaded vectors whole.
;   * NOACCELERATE prefix: lanes 0 and 1 remain scalar @llvm.atan2.f32 calls,
;     while lanes 2 and 3 are combined into one @llvm.atan2.v2f32 call whose
;     result is shuffled back into the upper half of the return vector.
; NOTE(review): the RUN lines that select each prefix are outside this view;
; confirm which configuration each prefix corresponds to at the top of the file.
853+ define <4 x float > @int_atan2_4x (ptr %a , ptr %b ) {
854+ ; CHECK-LABEL: @int_atan2_4x(
855+ ; CHECK-NEXT: entry:
856+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
857+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
858+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
859+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
860+ ;
861+ ; NOACCELERATE-LABEL: @int_atan2_4x(
862+ ; NOACCELERATE-NEXT: entry:
863+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
864+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
865+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
866+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
867+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT]], float [[VECEXTB]])
868+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
869+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
870+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
871+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
872+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
873+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
874+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
875+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
876+ ; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
877+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
878+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
879+ ;
880+ entry:
; Load both operand vectors: %0 feeds the first intrinsic argument of every
; lane, %bb feeds the second.
881+ %0 = load <4 x float >, ptr %a , align 16
882+ %bb = load <4 x float >, ptr %b , align 16
; Lane 0: extract one element from each operand, call the scalar intrinsic on
; the pair, and start the result vector from poison.
883+ %vecext = extractelement <4 x float > %0 , i32 0
884+ %vecextb = extractelement <4 x float > %bb , i32 0
885+ %1 = tail call fast float @llvm.atan2.f32 (float %vecext , float %vecextb )
886+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
; Lane 1: same pattern, inserted into the running result vector.
887+ %vecext.1 = extractelement <4 x float > %0 , i32 1
888+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
889+ %2 = tail call fast float @llvm.atan2.f32 (float %vecext.1 , float %vecextb.1 )
890+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
; Lane 2.
891+ %vecext.2 = extractelement <4 x float > %0 , i32 2
892+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
893+ %3 = tail call fast float @llvm.atan2.f32 (float %vecext.2 , float %vecextb.2 )
894+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
; Lane 3: fills the last element, so %vecins.3 holds all four results.
895+ %vecext.3 = extractelement <4 x float > %0 , i32 3
896+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
897+ %4 = tail call fast float @llvm.atan2.f32 (float %vecext.3 , float %vecextb.3 )
898+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
899+ ret <4 x float > %vecins.3
900+ }
804901declare float @sinhf (float ) readonly nounwind willreturn
805902define <4 x float > @sinh_4x (ptr %a ) {
806903; CHECK-LABEL: @sinh_4x(
0 commit comments