@@ -15,71 +15,130 @@ function __init__()
1515 end
1616end
1717
18+ # # List all wrapped functions, grouped by the element types they support
19+ # Functions defined for both real (Float32, Float64) and complex (ComplexF32, ComplexF64) inputs
20+ unary_real_complex = (
21+ (:acos, :acos!, :Acos),
22+ (:asin, :asin!, :Asin),
23+ (:acosh, :acosh!, :Acosh),
24+ (:asinh, :asinh!, :Asinh),
25+ (:sqrt, :sqrt!, :Sqrt),
26+ (:exp, :exp!, :Exp),
27+ (:log, :log!, :Ln),
28+ )
29+
30+ binary_real_complex = (
31+ (:pow, :pow!, :Pow, true ),
32+ (:divide, :divide!, :Div, true ),
33+ )
34+
35+ # Functions defined for real inputs only (Float32, Float64)
36+ unary_real = (
37+ (:cbrt, :cbrt!, :Cbrt),
38+ (:expm1, :expm1!, :Expm1),
39+ (:log1p, :log1p!, :Log1p),
40+ (:log2, :log2!, :Log2),
41+ (:abs, :abs!, :Abs),
42+ (:abs2, :abs2!, :Sqr),
43+ (:ceil, :ceil!, :Ceil),
44+ (:floor, :floor!, :Floor),
45+ (:round, :round!, :Round),
46+ (:trunc, :trunc!, :Trunc),
47+ (:cospi, :cospi!, :Cospi),
48+ (:sinpi, :sinpi!, :Sinpi),
49+ (:tanpi, :tanpi!, :Tanpi),
50+ (:acospi, :acospi!, :Acospi),
51+ (:asinpi, :asinpi!, :Asinpi),
52+ (:atanpi, :atanpi!, :Atanpi),
53+ (:cosd, :cosd!, :Cosd),
54+ (:sind, :sind!, :Sind),
55+ (:tand, :tand!, :Tand),
56+ # Enabled only for Real. MKL guarantees higher accuracy, but at a
57+ # substantial performance cost.
58+ (:atan, :atan!, :Atan),
59+ (:cos, :cos!, :Cos),
60+ (:sin, :sin!, :Sin),
61+ (:tan, :tan!, :Tan),
62+ (:atanh, :atanh!, :Atanh),
63+ (:cosh, :cosh!, :Cosh),
64+ (:sinh, :sinh!, :Sinh),
65+ (:tanh, :tanh!, :Tanh),
66+ (:log10, :log10!, :Log10),
67+ # Now provided by SpecialFunctions (TODO: consider handling this more smartly)
68+ (:erf, :erf!, :Erf),
69+ (:erfc, :erfc!, :Erfc),
70+ (:erfinv, :erfinv!, :ErfInv),
71+ (:erfcinv, :erfcinv!, :ErfcInv),
72+ (:lgamma, :lgamma!, :LGamma),
73+ (:gamma, :gamma!, :TGamma),
74+ # Not in Base
75+ (:inv_cbrt, :inv_cbrt!, :InvCbrt),
76+ (:inv_sqrt, :inv_sqrt!, :InvSqrt),
77+ (:pow2o3, :pow2o3!, :Pow2o3),
78+ (:pow3o2, :pow3o2!, :Pow3o2),
79+ )
80+
81+ binary_real = (
82+ (:atan, :atan!, :Atan2, false ),
83+ (:hypot, :hypot!, :Hypot, false ),
84+ # Not in Base
85+ (:atanpi, :atanpi!, :Atan2pi, false ),
86+ )
87+
88+ unary_complex_in = (
89+ (:abs, :abs!, :Abs),
90+ (:angle, :angle!, :Arg),
91+ )
92+
93+ unary_complex_inout = (
94+ (:conj, :conj!, :Conj),
95+ )
96+
97+ # # Define the functions from the lists above for every eligible input type
98+
1899for t in (Float32, Float64, ComplexF32, ComplexF64)
19100 # Unary, real or complex
20- def_unary_op(t, t, :acos, :acos!, :Acos)
21- def_unary_op(t, t, :asin, :asin!, :Asin)
22- def_unary_op(t, t, :acosh, :acosh!, :Acosh)
23- def_unary_op(t, t, :asinh, :asinh!, :Asinh)
24- def_unary_op(t, t, :sqrt, :sqrt!, :Sqrt)
25- def_unary_op(t, t, :exp, :exp!, :Exp)
26- def_unary_op(t, t, :log, :log!, :Ln)
101+ for (f, f!, f_mkl) in unary_real_complex
102+ def_unary_op(t, t, f, f!, f_mkl)
103+ end
27104
28105 # # Binary, real or complex
29- def_binary_op(t, t, :pow, :pow!, :Pow, true )
30- def_binary_op(t, t, :divide, :divide!, :Div, true )
106+ for (f, f!, f_mkl, broadcast) in binary_real_complex
107+ def_binary_op(t, t, f, f!, f_mkl, broadcast)
108+ end
31109end
32110
33111for t in (Float32, Float64)
34- # Unary, real-only
35- def_unary_op(t, t, :cbrt, :cbrt!, :Cbrt)
36- def_unary_op(t, t, :expm1, :expm1!, :Expm1)
37- def_unary_op(t, t, :log1p, :log1p!, :Log1p)
38- def_unary_op(t, t, :log2, :log2!, :Log2)
39- def_unary_op(t, t, :abs, :abs!, :Abs)
40- def_unary_op(t, t, :abs2, :abs2!, :Sqr)
41- def_unary_op(t, t, :ceil, :ceil!, :Ceil)
42- def_unary_op(t, t, :floor, :floor!, :Floor)
43- def_unary_op(t, t, :round, :round!, :Round)
44- def_unary_op(t, t, :trunc, :trunc!, :Trunc)
112+ # Unary, real only
113+ for (f, f!, f_mkl) in unary_real
114+ def_unary_op(t, t, f, f!, f_mkl)
115+ end
45116
46- # Enabled only for Real. MKL guarantees higher accuracy, but at a
47- # substantial performance cost.
48- def_unary_op(t, t, :atan, :atan!, :Atan)
49- def_unary_op(t, t, :cos, :cos!, :Cos)
50- def_unary_op(t, t, :sin, :sin!, :Sin)
51- def_unary_op(t, t, :tan, :tan!, :Tan)
52- def_unary_op(t, t, :atanh, :atanh!, :Atanh)
53- def_unary_op(t, t, :cosh, :cosh!, :Cosh)
54- def_unary_op(t, t, :sinh, :sinh!, :Sinh)
55- def_unary_op(t, t, :tanh, :tanh!, :Tanh)
56- def_unary_op(t, t, :log10, :log10!, :Log10)
57-
58- # Unary, real-only
59- def_unary_op(t, t, :cospi, :cospi!, :Cospi)
60- def_unary_op(t, t, :sinpi, :sinpi!, :Sinpi)
61- def_unary_op(t, t, :tanpi, :tanpi!, :Tanpi)
62- def_unary_op(t, t, :acospi, :acospi!, :Acospi)
63- def_unary_op(t, t, :asinpi, :asinpi!, :Asinpi)
64- def_unary_op(t, t, :atanpi, :atanpi!, :Atanpi)
65- def_unary_op(t, t, :cosd, :cosd!, :Cosd)
66- def_unary_op(t, t, :sind, :sind!, :Sind)
67- def_unary_op(t, t, :tand, :tand!, :Tand)
117+ for (f, f!, f_mkl, broadcast) in binary_real
118+ def_binary_op(t, t, f, f!, f_mkl, broadcast)
119+ end
120+
121+ # Unary, complex-only
122+ for (f, f!, f_mkl) in unary_complex_inout
123+ def_unary_op(Complex{t}, Complex{t}, f, f!, f_mkl)
124+ end
125+ for (f, f!, f_mkl) in unary_complex_in
126+ def_unary_op(Complex{t}, t, f, f!, f_mkl)
127+ end
128+
129+ # ## cis is special: the IntelVectorMath function is chosen by the output type (Complex{t}), hence `vmltype`
130+ def_unary_op(t, Complex{t}, :cis, :cis!, :CIS; vmltype= Complex{t})
68131
69132 def_one2two_op(t, t, :sincos, :sincos!, :SinCos)
70133
71- # now in SpecialFunctions (make smart, maybe?)
72- def_unary_op(t, t, :erf, :erf!, :Erf)
73- def_unary_op(t, t, :erfc, :erfc!, :Erfc)
74- def_unary_op(t, t, :erfinv, :erfinv!, :ErfInv)
75- def_unary_op(t, t, :erfcinv, :erfcinv!, :ErfcInv)
76- def_unary_op(t, t, :lgamma, :lgamma!, :LGamma)
77- def_unary_op(t, t, :gamma, :gamma!, :TGamma)
78- # Not in Base
79- def_unary_op(t, t, :inv_cbrt, :inv_cbrt!, :InvCbrt)
80- def_unary_op(t, t, :inv_sqrt, :inv_sqrt!, :InvSqrt)
81- def_unary_op(t, t, :pow2o3, :pow2o3!, :Pow2o3)
82- def_unary_op(t, t, :pow3o2, :pow3o2!, :Pow3o2)
134+ # Binary, complex-only. These are more accurate but performance is
135+ # either equivalent to Base or slower.
136+ # def_binary_op(Complex{t}, Complex{t}, (:+), :add!, :Add, false)
137+ # def_binary_op(Complex{t}, Complex{t}, (:.+), :add!, :Add, true)
138+ # def_binary_op(Complex{t}, Complex{t}, (:.*), :multiply!, :Mul, true)
139+ # def_binary_op(Complex{t}, Complex{t}, (:-), :subtract!, :Sub, false)
140+ # def_binary_op(Complex{t}, Complex{t}, (:.-), :subtract!, :Sub, true)
141+ # def_binary_op(Complex{t}, Complex{t}, :multiply_conj, :multiply_conj!, :Mul, false)
83142
84143 # # .^ to scalar power
85144 # mklfn = Base.Meta.quot(Symbol("$(vml_prefix(t))Powx"))
@@ -98,28 +157,6 @@ for t in (Float32, Float64)
98157 # out
99158 # end
100159 # end
101-
102- # # Binary, real-only
103- def_binary_op(t, t, :atan, :atan!, :Atan2, false )
104- def_binary_op(t, t, :atanpi, :atanpi!, :Atan2pi, false )
105- def_binary_op(t, t, :hypot, :hypot!, :Hypot, false )
106-
107- # Unary, complex-only
108- def_unary_op(Complex{t}, Complex{t}, :conj, :conj!, :Conj)
109- def_unary_op(Complex{t}, t, :abs, :abs!, :Abs)
110- def_unary_op(Complex{t}, t, :angle, :angle!, :Arg)
111-
112- # ## cis is special, IntelVectorMath function is based on output
113- def_unary_op(t, Complex{t}, :cis, :cis!, :CIS; vmltype = Complex{t})
114-
115- # Binary, complex-only. These are more accurate but performance is
116- # either equivalent to Base or slower.
117- # def_binary_op(Complex{t}, Complex{t}, (:+), :add!, :Add, false)
118- # def_binary_op(Complex{t}, Complex{t}, (:.+), :add!, :Add, true)
119- # def_binary_op(Complex{t}, Complex{t}, (:.*), :multiply!, :Mul, true)
120- # def_binary_op(Complex{t}, Complex{t}, (:-), :subtract!, :Sub, false)
121- # def_binary_op(Complex{t}, Complex{t}, (:.-), :subtract!, :Sub, true)
122- # def_binary_op(Complex{t}, Complex{t}, :multiply_conj, :multiply_conj!, :Mul, false)
123160end
124161
125162export VML_LA, VML_HA, VML_EP, vml_set_accuracy, vml_get_accuracy
0 commit comments