Skip to content

Commit ade704c

Browse files
authored
Improve M1 support (#183)
* Improve M1 support * apple silicon params * No `strict=true` for `makedocs`
1 parent dcc448d commit ade704c

File tree

10 files changed

+77
-64
lines changed

10 files changed

+77
-64
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Octavian"
22
uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
33
authors = ["Chris Elrod", "Dilum Aluthge", "Mason Protter", "contributors"]
4-
version = "0.3.25"
4+
version = "0.3.26"
55

66
[deps]
77
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"

benchmark/tilesearch.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ function matrix_range(S, ::Type{T} = Float64) where {T}
133133
Cs, As, Bs
134134
end
135135

136-
T = Float64
136+
T = Float32
137137
min_size = round(
138138
Int,
139139
sqrt(

docs/make.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ makedocs(;
1717
"Public API" => "public-api.md",
1818
"Internals (Private)" => "internals.md"
1919
],
20-
strict = true
2120
)
2221

2322
deploydocs(; repo = "github.com/JuliaLinearAlgebra/Octavian.jl")

ext/ForwardDiffExt.jl

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@ module ForwardDiffExt
22

33
using ForwardDiff: ForwardDiff
44

5-
using Octavian: ArrayInterface,
6-
@turbo, @tturbo,
7-
One, Zero,
8-
indices, static
5+
using Octavian: ArrayInterface, @turbo, @tturbo, One, Zero, indices, static
96
import Octavian: real_rep, _matmul!, _matmul_serial!
107

118
real_rep(a::AbstractArray{DualT}) where {TAG,T,DualT<:ForwardDiff.Dual{TAG,T}} =
@@ -53,9 +50,9 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
5350
MKN = nothing
5451
) where {TAG,T,DualT<:ForwardDiff.Dual{TAG,T}}
5552
if Bool(ArrayInterface.is_dense(_C)) &&
56-
Bool(ArrayInterface.is_column_major(_C)) &&
57-
Bool(ArrayInterface.is_dense(_A)) &&
58-
Bool(ArrayInterface.is_column_major(_A))
53+
Bool(ArrayInterface.is_column_major(_C)) &&
54+
Bool(ArrayInterface.is_dense(_A)) &&
55+
Bool(ArrayInterface.is_column_major(_A))
5956
# we can avoid the reshape and call the standard method
6057
A = reinterpret(T, _A)
6158
C = reinterpret(T, _C)
@@ -94,9 +91,9 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
9491
C = real_rep(_C)
9592
B = real_rep(_B)
9693
if Bool(ArrayInterface.is_dense(_C)) &&
97-
Bool(ArrayInterface.is_column_major(_C)) &&
98-
Bool(ArrayInterface.is_dense(_A)) &&
99-
Bool(ArrayInterface.is_column_major(_A))
94+
Bool(ArrayInterface.is_column_major(_C)) &&
95+
Bool(ArrayInterface.is_dense(_A)) &&
96+
Bool(ArrayInterface.is_column_major(_A))
10097
# we can avoid the reshape and call the standard method
10198
Ar = reinterpret(T, _A)
10299
Cr = reinterpret(T, _C)
@@ -151,7 +148,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
151148
_C
152149
end
153150

154-
# multiplication of dual matrix by standard vector/matrix from the right
151+
# multiplication of dual matrix by standard vector/matrix from the right
155152
@eval @inline function _matmul_serial!(
156153
_C::$(AbstractVectorOrMatrix){DualT},
157154
_A::AbstractMatrix{DualT},
@@ -161,9 +158,9 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
161158
MKN
162159
) where {TAG,T,DualT<:ForwardDiff.Dual{TAG,T}}
163160
if Bool(ArrayInterface.is_dense(_C)) &&
164-
Bool(ArrayInterface.is_column_major(_C)) &&
165-
Bool(ArrayInterface.is_dense(_A)) &&
166-
Bool(ArrayInterface.is_column_major(_A))
161+
Bool(ArrayInterface.is_column_major(_C)) &&
162+
Bool(ArrayInterface.is_dense(_A)) &&
163+
Bool(ArrayInterface.is_column_major(_A))
167164
# we can avoid the reshape and call the standard method
168165
A = reinterpret(T, _A)
169166
C = reinterpret(T, _C)
@@ -200,9 +197,9 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
200197
C = real_rep(_C)
201198
B = real_rep(_B)
202199
if Bool(ArrayInterface.is_dense(_C)) &&
203-
Bool(ArrayInterface.is_column_major(_C)) &&
204-
Bool(ArrayInterface.is_dense(_A)) &&
205-
Bool(ArrayInterface.is_column_major(_A))
200+
Bool(ArrayInterface.is_column_major(_C)) &&
201+
Bool(ArrayInterface.is_dense(_A)) &&
202+
Bool(ArrayInterface.is_column_major(_A))
206203
# we can avoid the reshape and call the standard method
207204
Ar = reinterpret(T, _A)
208205
Cr = reinterpret(T, _C)

ext/HyperDualNumbersExt.jl

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
module HyperDualNumbersExt
22

33
using HyperDualNumbers: Hyper
4-
using Octavian: ArrayInterface,
5-
@turbo, @tturbo,
6-
One, Zero,
7-
indices, static
4+
using Octavian: ArrayInterface, @turbo, @tturbo, One, Zero, indices, static
85
import Octavian: real_rep, _matmul!, _matmul_serial!
96

107
real_rep(a::AbstractArray{DualT}) where {T,DualT<:Hyper{T}} =
@@ -23,7 +20,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
2320
nthread::Nothing = nothing,
2421
MKN = nothing,
2522
contig_axis = nothing
26-
) where {T, DualT<:Hyper{T}}
23+
) where {T,DualT<:Hyper{T}}
2724
B = real_rep(_B)
2825
C = real_rep(_C)
2926

@@ -52,9 +49,9 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
5249
MKN = nothing
5350
) where {T,DualT<:Hyper{T}}
5451
if Bool(ArrayInterface.is_dense(_C)) &&
55-
Bool(ArrayInterface.is_column_major(_C)) &&
56-
Bool(ArrayInterface.is_dense(_A)) &&
57-
Bool(ArrayInterface.is_column_major(_A))
52+
Bool(ArrayInterface.is_column_major(_C)) &&
53+
Bool(ArrayInterface.is_dense(_A)) &&
54+
Bool(ArrayInterface.is_column_major(_A))
5855
# we can avoid the reshape and call the standard method
5956
A = reinterpret(T, _A)
6057
C = reinterpret(T, _C)
@@ -93,9 +90,9 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
9390
C = real_rep(_C)
9491
B = real_rep(_B)
9592
if Bool(ArrayInterface.is_dense(_C)) &&
96-
Bool(ArrayInterface.is_column_major(_C)) &&
97-
Bool(ArrayInterface.is_dense(_A)) &&
98-
Bool(ArrayInterface.is_column_major(_A))
93+
Bool(ArrayInterface.is_column_major(_C)) &&
94+
Bool(ArrayInterface.is_dense(_A)) &&
95+
Bool(ArrayInterface.is_column_major(_A))
9996
# we can avoid the reshape and call the standard method
10097
Ar = reinterpret(T, _A)
10198
Cr = reinterpret(T, _C)
@@ -139,7 +136,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
139136
α,
140137
β,
141138
MKN
142-
) where {T, DualT<:Hyper{T}}
139+
) where {T,DualT<:Hyper{T}}
143140
B = real_rep(_B)
144141
C = real_rep(_C)
145142

@@ -157,7 +154,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
157154
_C
158155
end
159156

160-
# multiplication of dual matrix by standard vector/matrix from the right
157+
# multiplication of dual matrix by standard vector/matrix from the right
161158
@eval @inline function _matmul_serial!(
162159
_C::$(AbstractVectorOrMatrix){DualT},
163160
_A::AbstractMatrix{DualT},
@@ -167,9 +164,9 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
167164
MKN
168165
) where {T,DualT<:Hyper{T}}
169166
if Bool(ArrayInterface.is_dense(_C)) &&
170-
Bool(ArrayInterface.is_column_major(_C)) &&
171-
Bool(ArrayInterface.is_dense(_A)) &&
172-
Bool(ArrayInterface.is_column_major(_A))
167+
Bool(ArrayInterface.is_column_major(_C)) &&
168+
Bool(ArrayInterface.is_dense(_A)) &&
169+
Bool(ArrayInterface.is_column_major(_A))
173170
# we can avoid the reshape and call the standard method
174171
A = reinterpret(T, _A)
175172
C = reinterpret(T, _C)
@@ -201,14 +198,14 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
201198
α,
202199
β,
203200
MKN
204-
) where {T, DualT<:Hyper{T}}
201+
) where {T,DualT<:Hyper{T}}
205202
A = real_rep(_A)
206203
C = real_rep(_C)
207204
B = real_rep(_B)
208205
if Bool(ArrayInterface.is_dense(_C)) &&
209-
Bool(ArrayInterface.is_column_major(_C)) &&
210-
Bool(ArrayInterface.is_dense(_A)) &&
211-
Bool(ArrayInterface.is_column_major(_A))
206+
Bool(ArrayInterface.is_column_major(_C)) &&
207+
Bool(ArrayInterface.is_dense(_A)) &&
208+
Bool(ArrayInterface.is_column_major(_A))
212209
# we can avoid the reshape and call the standard method
213210
Ar = reinterpret(T, _A)
214211
Cr = reinterpret(T, _C)
@@ -246,4 +243,4 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
246243
end
247244
end # for
248245

249-
end # module
246+
end # module

src/Octavian.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,5 +75,4 @@ if !isdefined(Base, :get_extension)
7575
include("../ext/HyperDualNumbersExt.jl")
7676
end
7777

78-
7978
end # module Octavian

src/global_constants.jl

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ MᵣW_mul_factor(::True) = StaticInt{4}()
1818
MᵣW_mul_factor(::False) = StaticInt{9}()
1919
MᵣW_mul_factor() = MᵣW_mul_factor(has_feature(Val(:x86_64_avx512f)))
2020

21+
22+
if Sys.ARCH === :aarch64 && (Sys.isapple() || occursin("apple", Sys.CPU_NAME::String))
23+
W₁Default() = StaticFloat64{0.23015506935919203}()
24+
W₂Default() = StaticFloat64{0.16967706087713014}()
25+
R₁Default() = StaticFloat64{0.9982516031563079}()
26+
R₂Default() = StaticFloat64{0.5167030291302886}()
27+
else
2128
W₁Default(::True) = StaticFloat64{0.0007423708195588264}()
2229
W₂Default(::True) = StaticFloat64{0.7757548987718677}()
2330
R₁Default(::True) = StaticFloat64{0.7936663315339363}()
@@ -50,13 +57,14 @@ W₁Default() = W₁Default(has_feature(Val(:x86_64_avx512f)))
5057
W₂Default() = W₂Default(has_feature(Val(:x86_64_avx512f)))
5158
R₁Default() = R₁Default(has_feature(Val(:x86_64_avx512f)))
5259
R₂Default() = R₂Default(has_feature(Val(:x86_64_avx512f)))
53-
54-
@static if Sys.ARCH === :x86_64 || Sys.ARCH === :i686
55-
first_cache() = StaticInt{2}()
56-
else
57-
first_cache() = StaticInt{1}()
5860
end
5961

62+
# @static if Sys.ARCH === :x86_64 || Sys.ARCH === :i686
63+
first_cache() = StaticInt{2}()
64+
# else
65+
# first_cache() = StaticInt{1}()
66+
# end
67+
6068
second_cache() = first_cache() + One()
6169

6270
_first_cache_size(fcs::StaticInt) = ifelse(
@@ -69,7 +77,11 @@ first_cache_size() = _first_cache_size(cache_size(first_cache()))
6977

7078
_second_cache_size(scs::StaticInt, ::True) = scs - cache_size(first_cache())
7179
_second_cache_size(scs::StaticInt, ::False) = scs
72-
_second_cache_size(::StaticInt{0}, ::Nothing) = StaticInt(3145728)
80+
@static if (Sys.isapple() || occursin("apple", Sys.CPU_NAME::String)) && Sys.ARCH === :aarch64
81+
_second_cache_size(::StaticInt{0}, ::False) = StaticInt(100663296)
82+
else
83+
_second_cache_size(::StaticInt{0}, ::False) = StaticInt(3145728)
84+
end
7385
function second_cache_size()
7486
sc = second_cache()
7587
_second_cache_size(cache_size(sc), cache_inclusive(sc))

src/init.jl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,13 @@ function __init__()
1414
end
1515

1616
function init_bcache()
17+
BCACHEPTR[] == C_NULL || return
18+
c = Threads.nthreads() * second_cache_size()
1719
if bcache_count() ≢ Zero()
18-
if BCACHEPTR[] == C_NULL
19-
BCACHEPTR[] = VectorizationBase.valloc(
20-
Threads.nthreads() * second_cache_size() * bcache_count(),
21-
Cvoid,
22-
ccall(:jl_getpagesize, Int, ())
23-
)
24-
end
20+
c *= bcache_count()
2521
end
22+
BCACHEPTR[] =
23+
VectorizationBase.valloc(c, Cvoid, ccall(:jl_getpagesize, Int, ()))
2624
nothing
2725
end
2826

test/aqua.jl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
@testset "Aqua.jl" begin
2-
Aqua.test_all(Octavian; ambiguities = false, project_toml_formatting = false,
3-
stale_deps = (; ignore = [:ForwardDiff]))
2+
Aqua.test_all(
3+
Octavian;
4+
ambiguities = false,
5+
project_toml_formatting = false,
6+
stale_deps = (; ignore = [:ForwardDiff])
7+
)
48
@test isempty(Test.detect_ambiguities(Octavian))
59
end

test/hyperduals.jl

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ end
2626
@testset "real array from the right" begin
2727
A1dual = randdual(A1)
2828
C1dual = randdual(C1)
29-
29+
3030
A2dual = deepcopy(A1dual)
3131
B2 = deepcopy(B1)
3232
C2dual = deepcopy(C1dual)
@@ -49,7 +49,6 @@ end
4949
@test reinterpret(Float64, C1dual) ≈ reinterpret(Float64, C2dual)
5050
end
5151

52-
5352
@testset "transposed arrays" begin
5453
A1dual = randdual(A1')
5554
C1dual = randdual(C1)
@@ -67,13 +66,21 @@ end
6766

6867
Cref = zeros(Float64, size(C1)...)
6968
LinearAlgebra.mul!(Cref, A1, B1)
70-
@test (reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈
71-
reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) ≈ Cref) &&
72-
(reinterpret(Float64, C1dual) ≈ reinterpret(Float64, C2dual) ≈
73-
reinterpret(Float64, C3dual) ≈ reinterpret(Float64, C4dual) )
69+
@test (
70+
reinterpretHD(Float64, C1dual) ≈
71+
reinterpretHD(Float64, C2dual) ≈
72+
reinterpretHD(Float64, C3dual) ≈
73+
reinterpretHD(Float64, C4dual) ≈
74+
Cref
75+
) && (
76+
reinterpret(Float64, C1dual) ≈
77+
reinterpret(Float64, C2dual) ≈
78+
reinterpret(Float64, C3dual) ≈
79+
reinterpret(Float64, C4dual)
80+
)
7481
end
7582

76-
@testset "two dual arrays" begin
83+
@testset "two dual arrays" begin
7784
A1d = randdual(A1)
7885
B1d = randdual(B1)
7986
@test reinterpret(Float64, Octavian.matmul(A1d, B1d, 1.3))

0 commit comments

Comments
 (0)