Skip to content

Commit 16e0b18

Browse files
add experimental multithreading via openmp
1 parent 24f040d commit 16e0b18

File tree

11 files changed

+157
-32
lines changed

11 files changed

+157
-32
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
docs/build/
2-
docs/site/
2+
docs/site/
3+
*/*.dylib

deps/Makefile

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
ifeq ($(OS), Windows_NT)
2+
SLIB = dll
3+
else
4+
UNAME := $(shell uname)
5+
ifeq ($(UNAME), Darwin)
6+
SLIB = dylib
7+
else
8+
SLIB = so
9+
endif
10+
endif
11+
12+
OBJ = Rotations.c
13+
CFLAGS = -std=c11 -Ofast -march=native
14+
LIBFLAGS = -shared -fPIC -lm -lgomp
15+
16+
all:
17+
make seriallib
18+
make parallellib
19+
20+
serial:
21+
gcc-4.9 $(CFLAGS) $(OBJ) -o rot
22+
23+
parallel:
24+
gcc-4.9 -fopenmp $(CFLAGS) $(OBJ) -o rotpar
25+
26+
seriallib:
27+
gcc-4.9 $(CFLAGS) $(LIBFLAGS) $(OBJ) -o rot.$(SLIB)
28+
29+
parallellib:
30+
gcc-4.9 -fopenmp $(CFLAGS) $(LIBFLAGS) $(OBJ) -o rotpar.$(SLIB)

deps/Rotations.c

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#include <stdlib.h>
2+
#include <math.h>
3+
4+
#ifdef _OPENMP
5+
#include <omp.h>
6+
#else
7+
#define omp_get_threat_num() 0
8+
#define omp_get_num_procs() 8
9+
#endif
10+
11+
void julia_apply_givens(double * A, const double * snm, const double * cnm, const int N, const int M);
12+
void julia_apply_givens_t(double * A, const double * snm, const double * cnm, const int N, const int M);
13+
14+
int main(void) {
15+
return 0;
16+
}
17+
18+
void julia_apply_givens(double * A, const double * snm, const double * cnm, const int N, const int M) {
19+
#pragma omp parallel for schedule(dynamic)
20+
for (int m = M/2; m > 1; m--) {
21+
double s, c;
22+
double a1, a2, a3, a4;
23+
for (int j = m; j > 1; j = j-2) {
24+
for (int l = N-1-j; l >= 0; l--){
25+
s = snm[l+(j-2)*(2*N+3-j)/2];
26+
c = cnm[l+(j-2)*(2*N+3-j)/2];
27+
a1 = A[l+N*(2*m-1)];
28+
a2 = A[l+2+N*(2*m-1)];
29+
a3 = A[l+N*(2*m)];
30+
a4 = A[l+2+N*(2*m)];
31+
A[l+N*(2*m-1)] = c*a1 + s*a2;
32+
A[l+2+N*(2*m-1)] = c*a2 - s*a1;
33+
A[l+N*(2*m)] = c*a3 + s*a4;
34+
A[l+2+N*(2*m)] = c*a4 - s*a3;
35+
}
36+
}
37+
}
38+
return;
39+
}
40+
41+
void julia_apply_givens_t(double * A, const double * snm, const double * cnm, const int N, const int M) {
42+
#pragma omp parallel for schedule(dynamic)
43+
for (int m = M/2; m > 1; m--) {
44+
double s, c;
45+
double a1, a2, a3, a4;
46+
for (int j = 2+m%2; j <= m; j = j+2) {
47+
for (int l = 0; l <= N-1-j; l++){
48+
s = snm[l+(j-2)*(2*N+3-j)/2];
49+
c = cnm[l+(j-2)*(2*N+3-j)/2];
50+
a1 = A[l+N*(2*m-1)];
51+
a2 = A[l+2+N*(2*m-1)];
52+
a3 = A[l+N*(2*m)];
53+
a4 = A[l+2+N*(2*m)];
54+
A[l+N*(2*m-1)] = c*a1 - s*a2;
55+
A[l+2+N*(2*m-1)] = c*a2 + s*a1;
56+
A[l+N*(2*m)] = c*a3 - s*a4;
57+
A[l+2+N*(2*m)] = c*a4 + s*a3;
58+
}
59+
}
60+
}
61+
return;
62+
}

deps/build.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
p = pwd()
2+
cd(Pkg.dir("FastTransforms/deps/"))
3+
run(`make`)
4+
cd(p)

src/SphericalHarmonics/fastplan.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ struct FastSphericalHarmonicPlan{T} <: SphericalHarmonicPlan{T}
88
B::Matrix{T}
99
end
1010

11-
function FastSphericalHarmonicPlan{T}(A::Matrix{T}, L::Int; opts...)
11+
function FastSphericalHarmonicPlan(A::Matrix{T}, L::Int; opts...) where T
1212
M, N = size(A)
1313
n = (N+1)÷2
1414
RP = RotationPlan(T, n-1)

src/SphericalHarmonics/slowplan.jl

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import Base.LinAlg: Givens, AbstractRotation
22

33
### These three A_mul_B! should be in Base, but for the time being they do not add methods to Base.A_mul_B!; they add methods to the internal A_mul_B!.
44

5-
function A_mul_B!{T<:Real}(G::Givens{T}, A::AbstractVecOrMat)
5+
function A_mul_B!(G::Givens{T}, A::AbstractVecOrMat) where T<:Real
66
m, n = size(A, 1), size(A, 2)
77
if G.i2 > m
88
throw(DimensionMismatch("column indices for rotation are outside the matrix"))
@@ -28,7 +28,7 @@ function A_mul_B!(A::AbstractMatrix, G::Givens)
2828
return A
2929
end
3030

31-
function A_mul_B!{T<:Real}(A::AbstractMatrix, G::Givens{T})
31+
function A_mul_B!(A::AbstractMatrix, G::Givens{T}) where T<:Real
3232
m, n = size(A, 1), size(A, 2)
3333
if G.i2 > n
3434
throw(DimensionMismatch("column indices for rotation are outside the matrix"))
@@ -45,7 +45,7 @@ struct Pnmp2toPlm{T} <: AbstractRotation{T}
4545
rotations::Vector{Givens{T}}
4646
end
4747

48-
function Pnmp2toPlm{T}(::Type{T}, n::Int, m::Int)
48+
function Pnmp2toPlm(::Type{T}, n::Int, m::Int) where T
4949
G = Vector{Givens{T}}(n)
5050
@inbounds for= 1:n
5151
c = sqrt(T((2m+2)*(2+2m+3))/T((ℓ+2m+2)*(ℓ+2m+3)))
@@ -75,14 +75,42 @@ end
7575

7676
struct RotationPlan{T} <: AbstractRotation{T}
7777
layers::Vector{Pnmp2toPlm{T}}
78+
snm::Vector{T}
79+
cnm::Vector{T}
7880
end
7981

80-
function RotationPlan{T}(::Type{T}, n::Int)
82+
function RotationPlan(::Type{T}, n::Int) where T
8183
layers = Vector{Pnmp2toPlm{T}}(n-1)
8284
@inbounds for m = 0:n-2
8385
layers[m+1] = Pnmp2toPlm(T, n-1-m, m)
8486
end
85-
RotationPlan(layers)
87+
n = n+1
88+
snm = zeros(T, (n*(n+1))÷2)
89+
cnm = zeros(T, (n*(n+1))÷2)
90+
@inbounds for l = 0:n-1
91+
for m = 0:n-l-1
92+
nums = T((l+1)*(l+2))
93+
numc = T((2*m+2)*(2*l+2*m+5))
94+
den = T((l+2*m+3)*(l+2*m+4))
95+
snm[l+(m*(2*n+1-m))÷2+1] = sqrt(nums/den)
96+
cnm[l+(m*(2*n+1-m))÷2+1] = sqrt(numc/den)
97+
end
98+
end
99+
RotationPlan(layers, snm, cnm)
100+
end
101+
102+
const rotpath = joinpath(Pkg.dir("FastTransforms"), "deps", "rotpar")
103+
104+
function Base.A_mul_B!(P::RotationPlan{Float64}, A::AbstractMatrix{Float64})
105+
M, N = size(A)
106+
ccall((:julia_apply_givens, rotpath), Void, (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Int64, Int64), A, P.snm, P.cnm, M, N)
107+
A
108+
end
109+
110+
function Base.At_mul_B!(P::RotationPlan{Float64}, A::AbstractMatrix{Float64})
111+
M, N = size(A)
112+
ccall((:julia_apply_givens_t, rotpath), Void, (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Int64, Int64), A, P.snm, P.cnm, M, N)
113+
A
86114
end
87115

88116
function Base.A_mul_B!(P::RotationPlan, A::AbstractMatrix)
@@ -135,7 +163,7 @@ struct SlowSphericalHarmonicPlan{T} <: SphericalHarmonicPlan{T}
135163
B::Matrix{T}
136164
end
137165

138-
function SlowSphericalHarmonicPlan{T}(A::Matrix{T})
166+
function SlowSphericalHarmonicPlan(A::Matrix{T}) where T
139167
M, N = size(A)
140168
n = (N+1)÷2
141169
RP = RotationPlan(T, n-1)

src/SphericalHarmonics/sphfunctions.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ function sph_zero_spurious_modes!(A::AbstractMatrix)
1010
A
1111
end
1212

13-
function sphrand{T}(::Type{T}, m::Int, n::Int)
13+
function sphrand(::Type{T}, m::Int, n::Int) where T
1414
A = zeros(T, m, 2n-1)
1515
for i = 1:m
1616
A[i,1] = rand(T)
@@ -24,7 +24,7 @@ function sphrand{T}(::Type{T}, m::Int, n::Int)
2424
A
2525
end
2626

27-
function sphrandn{T}(::Type{T}, m::Int, n::Int)
27+
function sphrandn(::Type{T}, m::Int, n::Int) where T
2828
A = zeros(T, m, 2n-1)
2929
for i = 1:m
3030
A[i,1] = randn(T)
@@ -38,7 +38,7 @@ function sphrandn{T}(::Type{T}, m::Int, n::Int)
3838
A
3939
end
4040

41-
function sphones{T}(::Type{T}, m::Int, n::Int)
41+
function sphones(::Type{T}, m::Int, n::Int) where T
4242
A = zeros(T, m, 2n-1)
4343
for i = 1:m
4444
A[i,1] = one(T)
@@ -52,7 +52,7 @@ function sphones{T}(::Type{T}, m::Int, n::Int)
5252
A
5353
end
5454

55-
sphzeros{T}(::Type{T}, m::Int, n::Int) = zeros(T, m, 2n-1)
55+
sphzeros(::Type{T}, m::Int, n::Int) where T = zeros(T, m, 2n-1)
5656

5757
function normalizecolumns!(A::AbstractMatrix)
5858
m, n = size(A)

src/SphericalHarmonics/synthesisanalysis.jl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ struct SynthesisPlan{T, P1, P2}
55
temp::Vector{T}
66
end
77

8-
function plan_synthesis{T<:fftwNumber}(A::Matrix{T})
8+
function plan_synthesis(A::Matrix{T}) where T<:fftwNumber
99
m, n = size(A)
1010
x = FFTW.FakeArray(T, m)
1111
y = FFTW.FakeArray(T, n)
@@ -22,7 +22,7 @@ struct AnalysisPlan{T, P1, P2}
2222
temp::Vector{T}
2323
end
2424

25-
function plan_analysis{T<:fftwNumber}(A::Matrix{T})
25+
function plan_analysis(A::Matrix{T}) where T<:fftwNumber
2626
m, n = size(A)
2727
x = FFTW.FakeArray(T, m)
2828
y = FFTW.FakeArray(T, n)
@@ -32,7 +32,7 @@ function plan_analysis{T<:fftwNumber}(A::Matrix{T})
3232
AnalysisPlan(planθ, planφ, C, zeros(T, n))
3333
end
3434

35-
function Base.A_mul_B!{T}(Y::Matrix{T}, P::SynthesisPlan{T}, X::Matrix{T})
35+
function Base.A_mul_B!(Y::Matrix{T}, P::SynthesisPlan{T}, X::Matrix{T}) where T
3636
M, N = size(X)
3737

3838
# Column synthesis
@@ -73,7 +73,7 @@ function Base.A_mul_B!{T}(Y::Matrix{T}, P::SynthesisPlan{T}, X::Matrix{T})
7373
Y
7474
end
7575

76-
function Base.A_mul_B!{T}(Y::Matrix{T}, P::AnalysisPlan{T}, X::Matrix{T})
76+
function Base.A_mul_B!(Y::Matrix{T}, P::AnalysisPlan{T}, X::Matrix{T}) where T
7777
M, N = size(X)
7878

7979
# Row analysis
@@ -112,7 +112,7 @@ end
112112

113113

114114

115-
function row_analysis!{T}(P, C, vals::Vector{T})
115+
function row_analysis!(P, C, vals::Vector{T}) where T
116116
n = length(vals)
117117
cfs = scale!(two(T)/n,P*vals)
118118
cfs[1] *= half(T)
@@ -123,7 +123,7 @@ function row_analysis!{T}(P, C, vals::Vector{T})
123123
negateeven!(reverseeven!(A_mul_B!(C, cfs)))
124124
end
125125

126-
function row_synthesis!{T}(P, C, cfs::Vector{T})
126+
function row_synthesis!(P, C, cfs::Vector{T}) where T
127127
n = length(cfs)
128128
Ac_mul_B!(C, reverseeven!(negateeven!(cfs)))
129129
if iseven(n)
@@ -171,17 +171,17 @@ function negateeven!(x::Vector)
171171
x
172172
end
173173

174-
function A_mul_B_col_J!{T}(Y::Matrix{T}, P::r2rFFTWPlan{T}, X::Matrix{T}, J::Int)
174+
function A_mul_B_col_J!(Y::Matrix{T}, P::r2rFFTWPlan{T}, X::Matrix{T}, J::Int) where T
175175
unsafe_execute_col_J!(P, X, Y, J)
176176
return Y
177177
end
178178

179-
function unsafe_execute_col_J!{T<:fftwDouble}(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int)
179+
function unsafe_execute_col_J!(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int) where T<:fftwDouble
180180
M = size(X, 1)
181181
ccall((:fftw_execute_r2r, libfftw), Void, (PlanPtr, Ptr{T}, Ptr{T}), plan, pointer(X, M*(J-1)+1), pointer(Y, M*(J-1)+1))
182182
end
183183

184-
function unsafe_execute_col_J!{T<:fftwSingle}(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int)
184+
function unsafe_execute_col_J!(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int) where T<:fftwSingle
185185
M = size(X, 1)
186186
ccall((:fftwf_execute_r2r, libfftwf), Void, (PlanPtr, Ptr{T}, Ptr{T}), plan, pointer(X, M*(J-1)+1), pointer(Y, M*(J-1)+1))
187187
end

src/SphericalHarmonics/thinplan.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ struct ThinSphericalHarmonicPlan{T} <: SphericalHarmonicPlan{T}
1212
B::Matrix{T}
1313
end
1414

15-
function ThinSphericalHarmonicPlan{T}(A::Matrix{T}, L::Int; opts...)
15+
function ThinSphericalHarmonicPlan(A::Matrix{T}, L::Int; opts...) where T
1616
M, N = size(A)
1717
n = (N+1)÷2
1818
RP = RotationPlan(T, n-1)

src/TriangularHarmonics/slowplan.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
immutable Pnmp1toPlm{T} <: AbstractRotation{T}
1+
struct Pnmp1toPlm{T} <: AbstractRotation{T}
22
rotations::Vector{Givens{T}}
33
end
44

5-
function Pnmp1toPlm{T}(::Type{T}, n::Int, m::Int, α::T, β::T, γ::T)
5+
function Pnmp1toPlm(::Type{T}, n::Int, m::Int, α::T, β::T, γ::T) where T
66
G = Vector{Givens{T}}(n)
77
@inbounds for= 1:n
88
c = sqrt((2m+β+γ+2)/(ℓ+2m+β+γ+2)*(2+2m+α+β+γ+2)/(ℓ+2m+α+β+γ+2))
@@ -29,11 +29,11 @@ function Base.A_mul_B!(A::AbstractMatrix, C::Pnmp1toPlm)
2929
A
3030
end
3131

32-
immutable TriRotationPlan{T} <: AbstractRotation{T}
32+
struct TriRotationPlan{T} <: AbstractRotation{T}
3333
layers::Vector{Pnmp1toPlm{T}}
3434
end
3535

36-
function TriRotationPlan{T}(::Type{T}, n::Int, α::T, β::T, γ::T)
36+
function TriRotationPlan(::Type{T}, n::Int, α::T, β::T, γ::T) where T
3737
layers = Vector{Pnmp1toPlm{T}}(n)
3838
@inbounds for m = 0:n-1
3939
layers[m+1] = Pnmp1toPlm(T, n-m, m, α, β, γ)
@@ -76,14 +76,14 @@ end
7676
Base.Ac_mul_B!(P::TriRotationPlan, A::AbstractMatrix) = At_mul_B!(P, A)
7777

7878

79-
immutable SlowTriangularHarmonicPlan{T} <: TriangularHarmonicPlan{T}
79+
struct SlowTriangularHarmonicPlan{T} <: TriangularHarmonicPlan{T}
8080
RP::TriRotationPlan{T}
8181
p::NormalizedLegendreToChebyshevPlan{T}
8282
pinv::ChebyshevToNormalizedLegendrePlan{T}
8383
B::Matrix{T}
8484
end
8585

86-
function SlowTriangularHarmonicPlan{T}(A::Matrix{T}, α, β, γ)
86+
function SlowTriangularHarmonicPlan(A::Matrix{T}, α, β, γ) where T
8787
@assert β == γ == -half(T)
8888
@assert α == zero(T)
8989
M, N = size(A)

0 commit comments

Comments
 (0)