Skip to content

Commit 2026064

Browse files
committed
improve parallel version, simplify some code, some memory reduction and speedup
1 parent 6005194 commit 2026064

17 files changed

+781
-708
lines changed

examples/projection_intersection_2D.jl

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ elseif options.FL==Float32
3636
end
3737

3838
#load image to project
39-
file = matopen(joinpath(dirname(pathof(SetIntersectionProjection)), "../examples/Data/compass_velocity.mat"))
40-
m = read(file, "Data");close(file)
41-
m = m[1:341,200:600]
42-
m = permutedims(m,[2,1])
39+
file = matopen(joinpath(dirname(pathof(SetIntersectionProjection)), "../examples/Data/compass_velocity.mat"));
40+
m = read(file, "Data");close(file);
41+
m = m[1:341,200:600];
42+
m = permutedims(m,[2,1]);
4343

4444
#set up computational grid (25 and 6 m are the original distances between grid points)
4545
comp_grid = compgrid((TF(25.0), TF(6.0)),(size(m,1), size(m,2)))
46-
m = convert(Vector{TF},vec(m))
46+
m = convert(Vector{TF},vec(m));
4747

4848
#define axis limits and colorbar limits
4949
xmax = comp_grid.d[1]*comp_grid.n[1]
@@ -55,13 +55,13 @@ vma = 4500
5555
constraint = Vector{SetIntersectionProjection.set_definitions}()
5656

5757
#bounds:
58-
m_min = 1500.0
58+
m_min = 1480.0
5959
m_max = 4500.0
6060
set_type = "bounds"
6161
TD_OP = "identity"
6262
app_mode = ("matrix","")
6363
custom_TD_OP = ([],false)
64-
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
64+
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));
6565

6666
#slope constraints (vertical)
6767
m_min = 0.0
@@ -70,11 +70,11 @@ set_type = "bounds"
7070
TD_OP = "D_z"
7171
app_mode = ("matrix","")
7272
custom_TD_OP = ([],false)
73-
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
73+
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));
7474

7575
options.parallel = false
76-
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
77-
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
76+
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
77+
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);
7878

7979
println("")
8080
println("PARSDMM serial (bounds and bounds on D_z):")
@@ -103,14 +103,17 @@ tight_layout()
103103
#tight_layout(pad=0.0, w_pad=0.0, h_pad=1.0)
104104
savefig("PARSDMM_logs.png",bbox_inches="tight")
105105

106+
#print timings in terminal
107+
log_PARSDMM.timing
108+
106109
println("")
107110
println("PARSDMM parallel (bounds and bounds on D_z):")
108111
options.parallel = true
109-
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
110-
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
112+
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
113+
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);
111114

112115
@time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
113-
#@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
116+
@time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
114117
#@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
115118

116119
#plot
@@ -119,6 +122,9 @@ subplot(2,1,1);imshow(permutedims(reshape(m,(comp_grid.n[1],comp_grid.n[2])),[2,
119122
subplot(2,1,2);imshow(permutedims(reshape(x2,(comp_grid.n[1],comp_grid.n[2])),[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("Projection (bounds and bounds on D_z)")
120123
savefig("projected_model_ParallelPARSDMM.png",bbox_inches="tight")
121124

125+
#print timings in terminal
126+
log_PARSDMM.timing
127+
122128
# #use multilevel-serial (2-levels)
123129
# options.parallel = false
124130

examples/projection_intersection_3D.jl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Bas Peters, 2017
44

55
using Distributed
6-
using LinearAlgebra
6+
@everywhere using LinearAlgebra
77
@everywhere using SetIntersectionProjection
88
using HDF5
99
using PyPlot
@@ -17,7 +17,7 @@ end
1717
if ~isfile("overthrust_3D_true_model.h5")
1818
run(`wget ftp://slim.gatech.edu/data/SoftwareRelease/WaveformInversion.jl/3DFWI/overthrust_3D_true_model.h5`)
1919
end
20-
n, d, o, m = read(h5open("overthrust_3D_true_model.h5","r"), "n", "d", "o", "m")
20+
n, d, o, m = read(h5open("overthrust_3D_true_model.h5","r"), "n", "d", "o", "m");
2121

2222
m .= 1000.0 ./ sqrt.(m);
2323
m = m[50:200,50:200,:];
@@ -36,7 +36,7 @@ options.Blas_active = true
3636
options.maxit = 500
3737

3838
set_zero_subnormals(true)
39-
BLAS.set_num_threads(4)
39+
@everywhere BLAS.set_num_threads(4)
4040

4141
#select working precision
4242
if options.FL==Float64
@@ -68,7 +68,7 @@ set_type = "bounds"
6868
TD_OP = "identity"
6969
app_mode = ("matrix","")
7070
custom_TD_OP = ([],false)
71-
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
71+
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));
7272

7373
#slope constraints (vertical)
7474
m_min = 0.0
@@ -77,11 +77,11 @@ set_type = "bounds"
7777
TD_OP = "D_z"
7878
app_mode = ("matrix","")
7979
custom_TD_OP = ([],false)
80-
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP))
80+
push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP));
8181

8282
options.parallel = false
83-
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
84-
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
83+
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
84+
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);
8585

8686
println("")
8787
println("PARSDMM serial (bounds and bounds on D_z):")
@@ -133,12 +133,12 @@ println("PARSDMM multilevel-serial (bounds and bounds on D_z):")
133133
println("")
134134
println("PARSDMM parallel (bounds and bounds on D_z):")
135135
options.parallel = true
136-
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL)
137-
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options)
136+
(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL);
137+
(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options);
138138

139-
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options)
140-
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options)
141-
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options)
139+
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
140+
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
141+
@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options);
142142

143143
#use multilevel-parallel (2-levels)
144144
options.parallel = true

src/CDS_MVp.jl

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
export CDS_MVp
1+
export CDS_MVp, CDS_MVp2, CDS_MVp3, CDS_MVp4
22

33
"""
44
compute single-thread matrix vector product with vector x, output is vector y: y=A*x
@@ -7,15 +7,15 @@ R is a tall matrix N by ndiagonals, corresponding to a square matrix A
77
offsets indicate offset of diagonal compared to the main diagonal in A (which is 0)
88
"""
99
function CDS_MVp(
10-
N::Integer,
11-
ndiags::Integer,
12-
R::Array{TF,2},
13-
offset::Vector{TI},
14-
x::Vector{TF},
15-
y::Vector{TF}) where {TF<:Real,TI<:Integer}
10+
N ::Integer,
11+
ndiags ::Integer,
12+
R ::Array{TF,2},
13+
offset ::Vector{TI},
14+
x ::Vector{TF},
15+
y ::Vector{TF}) where {TF<:Real,TI<:Integer}
1616

1717
for i = 1 : ndiags
18-
d = offset[i]
18+
d = offset[i]
1919
r0 = max(1, 1-d)
2020
r1 = min(N, N-d)
2121
c0 = max(1, 1+d)
@@ -26,3 +26,51 @@ function CDS_MVp(
2626
end
2727
return y
2828
end
29+
30+
# #the versions below is insignificantly faster than the original version above
31+
# function CDS_MVp(
32+
# N::Integer,
33+
# ndiags::Integer,
34+
# R::Array{TF,2},
35+
# offset::Vector{TI},
36+
# x::Vector{TF},
37+
# y::Vector{TF}) where {TF<:Real,TI<:Integer}
38+
39+
# for i = 1 : ndiags
40+
# d = offset[i]
41+
# r0 = max(1, 1-d)
42+
# r1 = min(N, N-d)
43+
# c0 = max(1, 1+d)
44+
# #r0_plus_c0 = r0 + c0
45+
# c = deepcopy(c0)
46+
# for r = r0 : r1
47+
# @inbounds y[r] = y[r] + R[r,i] * x[c]#original
48+
# c += 1
49+
# end
50+
# end
51+
# return y
52+
# end
53+
54+
# function CDS_MVp(
55+
# N ::Integer,
56+
# ndiags ::Integer,
57+
# R ::Array{TF,2},
58+
# offset ::Vector{TI},
59+
# x ::Vector{TF},
60+
# y ::Vector{TF}) where {TF<:Real,TI<:Integer}
61+
62+
# for i = 1 : ndiags
63+
# d = offset[i]
64+
# r0 = max(1, 1-d)
65+
# r1 = min(N, N-d)
66+
# c0 = max(1, 1+d)
67+
# # for r = r0 : r1
68+
# # c = r - r0 + c0 #original
69+
# # @inbounds y[r] = y[r] + R[r,i] * x[c]#original
70+
# # end
71+
# @inbounds y[r0 : r1] .= y[r0 : r1] .+ R[r0:r1,i].*x[r0 - r0 + c0:r1 - r0 + c0]
72+
# end
73+
74+
# return y
75+
# end
76+

src/CDS_MVp_MT.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ function CDS_MVp_MT(
1515
y ::Vector{TF}) where {TF<:Real,TI<:Integer}
1616

1717
for i = 1 : ndiags
18-
d = offset[i]
18+
d = offset[i]
1919
r0 = max(1, 1-d)
2020
r1 = min(N, N-d)
2121
c0 = max(1, 1+d)

src/CDS_MVp_MT_subfunc.jl

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,14 @@ wrapper around Julia threads to compute a multi-threaded matrix vector product i
44
This is a subfunction of CDS_MVp_MT.jl
55
"""
66
function CDS_MVp_MT_subfunc(
7-
R::Array{TF,2},
8-
x::Vector{TF},
9-
y::Vector{TF},
10-
r0::Int,
11-
c0::Int,
12-
r1::Int,
13-
i::Int) where {TF<:Real}
7+
R ::Array{TF,2},
8+
x ::Vector{TF},
9+
y ::Vector{TF},
10+
r0 ::Int,
11+
c0 ::Int,
12+
r1 ::Int,
13+
i ::Int) where {TF<:Real}
1414

15-
#s=rind-1
1615
@Threads.threads for r = r0 : r1
1716
c = r - r0 + c0 #original
1817
@inbounds y[r] = y[r] + R[r,i] * x[c]#original

src/CDS_scaled_add!.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ export CDS_scaled_add!
33

44
"""
55
Computes A = A + alpha * B for A and B in the compressed diagonal storage format (CDS/DIA)
6-
TO DO: make this function multi-threaded per column of A and B
6+
TO DO: make this function multi-threaded per column of A and B, or use BLAS functions
77
"""
88
function CDS_scaled_add!(
99
A ::Array{TF,2},
@@ -16,7 +16,7 @@ function CDS_scaled_add!(
1616
for k=1:length(B_offsets)
1717
A_update_col = findall((in)(B_offsets[k]),A_offsets)
1818
if isempty(A_update_col) == true
19-
error("attempted to update a diagonal in A in CDS storage that does not excist. A and B need to have the same nonzero diagonals")
19+
error("attempted to update a diagonal in A in CDS storage that does not exist. A and B need to have the same nonzero diagonals")
2020
end
2121
A[:,A_update_col] .= A[:,A_update_col] .+ alpha .* B[:,k];
2222
end

0 commit comments

Comments
 (0)