diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index e942ab6c..00000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitmodules b/.gitmodules index f1abbe18..b09492e6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "deps/SparseRooflineBenchmark"] path = deps/SparseRooflineBenchmark - url = https://github.com/SparseRooflineBenchmark/SparseRooflineBenchmark + url = git@github.com:Paramuths/SparseRooflineBenchmark.git [submodule "deps/taco"] path = deps/taco url = https://github.com/tensor-compiler/taco diff --git a/deps/SparseRooflineBenchmark b/deps/SparseRooflineBenchmark index 68d595bc..247652ae 160000 --- a/deps/SparseRooflineBenchmark +++ b/deps/SparseRooflineBenchmark @@ -1 +1 @@ -Subproject commit 68d595bc3b7bb6cc72a047b3bf15896c53948c51 +Subproject commit 247652ae192d84b1ad562fff72e4769d6fd68977 diff --git a/parallel/.gitignore b/parallel/.gitignore new file mode 100644 index 00000000..ba39cc53 --- /dev/null +++ b/parallel/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/parallel/Project.toml b/parallel/Project.toml new file mode 100644 index 00000000..59770062 --- /dev/null +++ b/parallel/Project.toml @@ -0,0 +1,15 @@ +[deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +Finch = "9177782c-1635-4eb9-9bfb-d9dfa25e6bce" +Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" +IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MatrixDepot = "b51810bb-c9f3-55da-ae3c-350fc1fbce05" +Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" +SuiteSparseGraphBLAS = "c2e53296-7b14-11e9-1210-bddfa8111e1d" +TensorMarket = "8b7d4fe7-0b45-4d0d-9dd8-5cc9b23b4b77" +ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042" diff --git a/parallel/spadd/.gitignore 
b/parallel/spadd/.gitignore new file mode 100644 index 00000000..4902375f --- /dev/null +++ b/parallel/spadd/.gitignore @@ -0,0 +1,2 @@ +Manifest.toml +slurm* diff --git a/parallel/spadd/concat.jl b/parallel/spadd/concat.jl new file mode 100644 index 00000000..743e3098 --- /dev/null +++ b/parallel/spadd/concat.jl @@ -0,0 +1,66 @@ +using Finch +using Base.Threads + +function concat(A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, B::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}) + @inbounds @fastmath(begin + A_lvl = A.lvl # DenseLevel + A_lvl_2 = A_lvl.lvl # SparseListLevel + A_lvl_ptr = A_lvl_2.ptr # Vector{Int64} + A_lvl_idx = A_lvl_2.idx # Vector{Int64} + # A_lvl_3 = A_lvl_2.lvl # ElementLevel + A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64} + + B_lvl = B.lvl # DenseLevel + B_lvl_2 = B_lvl.lvl # SparseListLevel + B_lvl_ptr = B_lvl_2.ptr # Vector{Int64} + B_lvl_idx = B_lvl_2.idx # Vector{Int64} + # B_lvl_3 = B_lvl_2.lvl # ElementLevel + B_lvl_2_val = B_lvl_2.lvl.val # Vector{Float64} + + # val + C_lvl_2_val = vcat(A_lvl_2_val, B_lvl_2_val) + C_lvl_3 = Element{0.0,Float64,Int64}(C_lvl_2_val) + # shape + A_lvl_2.shape == B_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(A_lvl_2.shape) != $(B_lvl_2.shape))")) + C_lvl_shape = A_lvl_2.shape + # pointer + B_lvl_ptr_shift = B_lvl_ptr[2:end] .+ (last(A_lvl_ptr) - 1) + C_lvl_ptr = vcat(A_lvl_ptr, B_lvl_ptr_shift) + # index + C_lvl_idx = vcat(A_lvl_idx, B_lvl_idx) + + C_lvl_2 = SparseList{Int64}(C_lvl_3, C_lvl_shape, C_lvl_ptr, C_lvl_idx) + C_lvl = Dense{Int64}(C_lvl_2, A_lvl.shape + B_lvl.shape) + + C = Tensor(C_lvl) + return C + end) +end + +function concat_vec(V::Vector{Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, nonzero_offset::Vector{Int64}, 
columns::Vector{Int64}) + @inbounds @fastmath(begin + # val + B_lvl_2_val = Vector{Float64}(undef, last(nonzero_offset)) + # shape + B_lvl_shape = V[1].lvl.lvl.shape + # pointer + B_lvl_ptr = Vector{Int64}(undef, last(columns) + 1) + B_lvl_ptr[1] = 1 + # idx + B_lvl_idx = Vector{Int64}(undef, last(nonzero_offset)) + + Threads.@threads for i in 1:length(V) + B_lvl_2_val[nonzero_offset[i]+1:nonzero_offset[i+1]] .= V[i].lvl.lvl.lvl.val + B_lvl_idx[nonzero_offset[i]+1:nonzero_offset[i+1]] .= V[i].lvl.lvl.idx + B_lvl_ptr[columns[i]+2:columns[i+1]+1] = V[i].lvl.lvl.ptr[2:end] .+ nonzero_offset[i] + end + B_lvl_3 = Element{0.0,Float64,Int64}(B_lvl_2_val) + + B_lvl_2 = SparseList{Int64}(B_lvl_3, B_lvl_shape, B_lvl_ptr, B_lvl_idx) + B_lvl = Dense{Int64}(B_lvl_2, mapreduce(A -> A.lvl.shape, +, V)) + + B = Tensor(B_lvl) + return B + end) +end + diff --git a/parallel/spadd/graph.py b/parallel/spadd/graph.py new file mode 100644 index 00000000..b627dce0 --- /dev/null +++ b/parallel/spadd/graph.py @@ -0,0 +1,116 @@ +import json +from collections import defaultdict + +import matplotlib.pyplot as plt + +GRAPH_FOLDER = "graph" +SPEEDUP_FOLDER = "speedup" +RUNTIME_FOLDER = "runtime" +RESULTS_FOLDER = "results" + +NTHREADS = [i + 1 for i in range(12)] + +DEFAULT_METHOD = "serial_default_implementation" +METHODS = [ + DEFAULT_METHOD, + # "parallel_col_separate_sparselist_results", + "separated_memory_concatenate_results", +] + +DATASETS = [ + {"uniform": ["1000-0.1", "10000-0.1", "1000000-3000000"]}, + {"FEMLAB": ["FEMLAB-poisson3Da", "FEMLAB-poisson3Db"]}, +] + +COLORS = ["gray", "cadetblue", "saddlebrown", "navy", "black"] + + +def load_json(): + combine_results = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: {}))) + for n_thread in NTHREADS: + results_json = json.load( + open(f"{RESULTS_FOLDER}/spadd_{n_thread}_threads.json", "r") + ) + for result in results_json: + + matrix = ( + result["matrix"].replace("/", "-") + if result["dataset"] != "uniform" + else 
f"{result['matrix']['size']}-{result['matrix']['sparsity']}" + ) + combine_results[result["dataset"]][matrix][result["method"]][ + result["n_threads"] + ] = result["time"] + + return combine_results + + +def plot_speedup_result(results, dataset, matrix, save_location): + plt.figure(figsize=(10, 6)) + for method, color in zip(METHODS, COLORS): + plt.plot( + NTHREADS, + [ + results[dataset][matrix][DEFAULT_METHOD][n_thread] + / results[dataset][matrix][method][n_thread] + for n_thread in NTHREADS + ], + label=method, + color=color, + marker="o", + linestyle="-", + linewidth=1, + ) + + plt.title( + f"SpAdd - Speedup for {dataset}: {matrix} (with respect to {DEFAULT_METHOD})" + ) + # plt.yscale("log", base=10) + plt.xticks(NTHREADS) + plt.xlabel("Number of Threads") + plt.ylabel(f"Speedup") + + plt.legend() + plt.savefig(save_location) + + +def plot_runtime_result(results, dataset, matrix, save_location): + plt.figure(figsize=(10, 6)) + for method, color in zip(METHODS, COLORS): + plt.plot( + NTHREADS, + [results[dataset][matrix][method][n_thread] for n_thread in NTHREADS], + label=method, + color=color, + marker="o", + linestyle="-", + linewidth=1, + ) + + plt.title(f"SpAdd - Runtime for {dataset}: {matrix}") + # plt.yscale("log", base=10) + plt.xticks(NTHREADS) + plt.xlabel("Number of Threads") + plt.ylabel(f"Runtime (in seconds)") + + plt.legend() + plt.savefig(save_location) + + +if __name__ == "__main__": + results = load_json() + for datasets in DATASETS: + for dataset, matrices in datasets.items(): + for matrix in matrices: + plot_speedup_result( + results, + dataset, + matrix, + f"{GRAPH_FOLDER}/{SPEEDUP_FOLDER}/{dataset}-{matrix}.png", + ) + plot_runtime_result( + results, + dataset, + matrix, + f"{GRAPH_FOLDER}/{RUNTIME_FOLDER}/{dataset}-{matrix}.png", + ) diff --git a/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png new file mode 100644 index 00000000..0fa39be8 Binary files /dev/null and 
b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png differ diff --git a/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png new file mode 100644 index 00000000..fe76ad30 Binary files /dev/null and b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png differ diff --git a/parallel/spadd/graph/runtime/uniform-1000-0.1.png b/parallel/spadd/graph/runtime/uniform-1000-0.1.png new file mode 100644 index 00000000..97344036 Binary files /dev/null and b/parallel/spadd/graph/runtime/uniform-1000-0.1.png differ diff --git a/parallel/spadd/graph/runtime/uniform-10000-0.1.png b/parallel/spadd/graph/runtime/uniform-10000-0.1.png new file mode 100644 index 00000000..5ffd7f04 Binary files /dev/null and b/parallel/spadd/graph/runtime/uniform-10000-0.1.png differ diff --git a/parallel/spadd/graph/runtime/uniform-1000000-3000000.png b/parallel/spadd/graph/runtime/uniform-1000000-3000000.png new file mode 100644 index 00000000..1cb14e7e Binary files /dev/null and b/parallel/spadd/graph/runtime/uniform-1000000-3000000.png differ diff --git a/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png new file mode 100644 index 00000000..6ef958a6 Binary files /dev/null and b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png differ diff --git a/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png new file mode 100644 index 00000000..d6689cc9 Binary files /dev/null and b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png differ diff --git a/parallel/spadd/graph/speedup/uniform-1000-0.1.png b/parallel/spadd/graph/speedup/uniform-1000-0.1.png new file mode 100644 index 00000000..363ad8f3 Binary files /dev/null and b/parallel/spadd/graph/speedup/uniform-1000-0.1.png differ diff --git a/parallel/spadd/graph/speedup/uniform-10000-0.1.png 
b/parallel/spadd/graph/speedup/uniform-10000-0.1.png new file mode 100644 index 00000000..639e8141 Binary files /dev/null and b/parallel/spadd/graph/speedup/uniform-10000-0.1.png differ diff --git a/parallel/spadd/graph/speedup/uniform-1000000-3000000.png b/parallel/spadd/graph/speedup/uniform-1000000-3000000.png new file mode 100644 index 00000000..57634862 Binary files /dev/null and b/parallel/spadd/graph/speedup/uniform-1000000-3000000.png differ diff --git a/parallel/spadd/parallel_col_separate_sparselist_results.jl b/parallel/spadd/parallel_col_separate_sparselist_results.jl new file mode 100644 index 00000000..ac358383 --- /dev/null +++ b/parallel/spadd/parallel_col_separate_sparselist_results.jl @@ -0,0 +1,19 @@ +using Finch +using BenchmarkTools + + +function parallel_col_separate_sparselist_results_add(A, B) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _B = Tensor(Dense(SparseList(Element(0.0))), B) + time = @belapsed begin + (_A, _B) = $(_A, _B) + global _C = Tensor(Dense(Separate(SparseList(Element(0.0))))) + @finch mode = :fast begin + _C .= 0 + for j = parallel(_), i = _ + _C[i, j] = _A[i, j] + _B[i, j] + end + end + end + return (; time=time, C=_C) +end diff --git a/parallel/spadd/results/spadd_10_threads.json b/parallel/spadd/results/spadd_10_threads.json new file mode 100644 index 00000000..6a617ee0 --- /dev/null +++ b/parallel/spadd/results/spadd_10_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.00759134, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.048477826, + "n_threads": 10, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.003060656, + "n_threads": 10, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.140822392, + "n_threads": 10, + "method": "serial_default_implementation", + 
"dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.343739416, + "n_threads": 10, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.025895345, + "n_threads": 10, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002042569, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.004510134, + "n_threads": 10, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.000682558, + "n_threads": 10, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.671362515, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.122503556, + "n_threads": 10, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.059856956, + "n_threads": 10, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.142538191, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.792686368, + "n_threads": 10, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.036291917, + "n_threads": 10, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } 
+ } +] diff --git a/parallel/spadd/results/spadd_11_threads.json b/parallel/spadd/results/spadd_11_threads.json new file mode 100644 index 00000000..165c87cf --- /dev/null +++ b/parallel/spadd/results/spadd_11_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007813918, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.047563831, + "n_threads": 11, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.002597417, + "n_threads": 11, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.136202874, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.33770435, + "n_threads": 11, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.025623614, + "n_threads": 11, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002113507, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.004420404, + "n_threads": 11, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.00067307, + "n_threads": 11, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.674409652, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.108030634, + "n_threads": 11, + "method": "parallel_col_separate_sparselist_results", + 
"dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.064515586, + "n_threads": 11, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.158126594, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.636382679, + "n_threads": 11, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.031231612, + "n_threads": 11, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_12_threads.json b/parallel/spadd/results/spadd_12_threads.json new file mode 100644 index 00000000..ed6fb321 --- /dev/null +++ b/parallel/spadd/results/spadd_12_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.00775952, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.047737394, + "n_threads": 12, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.002776356, + "n_threads": 12, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.117517579, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.340512482, + "n_threads": 12, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.016165751, + "n_threads": 12, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { 
+ "time": 0.002104756, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.004340178, + "n_threads": 12, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.000680325, + "n_threads": 12, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.660803585, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.117194109, + "n_threads": 12, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.068447556, + "n_threads": 12, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.146184782, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.830222054, + "n_threads": 12, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.03003548, + "n_threads": 12, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_1_threads.json b/parallel/spadd/results/spadd_1_threads.json new file mode 100644 index 00000000..769e9158 --- /dev/null +++ b/parallel/spadd/results/spadd_1_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007658117, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": 
"FEMLAB/poisson3Da" + }, + { + "time": 0.062250052, + "n_threads": 1, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.010286825, + "n_threads": 1, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.133347393, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.505824712, + "n_threads": 1, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.119929662, + "n_threads": 1, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00200161, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.007409245, + "n_threads": 1, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.002621693, + "n_threads": 1, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.67302425, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.411521742, + "n_threads": 1, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.398239441, + "n_threads": 1, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.093154071, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": 
{ + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 4.521424105, + "n_threads": 1, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.217809633, + "n_threads": 1, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_2_threads.json b/parallel/spadd/results/spadd_2_threads.json new file mode 100644 index 00000000..e3809667 --- /dev/null +++ b/parallel/spadd/results/spadd_2_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.008053671, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.058495541, + "n_threads": 2, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.005601081, + "n_threads": 2, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.134318176, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.413190171, + "n_threads": 2, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.049217263, + "n_threads": 2, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002080681, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.007421289, + "n_threads": 2, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.001444168, + "n_threads": 2, + 
"method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.660950071, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.256817776, + "n_threads": 2, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.274736245, + "n_threads": 2, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.089100803, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.755418452, + "n_threads": 2, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.093467113, + "n_threads": 2, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_3_threads.json b/parallel/spadd/results/spadd_3_threads.json new file mode 100644 index 00000000..d553cac4 --- /dev/null +++ b/parallel/spadd/results/spadd_3_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007815171, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.057860099, + "n_threads": 3, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.003969577, + "n_threads": 3, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.132775556, + "n_threads": 3, + "method": 
"serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.370948929, + "n_threads": 3, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.029833378, + "n_threads": 3, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002027846, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.006872266, + "n_threads": 3, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.00130002, + "n_threads": 3, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.672536501, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.195648383, + "n_threads": 3, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.230772324, + "n_threads": 3, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.142210671, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.74732249, + "n_threads": 3, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.061282212, + "n_threads": 3, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + 
"sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_4_threads.json b/parallel/spadd/results/spadd_4_threads.json new file mode 100644 index 00000000..9b27cbb0 --- /dev/null +++ b/parallel/spadd/results/spadd_4_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007712467, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.054351314, + "n_threads": 4, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.004878618, + "n_threads": 4, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.132065505, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.374797471, + "n_threads": 4, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.032860422, + "n_threads": 4, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002002598, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.005143974, + "n_threads": 4, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.000868699, + "n_threads": 4, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.669091315, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.161954816, + "n_threads": 4, + "method": 
"parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.136907266, + "n_threads": 4, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.15480946, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.67523947, + "n_threads": 4, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.043427596, + "n_threads": 4, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_5_threads.json b/parallel/spadd/results/spadd_5_threads.json new file mode 100644 index 00000000..2d350891 --- /dev/null +++ b/parallel/spadd/results/spadd_5_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007722766, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.052393424, + "n_threads": 5, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.004978728, + "n_threads": 5, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.117776194, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.34473256, + "n_threads": 5, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.027384202, + "n_threads": 5, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + 
"matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002031694, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.005564533, + "n_threads": 5, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.000879534, + "n_threads": 5, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.64254676, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.158092057, + "n_threads": 5, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.129361777, + "n_threads": 5, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.156580994, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.625150086, + "n_threads": 5, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.053426307, + "n_threads": 5, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_6_threads.json b/parallel/spadd/results/spadd_6_threads.json new file mode 100644 index 00000000..f8802068 --- /dev/null +++ b/parallel/spadd/results/spadd_6_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007953295, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "FEMLAB", 
+ "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.051207333, + "n_threads": 6, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.004370187, + "n_threads": 6, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.137860884, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.340840468, + "n_threads": 6, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.030216199, + "n_threads": 6, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002081867, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.00526826, + "n_threads": 6, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.001013415, + "n_threads": 6, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.67727586, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.141259518, + "n_threads": 6, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.108764424, + "n_threads": 6, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.138605195, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "uniform", 
+ "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.624903508, + "n_threads": 6, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.035666361, + "n_threads": 6, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_7_threads.json b/parallel/spadd/results/spadd_7_threads.json new file mode 100644 index 00000000..70631b6a --- /dev/null +++ b/parallel/spadd/results/spadd_7_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007921264, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.049803029, + "n_threads": 7, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.003912309, + "n_threads": 7, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.134428848, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.341892755, + "n_threads": 7, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.031928255, + "n_threads": 7, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002088799, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.004972098, + "n_threads": 7, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.000896116, + 
"n_threads": 7, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.681039133, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.126025398, + "n_threads": 7, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.102670088, + "n_threads": 7, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.172220199, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.598130947, + "n_threads": 7, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.035217355, + "n_threads": 7, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_8_threads.json b/parallel/spadd/results/spadd_8_threads.json new file mode 100644 index 00000000..ba4fafad --- /dev/null +++ b/parallel/spadd/results/spadd_8_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007804556, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.049307963, + "n_threads": 8, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.003644985, + "n_threads": 8, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.119541293, + "n_threads": 8, + "method": 
"serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.352797055, + "n_threads": 8, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.024231367, + "n_threads": 8, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002077453, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.004788928, + "n_threads": 8, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.000702856, + "n_threads": 8, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.655951213, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.14574732, + "n_threads": 8, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.127390267, + "n_threads": 8, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.156350457, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.643876252, + "n_threads": 8, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.040533781, + "n_threads": 8, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + 
"sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/results/spadd_9_threads.json b/parallel/spadd/results/spadd_9_threads.json new file mode 100644 index 00000000..9fed7dfa --- /dev/null +++ b/parallel/spadd/results/spadd_9_threads.json @@ -0,0 +1,134 @@ +[ + { + "time": 0.007863298, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.048378622, + "n_threads": 9, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.003199436, + "n_threads": 9, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.132222928, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.338526563, + "n_threads": 9, + "method": "parallel_col_separate_sparselist_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.026502123, + "n_threads": 9, + "method": "separated_memory_concatenate_results", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002097927, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.004664288, + "n_threads": 9, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.000671842, + "n_threads": 9, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000, + "sparsity": 0.1 + } + }, + { + "time": 0.680030172, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.116336138, + "n_threads": 9, + "method": 
"parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.073227472, + "n_threads": 9, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 10000, + "sparsity": 0.1 + } + }, + { + "time": 0.154322425, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 3.624687224, + "n_threads": 9, + "method": "parallel_col_separate_sparselist_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + }, + { + "time": 0.03488489, + "n_threads": 9, + "method": "separated_memory_concatenate_results", + "dataset": "uniform", + "matrix": { + "size": 1000000, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spadd/run.sh b/parallel/spadd/run.sh new file mode 100755 index 00000000..170c1259 --- /dev/null +++ b/parallel/spadd/run.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +for (( t=1 ; t<=$1 ; t++)); +do + echo "Running run_spadd.jl with $t threads" + julia "--threads=$t" "run_spadd.jl" +done diff --git a/parallel/spadd/run_local.sh b/parallel/spadd/run_local.sh new file mode 100755 index 00000000..d4525b32 --- /dev/null +++ b/parallel/spadd/run_local.sh @@ -0,0 +1 @@ +source run.sh 4 diff --git a/parallel/spadd/run_slurm.sl b/parallel/spadd/run_slurm.sl new file mode 100644 index 00000000..8b3de32f --- /dev/null +++ b/parallel/spadd/run_slurm.sl @@ -0,0 +1,12 @@ +#!/bin/bash +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=24 +#SBATCH --exclusive +#SBATCH -t 12:00:00 +#SBATCH --partition=lanka-v3 +#SBATCH --qos=commit-main +#SBATCH --mem 102400 +cd /data/scratch/paramuth/FinchBenchmarks/parallel/spadd +source run.sh 12 + diff --git a/parallel/spadd/run_spadd.jl b/parallel/spadd/run_spadd.jl new file mode 100644 index 00000000..02e59158 --- /dev/null +++ b/parallel/spadd/run_spadd.jl @@ -0,0 +1,125 @@ 
+using Base: nothing_sentinel +#!/usr/bin/env julia +if abspath(PROGRAM_FILE) == @__FILE__ + using Pkg + Pkg.activate(dirname(@__DIR__)) + Pkg.instantiate() +end +include("../../deps/diagnostics.jl") +print_diagnostics() + +using MatrixDepot +using BenchmarkTools +using ArgParse +using DataStructures +using JSON +using Random + +Random.seed!(1234) + +# Parsing Arguments +s = ArgParseSettings("Run Parallel SpAdd Experiments.") +@add_arg_table! s begin + "--output", "-o" + arg_type = String + help = "output file path" + "--dataset", "-d" + arg_type = String + help = "dataset keyword" + "--method", "-m" + arg_type = String + help = "method keyword" + "--accuracy-check", "-a" + action = :store_true + help = "check method accuracy" +end +parsed_args = parse_args(ARGS, s) + +# Mapping from dataset types to datasets +datasets = Dict( + "uniform" => [ + OrderedDict("size" => 1_000, "sparsity" => 0.1), + OrderedDict("size" => 10_000, "sparsity" => 0.1), + OrderedDict("size" => 1_000_000, "sparsity" => 3_000_000), + ], + "FEMLAB" => [ + "FEMLAB/poisson3Da", + "FEMLAB/poisson3Db", + ], +) + +# Mapping from method keywords to methods +include("serial_default_implementation.jl") +# include("parallel_col_separate_sparselist_results.jl") +include("separated_memory_concatenate_results.jl") + +methods = OrderedDict( + "serial_default_implementation" => serial_default_implementation_add, + # "parallel_col_separate_sparselist_results" => parallel_col_separate_sparselist_results_add, + "separated_memory_concatenate_results" => separated_memory_concatenate_results_add, +) + +if !isnothing(parsed_args["method"]) + method_name = parsed_args["method"] + @assert haskey(methods, method_name) "Unrecognize method for $method_name" + methods = OrderedDict( + method_name => methods[method_name] + ) +end + +function calculate_results(dataset, mtxs, results) + for mtx in mtxs + # Get relevant matrix + if dataset == "uniform" + A = fsprand(mtx["size"], mtx["size"], mtx["sparsity"]) + B = 
fsprand(mtx["size"], mtx["size"], mtx["sparsity"]) + elseif dataset == "FEMLAB" + A = matrixdepot(mtx) + row_permutation = randperm(size(A, 1)) + col_permutation = randperm(size(A, 2)) + B = A[row_permutation, col_permutation] + else + throw(ArgumentError("Cannot recognize dataset: $dataset")) + end + + for (key, method) in methods + result = method(A, B) + + if parsed_args["accuracy-check"] + # Check the result of the multiplication + serial_default_implementation_result = serial_default_implementation_add(A, B) + @assert result.C == serial_default_implementation_result.C "Incorrect result for $key" + end + + # Write result + time = result.time + @info "result for $key on $mtx" time + push!(results, OrderedDict( + "time" => time, + "n_threads" => Threads.nthreads(), + "method" => key, + "dataset" => dataset, + "matrix" => mtx, + )) + + if isnothing(parsed_args["output"]) + write("results/spadd_$(Threads.nthreads())_threads.json", JSON.json(results, 4)) + else + write(parsed_args["output"], JSON.json(results, 4)) + end + end + end +end + +results = [] +if isnothing(parsed_args["dataset"]) + for (dataset, mtxs) in datasets + calculate_results(dataset, mtxs, results) + end +else + dataset = parsed_args["dataset"] + mtxs = datasets[dataset] + calculate_results(dataset, mtxs, results) +end + + diff --git a/parallel/spadd/separated_memory_concatenate_results.jl b/parallel/spadd/separated_memory_concatenate_results.jl new file mode 100644 index 00000000..9b990f2b --- /dev/null +++ b/parallel/spadd/separated_memory_concatenate_results.jl @@ -0,0 +1,108 @@ +using Finch +using BenchmarkTools +using Base.Threads + +include("concat.jl") + +function separated_memory_concatenate_results_add(A, B) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _B = Tensor(Dense(SparseList(Element(0.0))), B) + time = @belapsed begin + (_A, _B) = $(_A, _B) + num_threads = Threads.nthreads() + partial_sum = 
Vector{Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}(undef, num_threads) + partial_nonzero_ptr = Vector{Int64}(undef, num_threads + 1) + partial_nonzero_ptr[1] = 0 + partial_column = Vector{Int64}(undef, num_threads + 1) + partial_column[1] = 0 + + num_col = size(_A)[2] + Threads.@threads for k = 1:num_threads + start_col = 1 + div((k - 1) * num_col, num_threads) + stop_col = div(k * num_col, num_threads) + partial_column[k+1] = stop_col + result = partial_add(_A, _B, start_col, stop_col) + partial_sum[k] = result.tensor + partial_nonzero_ptr[k+1] = result.num_nonzero + end + + for i in 2:length(partial_nonzero_ptr) + partial_nonzero_ptr[i] += partial_nonzero_ptr[i-1] + end + + global _C = concat_vec(partial_sum, partial_nonzero_ptr, partial_column) + end + return (; time=time, C=_C) +end + +# Add A and B from column start_col to stop_col (inclusive) +function partial_add(A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, B::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, start_col::Int64, stop_col::Int64) + @inbounds @fastmath(begin + A_lvl = A.lvl # DenseLevel + A_lvl_2 = A_lvl.lvl # SparseListLevel + A_lvl_ptr = A_lvl_2.ptr # Vector{Int64} + A_lvl_idx = A_lvl_2.idx # Vector{Int64} + # A_lvl_3 = A_lvl_2.lvl # ElementLevel + A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64} + + B_lvl = B.lvl # DenseLevel + B_lvl_2 = B_lvl.lvl # SparseListLevel + B_lvl_ptr = B_lvl_2.ptr # Vector{Int64} + B_lvl_idx = B_lvl_2.idx # Vector{Int64} + # B_lvl_3 = B_lvl_2.lvl # ElementLevel + B_lvl_2_val = B_lvl_2.lvl.val # Vector{Float64} + + # Assertion + # A_lvl.shape == B_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(A_lvl.shape) != $(B_lvl.shape))")) + # A_lvl_2.shape == B_lvl_2.shape || throw(DimensionMismatch("mismatched 
dimension limits ($(A_lvl_2.shape) != $(B_lvl_2.shape))")) + # A_lvl.shape >= stop_col || throw(DimensionMismatch("mismatched dimension limits ($(A_lvl.shape) < $(stop_col))")) + + C_lvl_2_val = Vector{Float64}() + C_lvl_idx = Vector{Int64}() + C_lvl_ptr = Vector{Int64}([1]) + current_ptr = 1 + for j = start_col:stop_col + A_idx = A_lvl_ptr[j] + B_idx = B_lvl_ptr[j] + while A_idx < A_lvl_ptr[j+1] && B_idx < B_lvl_ptr[j+1] + current_ptr += 1 + A_row_idx = A_lvl_idx[A_idx] + B_row_idx = B_lvl_idx[B_idx] + + if A_row_idx < B_row_idx + push!(C_lvl_2_val, A_lvl_2_val[A_idx]) + push!(C_lvl_idx, A_row_idx) + A_idx += 1 + elseif A_row_idx > B_row_idx + push!(C_lvl_2_val, B_lvl_2_val[B_idx]) + push!(C_lvl_idx, B_row_idx) + B_idx += 1 + else + push!(C_lvl_2_val, A_lvl_2_val[A_idx] + B_lvl_2_val[B_idx]) + push!(C_lvl_idx, A_row_idx) + A_idx += 1 + B_idx += 1 + end + end + + append!(C_lvl_2_val, @view A_lvl_2_val[A_idx:A_lvl_ptr[j+1]-1]) + append!(C_lvl_idx, @view A_lvl_idx[A_idx:A_lvl_ptr[j+1]-1]) + current_ptr += A_lvl_ptr[j+1] - A_idx + + append!(C_lvl_2_val, @view B_lvl_2_val[B_idx:B_lvl_ptr[j+1]-1]) + append!(C_lvl_idx, @view B_lvl_idx[B_idx:B_lvl_ptr[j+1]-1]) + current_ptr += B_lvl_ptr[j+1] - B_idx + + append!(C_lvl_ptr, current_ptr) + end + + C_lvl_3 = Element{0.0,Float64,Int64}(C_lvl_2_val) + C_lvl_shape = A_lvl_2.shape + + C_lvl_2 = SparseList{Int64}(C_lvl_3, C_lvl_shape, C_lvl_ptr, C_lvl_idx) + C_lvl = Dense{Int64}(C_lvl_2, stop_col - start_col + 1) + + C = Tensor(C_lvl) + return (tensor=C, num_nonzero=current_ptr - 1) + end) +end diff --git a/parallel/spadd/serial_default_implementation.jl b/parallel/spadd/serial_default_implementation.jl new file mode 100644 index 00000000..645081aa --- /dev/null +++ b/parallel/spadd/serial_default_implementation.jl @@ -0,0 +1,19 @@ +using Finch +using BenchmarkTools + + +function serial_default_implementation_add(A, B) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _B = Tensor(Dense(SparseList(Element(0.0))), B) + time = 
@belapsed begin + (_A, _B) = $(_A, _B) + global _C = Tensor(Dense(SparseList(Element(0.0)))) + @finch mode = :fast begin + _C .= 0 + for j = _, i = _ + _C[i, j] = _A[i, j] + _B[i, j] + end + end + end + return (; time=time, C=_C) +end diff --git a/parallel/spmv/.gitignore b/parallel/spmv/.gitignore new file mode 100644 index 00000000..4902375f --- /dev/null +++ b/parallel/spmv/.gitignore @@ -0,0 +1,2 @@ +Manifest.toml +slurm* diff --git a/parallel/spmv/dynamic_rows_grain.jl b/parallel/spmv/dynamic_rows_grain.jl new file mode 100644 index 00000000..fb4a092a --- /dev/null +++ b/parallel/spmv/dynamic_rows_grain.jl @@ -0,0 +1,52 @@ +using Finch +using BenchmarkTools + +function dynamic_rows_grain_helper(grain::Int64, y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}) + @inbounds @fastmath(begin + y_lvl = y.lvl + y_lvl_val = y_lvl.lvl.val + tns_lvl = A.body.lvl + tns_lvl_2 = tns_lvl.lvl + tns_lvl_ptr = tns_lvl_2.ptr + tns_lvl_idx = tns_lvl_2.idx + tns_lvl_2_val = tns_lvl_2.lvl.val + x_lvl = x.lvl + x_lvl_val = x_lvl.lvl.val + x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))")) + Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape) + val = y_lvl_val + y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads())) + x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads())) + tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads())) + tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads())) + tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads())) + + Threads.@threads for group = 1:cld(tns_lvl.shape, grain) + Finch.@barrier begin + @inbounds 
@fastmath(begin + for i = (group-1)*grain+1:min(tns_lvl.shape, group * grain) + for ptr = tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1 + val[i] += tns_lvl_2_val[ptr] * x_lvl_val[tns_lvl_idx[ptr]] + end + end + end) + end + end + + resize!(val, tns_lvl.shape) + end) +end + +function dynamic_rows_grain_generator(grain) + return (y, A, x) -> dynamic_rows_grain(grain, y, A, x) +end + +function dynamic_rows_grain(grain, y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + + time = @belapsed dynamic_rows_grain_helper($grain, $_y, $_A, $_x) + return (; time=time, y=_y) +end diff --git a/parallel/spmv/finch_parallel.jl b/parallel/spmv/finch_parallel.jl new file mode 100644 index 00000000..6c0c1e0e --- /dev/null +++ b/parallel/spmv/finch_parallel.jl @@ -0,0 +1,19 @@ +using Finch +using BenchmarkTools + + +function finch_parallel(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (_y, _A, _x) = $(_y, _A, _x) + @finch mode = :fast begin + _y .= 0 + for i = parallel(_), j = _ + _y[i] += _A[i, j] * _x[j] + end + end + end + return (; time=time, y=_y) +end diff --git a/parallel/spmv/graph.py b/parallel/spmv/graph.py new file mode 100644 index 00000000..f4f8d955 --- /dev/null +++ b/parallel/spmv/graph.py @@ -0,0 +1,219 @@ +import json +import os +from collections import defaultdict + +import matplotlib.pyplot as plt + +GRAPH_FOLDER = "graph" +SPEEDUP_FOLDER = "speedup" +RUNTIME_FOLDER = "runtime" +RESULTS_FOLDER = "results" +MEAN_SPEEDUP_FOLDER = "mean-speedup" + +NTHREADS = [i + 1 for i in range(12)] + +DEFAULT_METHOD = "serial_default_implementation" +METHODS = [ + # DEFAULT_METHOD, + # "finch_parallel", + "static_rows_equal", + "dynamic_rows_grain_1", + "dynamic_rows_grain_10", + "merge", + # "graph_partition_reorder_merge", + 
"graph_partition_weighted_reorder_merge", +] + +DATASETS = { + "uniform": ["1024-0.1", "8192-0.1", "1048576-3000000"], + "FEMLAB": ["FEMLAB-poisson3Da", "FEMLAB-poisson3Db"], + "vanHeukelum": [ + "vanHeukelum-cage10", + # "vanHeukelum-cage11", + # "vanHeukelum-cage12" + ], + "Williams": ["Williams-webbase-1M"], +} +NUM_MATRICES = sum([len(matrices) for matrices in DATASETS.values()]) + +COLORS = [ + "gray", + "cadetblue", + "saddlebrown", + "navy", + "black", + "orange", + "green", + "red", + "purple", +] + + +def load_json(): + combine_results = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: {}))) + for n_thread in NTHREADS: + results_json = json.load( + open(f"{RESULTS_FOLDER}/spmv_{n_thread}_threads.json", "r") + ) + for result in results_json: + + matrix = ( + result["matrix"].replace("/", "-") + if result["dataset"] != "uniform" + else f"{result['matrix']['size']}-{result['matrix']['sparsity']}" + ) + combine_results[result["dataset"]][matrix][result["method"]][ + result["n_threads"] + ] = result["time"] + + return combine_results + + +def plot_speedup_result(results, dataset, matrix, save_location): + plt.figure(figsize=(10, 10)) + for method, color in zip(METHODS, COLORS): + plt.plot( + NTHREADS, + [ + results[dataset][matrix][DEFAULT_METHOD][n_thread] + / results[dataset][matrix][method][n_thread] + for n_thread in NTHREADS + ], + label=method, + color=color, + marker="o", + linestyle="-", + linewidth=1, + ) + + plt.title(f"Speedup for {dataset}: {matrix} (with respect to {DEFAULT_METHOD})") + # plt.yscale("log", base=10) + plt.xticks(NTHREADS) + plt.xlabel("Number of Threads") + plt.ylabel(f"Speedup") + + plt.legend() + plt.savefig(save_location) + plt.close() + + +def plot_runtime_result(results, dataset, matrix, save_location): + plt.figure(figsize=(10, 10)) + for method, color in zip(METHODS, COLORS): + plt.plot( + NTHREADS, + [results[dataset][matrix][method][n_thread] for n_thread in NTHREADS], + label=method, + color=color, + marker="o", 
+ linestyle="-", + linewidth=1, + ) + + plt.title(f"Runtime for {dataset}: {matrix}") + # plt.yscale("log", base=10) + plt.xticks(NTHREADS) + plt.xlabel("Number of Threads") + plt.ylabel(f"Runtime (in seconds)") + + plt.legend() + plt.savefig(save_location) + plt.close() + + +def plot_mean_speedup_result(results, save_location): + plt.figure(figsize=(10, 10)) + for method, color in zip(METHODS, COLORS): + speedups = [1] * len(NTHREADS) + for dataset, matrices in DATASETS.items(): + for matrix in matrices: + for i, n_thread in enumerate(NTHREADS): + speedups[i] *= ( + results[dataset][matrix][DEFAULT_METHOD][n_thread] + / results[dataset][matrix][method][n_thread] + ) + + mean_speedups = [speedup ** (1 / NUM_MATRICES) for speedup in speedups] + plt.plot( + NTHREADS, + mean_speedups, + label=method, + color=color, + marker="o", + linestyle="-", + linewidth=1, + ) + + plt.title(f"Geometric Mean Speedup (with respect to {DEFAULT_METHOD})") + # plt.yscale("log", base=10) + plt.xticks(NTHREADS) + plt.xlabel("Number of Threads") + plt.ylabel(f"Speedup") + + plt.legend() + plt.savefig(save_location) + plt.close() + + +def plot_mean_speedup_separate_result(results, save_folder): + for method, color in zip(METHODS, COLORS): + plt.figure(figsize=(10, 10)) + speedups = [1] * len(NTHREADS) + for dataset, matrices in DATASETS.items(): + for matrix in matrices: + for i, n_thread in enumerate(NTHREADS): + speedups[i] *= ( + results[dataset][matrix][DEFAULT_METHOD][n_thread] + / results[dataset][matrix][method][n_thread] + ) + + mean_speedups = [speedup ** (1 / NUM_MATRICES) for speedup in speedups] + plt.plot( + NTHREADS, + mean_speedups, + label=method, + color=color, + marker="o", + linestyle="-", + linewidth=1, + ) + + plt.title( + f"Geometric Mean Speedup for {method} (with respect to {DEFAULT_METHOD})" + ) + # plt.yscale("log", base=10) + plt.xticks(NTHREADS) + plt.xlabel("Number of Threads") + plt.ylabel(f"Speedup") + + plt.legend() + plt.savefig(os.path.join(save_folder, 
f"{method}-mean-speedup.png")) + plt.close() + + +if __name__ == "__main__": + os.makedirs(os.path.join(GRAPH_FOLDER, SPEEDUP_FOLDER), exist_ok=True) + os.makedirs(os.path.join(GRAPH_FOLDER, RUNTIME_FOLDER), exist_ok=True) + os.makedirs(os.path.join(GRAPH_FOLDER, MEAN_SPEEDUP_FOLDER), exist_ok=True) + + results = load_json() + for dataset, matrices in DATASETS.items(): + for matrix in matrices: + plot_speedup_result( + results, + dataset, + matrix, + os.path.join(GRAPH_FOLDER, SPEEDUP_FOLDER, f"{dataset}-{matrix}.png"), + ) + plot_runtime_result( + results, + dataset, + matrix, + os.path.join(GRAPH_FOLDER, RUNTIME_FOLDER, f"{dataset}-{matrix}.png"), + ) + + plot_mean_speedup_result(results, os.path.join(GRAPH_FOLDER, "mean-speedup.png")) + + plot_mean_speedup_separate_result( + results, os.path.join(GRAPH_FOLDER, MEAN_SPEEDUP_FOLDER) + ) diff --git a/parallel/spmv/graph/mean-speedup.png b/parallel/spmv/graph/mean-speedup.png new file mode 100644 index 00000000..dfcc4a57 Binary files /dev/null and b/parallel/spmv/graph/mean-speedup.png differ diff --git a/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_1-mean-speedup.png b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_1-mean-speedup.png new file mode 100644 index 00000000..05d55367 Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_1-mean-speedup.png differ diff --git a/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_10-mean-speedup.png b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_10-mean-speedup.png new file mode 100644 index 00000000..3820bd4d Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_10-mean-speedup.png differ diff --git a/parallel/spmv/graph/mean-speedup/graph_partition_weighted_reorder_merge-mean-speedup.png b/parallel/spmv/graph/mean-speedup/graph_partition_weighted_reorder_merge-mean-speedup.png new file mode 100644 index 00000000..81ab1a4d Binary files /dev/null and 
b/parallel/spmv/graph/mean-speedup/graph_partition_weighted_reorder_merge-mean-speedup.png differ diff --git a/parallel/spmv/graph/mean-speedup/merge-mean-speedup.png b/parallel/spmv/graph/mean-speedup/merge-mean-speedup.png new file mode 100644 index 00000000..35d7abf6 Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/merge-mean-speedup.png differ diff --git a/parallel/spmv/graph/mean-speedup/static_rows_equal-mean-speedup.png b/parallel/spmv/graph/mean-speedup/static_rows_equal-mean-speedup.png new file mode 100644 index 00000000..df1e6969 Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/static_rows_equal-mean-speedup.png differ diff --git a/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png new file mode 100644 index 00000000..c3679b28 Binary files /dev/null and b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png differ diff --git a/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png new file mode 100644 index 00000000..2b54bf12 Binary files /dev/null and b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png differ diff --git a/parallel/spmv/graph/runtime/Williams-Williams-webbase-1M.png b/parallel/spmv/graph/runtime/Williams-Williams-webbase-1M.png new file mode 100644 index 00000000..d143e6d2 Binary files /dev/null and b/parallel/spmv/graph/runtime/Williams-Williams-webbase-1M.png differ diff --git a/parallel/spmv/graph/runtime/uniform-1024-0.1.png b/parallel/spmv/graph/runtime/uniform-1024-0.1.png new file mode 100644 index 00000000..8955433b Binary files /dev/null and b/parallel/spmv/graph/runtime/uniform-1024-0.1.png differ diff --git a/parallel/spmv/graph/runtime/uniform-1048576-3000000.png b/parallel/spmv/graph/runtime/uniform-1048576-3000000.png new file mode 100644 index 00000000..5a1c8bfe Binary files /dev/null and b/parallel/spmv/graph/runtime/uniform-1048576-3000000.png differ diff --git 
a/parallel/spmv/graph/runtime/uniform-8192-0.1.png b/parallel/spmv/graph/runtime/uniform-8192-0.1.png new file mode 100644 index 00000000..3ac940dc Binary files /dev/null and b/parallel/spmv/graph/runtime/uniform-8192-0.1.png differ diff --git a/parallel/spmv/graph/runtime/vanHeukelum-vanHeukelum-cage10.png b/parallel/spmv/graph/runtime/vanHeukelum-vanHeukelum-cage10.png new file mode 100644 index 00000000..eac51f52 Binary files /dev/null and b/parallel/spmv/graph/runtime/vanHeukelum-vanHeukelum-cage10.png differ diff --git a/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png new file mode 100644 index 00000000..8a4e8692 Binary files /dev/null and b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png differ diff --git a/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png new file mode 100644 index 00000000..30d8405e Binary files /dev/null and b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png differ diff --git a/parallel/spmv/graph/speedup/Williams-Williams-webbase-1M.png b/parallel/spmv/graph/speedup/Williams-Williams-webbase-1M.png new file mode 100644 index 00000000..53191679 Binary files /dev/null and b/parallel/spmv/graph/speedup/Williams-Williams-webbase-1M.png differ diff --git a/parallel/spmv/graph/speedup/uniform-1024-0.1.png b/parallel/spmv/graph/speedup/uniform-1024-0.1.png new file mode 100644 index 00000000..567ee6c6 Binary files /dev/null and b/parallel/spmv/graph/speedup/uniform-1024-0.1.png differ diff --git a/parallel/spmv/graph/speedup/uniform-1048576-3000000.png b/parallel/spmv/graph/speedup/uniform-1048576-3000000.png new file mode 100644 index 00000000..2bd8ca34 Binary files /dev/null and b/parallel/spmv/graph/speedup/uniform-1048576-3000000.png differ diff --git a/parallel/spmv/graph/speedup/uniform-8192-0.1.png b/parallel/spmv/graph/speedup/uniform-8192-0.1.png new file mode 100644 index 
00000000..023181c1 Binary files /dev/null and b/parallel/spmv/graph/speedup/uniform-8192-0.1.png differ diff --git a/parallel/spmv/graph/speedup/vanHeukelum-vanHeukelum-cage10.png b/parallel/spmv/graph/speedup/vanHeukelum-vanHeukelum-cage10.png new file mode 100644 index 00000000..698a4418 Binary files /dev/null and b/parallel/spmv/graph/speedup/vanHeukelum-vanHeukelum-cage10.png differ diff --git a/parallel/spmv/graph_partition_reorder_merge.jl b/parallel/spmv/graph_partition_reorder_merge.jl new file mode 100644 index 00000000..ebd4b745 --- /dev/null +++ b/parallel/spmv/graph_partition_reorder_merge.jl @@ -0,0 +1,18 @@ +using Finch +using BenchmarkTools + +include("utils/merge.jl") +include("utils/permutation.jl") + +function graph_partition_reorder_merge(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + + perm = create_permutation(_A) + _A = matrix_col_permutation(_A, perm) + time = @belapsed merge_helper($_y, $_A, $_x) + _y = vector_permutation(_y, invperm(perm)) + return (; time=time, y=_y) +end + diff --git a/parallel/spmv/graph_partition_weighted_reorder_merge.jl b/parallel/spmv/graph_partition_weighted_reorder_merge.jl new file mode 100644 index 00000000..2155d834 --- /dev/null +++ b/parallel/spmv/graph_partition_weighted_reorder_merge.jl @@ -0,0 +1,18 @@ +using Finch +using BenchmarkTools + +include("utils/merge.jl") +include("utils/permutation.jl") + +function graph_partition_weighted_reorder_merge(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + + perm = create_weighted_permutation(_A) + _A = matrix_col_permutation(_A, perm) + time = @belapsed merge_helper($_y, $_A, $_x) + _y = vector_permutation(_y, invperm(perm)) + return (; time=time, y=_y) +end + diff --git a/parallel/spmv/merge.jl b/parallel/spmv/merge.jl new file mode 
100644 index 00000000..f4dc1daf --- /dev/null +++ b/parallel/spmv/merge.jl @@ -0,0 +1,14 @@ +using Finch +using BenchmarkTools + +include("utils/merge.jl") + +function merge(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + + time = @belapsed merge_swizzle_helper($_y, $_A, $_x) + return (; time=time, y=_y) +end + diff --git a/parallel/spmv/results/spmv_10_threads.json b/parallel/spmv/results/spmv_10_threads.json new file mode 100644 index 00000000..50894f41 --- /dev/null +++ b/parallel/spmv/results/spmv_10_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009643868, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003440099, + "n_threads": 10, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002845301, + "n_threads": 10, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.06236093, + "n_threads": 10, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.009149666, + "n_threads": 10, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002606548, + "n_threads": 10, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002587723, + "n_threads": 10, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002533871, + "n_threads": 10, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000167899, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + 
{ + "time": 4.8454e-5, + "n_threads": 10, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 3.873e-5, + "n_threads": 10, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000506439, + "n_threads": 10, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 8.9795e-5, + "n_threads": 10, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.0905e-5, + "n_threads": 10, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.2894e-5, + "n_threads": 10, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.1114e-5, + "n_threads": 10, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000468416, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000120169, + "n_threads": 10, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000108414, + "n_threads": 10, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000667579, + "n_threads": 10, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000170153, + "n_threads": 10, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.3415e-5, + "n_threads": 10, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 6.8251e-5, + "n_threads": 10, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": 
"FEMLAB/poisson3Da" + }, + { + "time": 6.6844e-5, + "n_threads": 10, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008116175, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002274642, + "n_threads": 10, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001714347, + "n_threads": 10, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.008215332, + "n_threads": 10, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002621321, + "n_threads": 10, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001337518, + "n_threads": 10, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000735361, + "n_threads": 10, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000793144, + "n_threads": 10, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.5527e-5, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.4684e-5, + "n_threads": 10, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.327e-5, + "n_threads": 10, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 6.8314e-5, + "n_threads": 10, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5987e-5, 
+ "n_threads": 10, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6707e-5, + "n_threads": 10, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5733e-5, + "n_threads": 10, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.676e-5, + "n_threads": 10, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.017556801, + "n_threads": 10, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002451618, + "n_threads": 10, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001776321, + "n_threads": 10, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002033005, + "n_threads": 10, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001814307, + "n_threads": 10, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001778023, + "n_threads": 10, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001815023, + "n_threads": 10, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001790017, + "n_threads": 10, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.024290413, + "n_threads": 10, + 
"method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005790899, + "n_threads": 10, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004069952, + "n_threads": 10, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.070014211, + "n_threads": 10, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.01169825, + "n_threads": 10, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004476927, + "n_threads": 10, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004109582, + "n_threads": 10, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.00390903, + "n_threads": 10, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_11_threads.json b/parallel/spmv/results/spmv_11_threads.json new file mode 100644 index 00000000..8b4dc88f --- /dev/null +++ b/parallel/spmv/results/spmv_11_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009519351, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004151958, + "n_threads": 11, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003503004, + "n_threads": 11, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + 
"time": 0.05857516, + "n_threads": 11, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.009049442, + "n_threads": 11, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002777395, + "n_threads": 11, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002850664, + "n_threads": 11, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002721331, + "n_threads": 11, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000166378, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.0023e-5, + "n_threads": 11, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 3.8218e-5, + "n_threads": 11, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000466475, + "n_threads": 11, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 8.6463e-5, + "n_threads": 11, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 3.9445e-5, + "n_threads": 11, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.3237e-5, + "n_threads": 11, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.2714e-5, + "n_threads": 11, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.00047243, + "n_threads": 11, + "method": 
"serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.00011345, + "n_threads": 11, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 9.9091e-5, + "n_threads": 11, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000615361, + "n_threads": 11, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000154398, + "n_threads": 11, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 6.5221e-5, + "n_threads": 11, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 6.2861e-5, + "n_threads": 11, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 6.303e-5, + "n_threads": 11, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.007744543, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001739751, + "n_threads": 11, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001594418, + "n_threads": 11, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.007926876, + "n_threads": 11, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00245018, + "n_threads": 11, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001075475, + "n_threads": 11, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000723042, + "n_threads": 11, + "method": 
"graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000713275, + "n_threads": 11, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.5601e-5, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.4959e-5, + "n_threads": 11, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.1398e-5, + "n_threads": 11, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 6.2644e-5, + "n_threads": 11, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5593e-5, + "n_threads": 11, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.4851e-5, + "n_threads": 11, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5753e-5, + "n_threads": 11, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.4734e-5, + "n_threads": 11, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.016515352, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002263252, + "n_threads": 11, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001776991, + "n_threads": 11, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { 
+ "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001945073, + "n_threads": 11, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001811728, + "n_threads": 11, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001767701, + "n_threads": 11, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001797292, + "n_threads": 11, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001791533, + "n_threads": 11, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.023331361, + "n_threads": 11, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.00517173, + "n_threads": 11, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.00431359, + "n_threads": 11, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.064806961, + "n_threads": 11, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.010919977, + "n_threads": 11, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004261696, + "n_threads": 11, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004231305, + "n_threads": 11, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + 
"matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004286125, + "n_threads": 11, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_12_threads.json b/parallel/spmv/results/spmv_12_threads.json new file mode 100644 index 00000000..2dca4862 --- /dev/null +++ b/parallel/spmv/results/spmv_12_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009956742, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003543329, + "n_threads": 12, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003018592, + "n_threads": 12, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.054223164, + "n_threads": 12, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.009281952, + "n_threads": 12, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002476554, + "n_threads": 12, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002475601, + "n_threads": 12, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002396018, + "n_threads": 12, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000166476, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.6544e-5, + "n_threads": 12, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 3.8386e-5, + 
"n_threads": 12, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000423218, + "n_threads": 12, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 8.1339e-5, + "n_threads": 12, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.0795e-5, + "n_threads": 12, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.1059e-5, + "n_threads": 12, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 3.9998e-5, + "n_threads": 12, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.00046491, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000101879, + "n_threads": 12, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 9.2965e-5, + "n_threads": 12, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.001114028, + "n_threads": 12, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000172956, + "n_threads": 12, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 6.4726e-5, + "n_threads": 12, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 6.2158e-5, + "n_threads": 12, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 6.2546e-5, + "n_threads": 12, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + 
}, + { + "time": 0.00726372, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001529965, + "n_threads": 12, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001531283, + "n_threads": 12, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.005779237, + "n_threads": 12, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00240589, + "n_threads": 12, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000969966, + "n_threads": 12, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000661485, + "n_threads": 12, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00064121, + "n_threads": 12, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.6121e-5, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.3526e-5, + "n_threads": 12, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.3031e-5, + "n_threads": 12, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 5.9906e-5, + "n_threads": 12, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5602e-5, + "n_threads": 12, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6395e-5, + "n_threads": 12, 
+ "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6095e-5, + "n_threads": 12, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5727e-5, + "n_threads": 12, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.016858536, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002119424, + "n_threads": 12, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001766678, + "n_threads": 12, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001916977, + "n_threads": 12, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001798534, + "n_threads": 12, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001789788, + "n_threads": 12, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001807332, + "n_threads": 12, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001802153, + "n_threads": 12, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.023026484, + "n_threads": 12, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.00483719, + "n_threads": 12, + 
"method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004030619, + "n_threads": 12, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.057069952, + "n_threads": 12, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.010046876, + "n_threads": 12, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.003883476, + "n_threads": 12, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.003866337, + "n_threads": 12, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004119254, + "n_threads": 12, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_1_threads.json b/parallel/spmv/results/spmv_1_threads.json new file mode 100644 index 00000000..99abe8c4 --- /dev/null +++ b/parallel/spmv/results/spmv_1_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009758493, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.011934781, + "n_threads": 1, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.009648794, + "n_threads": 1, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.376620832, + "n_threads": 1, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.045343777, + "n_threads": 
1, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.00948524, + "n_threads": 1, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.009382047, + "n_threads": 1, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.009472763, + "n_threads": 1, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000166965, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000204249, + "n_threads": 1, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000162038, + "n_threads": 1, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.004114989, + "n_threads": 1, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000571666, + "n_threads": 1, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.0001708, + "n_threads": 1, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000169842, + "n_threads": 1, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000170159, + "n_threads": 1, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000471069, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000495378, + "n_threads": 1, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": 
"FEMLAB/poisson3Da" + }, + { + "time": 0.000421551, + "n_threads": 1, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.005181615, + "n_threads": 1, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000937408, + "n_threads": 1, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.00043271, + "n_threads": 1, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000432927, + "n_threads": 1, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000433703, + "n_threads": 1, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008194762, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.007958217, + "n_threads": 1, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.007081718, + "n_threads": 1, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.044890333, + "n_threads": 1, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.01203481, + "n_threads": 1, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.007812929, + "n_threads": 1, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.007975824, + "n_threads": 1, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.007909392, + "n_threads": 1, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": 
"FEMLAB/poisson3Db" + }, + { + "time": 9.5647e-5, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.000114406, + "n_threads": 1, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.000106025, + "n_threads": 1, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.000464804, + "n_threads": 1, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.000144713, + "n_threads": 1, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 8.4714e-5, + "n_threads": 1, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 8.4217e-5, + "n_threads": 1, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 8.5009e-5, + "n_threads": 1, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.01736104, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.014943839, + "n_threads": 1, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.011276663, + "n_threads": 1, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.0145074, + "n_threads": 1, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.011918071, + 
"n_threads": 1, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.01107792, + "n_threads": 1, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.010968425, + "n_threads": 1, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.010979151, + "n_threads": 1, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.02377866, + "n_threads": 1, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.024882528, + "n_threads": 1, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.023154304, + "n_threads": 1, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.509482449, + "n_threads": 1, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.069820808, + "n_threads": 1, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.022243753, + "n_threads": 1, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.022472338, + "n_threads": 1, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.023075844, + "n_threads": 1, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] 
diff --git a/parallel/spmv/results/spmv_2_threads.json b/parallel/spmv/results/spmv_2_threads.json new file mode 100644 index 00000000..3a9e9245 --- /dev/null +++ b/parallel/spmv/results/spmv_2_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009964391, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.007972971, + "n_threads": 2, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.005877332, + "n_threads": 2, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.23656665, + "n_threads": 2, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.029448929, + "n_threads": 2, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.005641295, + "n_threads": 2, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.00564319, + "n_threads": 2, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.005551018, + "n_threads": 2, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000166121, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000124576, + "n_threads": 2, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 9.9059e-5, + "n_threads": 2, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.002430129, + "n_threads": 2, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 
0.000331861, + "n_threads": 2, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.00010153, + "n_threads": 2, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000102108, + "n_threads": 2, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000103103, + "n_threads": 2, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000467852, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000280337, + "n_threads": 2, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000243803, + "n_threads": 2, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.003017835, + "n_threads": 2, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000506079, + "n_threads": 2, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000226237, + "n_threads": 2, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000219601, + "n_threads": 2, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000221282, + "n_threads": 2, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008039944, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.004703203, + "n_threads": 2, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": 
"FEMLAB/poisson3Db" + }, + { + "time": 0.00432242, + "n_threads": 2, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.025745014, + "n_threads": 2, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.006596804, + "n_threads": 2, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.004413467, + "n_threads": 2, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003589113, + "n_threads": 2, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003635945, + "n_threads": 2, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.5644e-5, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 7.61e-5, + "n_threads": 2, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 5.8658e-5, + "n_threads": 2, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.000269209, + "n_threads": 2, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 8.0223e-5, + "n_threads": 2, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.9227e-5, + "n_threads": 2, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.9069e-5, + "n_threads": 2, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { 
+ "time": 4.9087e-5, + "n_threads": 2, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.017415185, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.0098262, + "n_threads": 2, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.005780872, + "n_threads": 2, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.008157137, + "n_threads": 2, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.006249572, + "n_threads": 2, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.00536363, + "n_threads": 2, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.005423026, + "n_threads": 2, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.005580556, + "n_threads": 2, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.024042514, + "n_threads": 2, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.019520349, + "n_threads": 2, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.012407825, + "n_threads": 2, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + 
"time": 0.283553106, + "n_threads": 2, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.045740471, + "n_threads": 2, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.013791914, + "n_threads": 2, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.013032261, + "n_threads": 2, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.013846252, + "n_threads": 2, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_3_threads.json b/parallel/spmv/results/spmv_3_threads.json new file mode 100644 index 00000000..8c726fa0 --- /dev/null +++ b/parallel/spmv/results/spmv_3_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.010034864, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.005702206, + "n_threads": 3, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.00529541, + "n_threads": 3, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.16750896, + "n_threads": 3, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.019890016, + "n_threads": 3, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004619594, + "n_threads": 3, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004590387, + "n_threads": 3, + "method": 
"graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004782672, + "n_threads": 3, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000168131, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 9.5825e-5, + "n_threads": 3, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 7.9691e-5, + "n_threads": 3, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.001730798, + "n_threads": 3, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000244045, + "n_threads": 3, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 7.8443e-5, + "n_threads": 3, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 7.8327e-5, + "n_threads": 3, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 7.9323e-5, + "n_threads": 3, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000483364, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000218196, + "n_threads": 3, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.00020345, + "n_threads": 3, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.002179718, + "n_threads": 3, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + 
}, + { + "time": 0.000386113, + "n_threads": 3, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000163194, + "n_threads": 3, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000159581, + "n_threads": 3, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000157146, + "n_threads": 3, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008002527, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003662739, + "n_threads": 3, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003372173, + "n_threads": 3, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.017701609, + "n_threads": 3, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.005652785, + "n_threads": 3, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003126198, + "n_threads": 3, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002381582, + "n_threads": 3, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002357669, + "n_threads": 3, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.5258e-5, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 5.6856e-5, + "n_threads": 3, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": 
{ + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.6351e-5, + "n_threads": 3, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.000196504, + "n_threads": 3, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 5.8414e-5, + "n_threads": 3, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.1517e-5, + "n_threads": 3, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.0788e-5, + "n_threads": 3, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.1476e-5, + "n_threads": 3, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.01737392, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.005220831, + "n_threads": 3, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.003997662, + "n_threads": 3, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.006365112, + "n_threads": 3, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.004410126, + "n_threads": 3, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.00381635, + "n_threads": 3, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.003831612, 
+ "n_threads": 3, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.003768764, + "n_threads": 3, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.024035454, + "n_threads": 3, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.013937966, + "n_threads": 3, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.011814697, + "n_threads": 3, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.209505113, + "n_threads": 3, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.030546431, + "n_threads": 3, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.012302445, + "n_threads": 3, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.008427558, + "n_threads": 3, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.009402599, + "n_threads": 3, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_4_threads.json b/parallel/spmv/results/spmv_4_threads.json new file mode 100644 index 00000000..de3250f5 --- /dev/null +++ b/parallel/spmv/results/spmv_4_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009845164, + "n_threads": 4, + "method": 
"serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.005387483, + "n_threads": 4, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004176382, + "n_threads": 4, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.127107448, + "n_threads": 4, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.017085461, + "n_threads": 4, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003669864, + "n_threads": 4, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003864655, + "n_threads": 4, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003994657, + "n_threads": 4, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.00016827, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 8.2798e-5, + "n_threads": 4, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 6.1718e-5, + "n_threads": 4, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.001161871, + "n_threads": 4, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.00018191, + "n_threads": 4, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 6.492e-5, + "n_threads": 4, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 6.6829e-5, 
+ "n_threads": 4, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 6.6348e-5, + "n_threads": 4, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000466017, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000187027, + "n_threads": 4, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000165901, + "n_threads": 4, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.0014863, + "n_threads": 4, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000313389, + "n_threads": 4, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000138918, + "n_threads": 4, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000133718, + "n_threads": 4, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000133512, + "n_threads": 4, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008020643, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003470342, + "n_threads": 4, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002803906, + "n_threads": 4, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.014968816, + "n_threads": 4, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { 
+ "time": 0.004926657, + "n_threads": 4, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002386644, + "n_threads": 4, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001786607, + "n_threads": 4, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00177909, + "n_threads": 4, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.6165e-5, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.6473e-5, + "n_threads": 4, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.938e-5, + "n_threads": 4, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.000136472, + "n_threads": 4, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.6004e-5, + "n_threads": 4, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.5445e-5, + "n_threads": 4, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.5213e-5, + "n_threads": 4, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.2156e-5, + "n_threads": 4, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.01632266, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + 
"size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.003858071, + "n_threads": 4, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002902628, + "n_threads": 4, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.004289577, + "n_threads": 4, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.003202864, + "n_threads": 4, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002979373, + "n_threads": 4, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002901255, + "n_threads": 4, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002925194, + "n_threads": 4, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.023934745, + "n_threads": 4, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.012834558, + "n_threads": 4, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.009554827, + "n_threads": 4, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.165732327, + "n_threads": 4, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.024912395, + "n_threads": 4, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + 
"sparsity": 3000000 + } + }, + { + "time": 0.009573885, + "n_threads": 4, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.009831324, + "n_threads": 4, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.009350548, + "n_threads": 4, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_5_threads.json b/parallel/spmv/results/spmv_5_threads.json new file mode 100644 index 00000000..c22a215b --- /dev/null +++ b/parallel/spmv/results/spmv_5_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009567207, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004687249, + "n_threads": 5, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004546803, + "n_threads": 5, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.118082368, + "n_threads": 5, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.01372927, + "n_threads": 5, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002981844, + "n_threads": 5, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003414457, + "n_threads": 5, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003350071, + "n_threads": 5, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.00016666, + "n_threads": 5, + 
"method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 7.0345e-5, + "n_threads": 5, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.4058e-5, + "n_threads": 5, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000961102, + "n_threads": 5, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000150538, + "n_threads": 5, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.4963e-5, + "n_threads": 5, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.254e-5, + "n_threads": 5, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.4561e-5, + "n_threads": 5, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000474019, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000177902, + "n_threads": 5, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000147282, + "n_threads": 5, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.001236862, + "n_threads": 5, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000269187, + "n_threads": 5, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000112857, + "n_threads": 5, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.0001072, + 
"n_threads": 5, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000106968, + "n_threads": 5, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008057907, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003022412, + "n_threads": 5, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002447972, + "n_threads": 5, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.012907031, + "n_threads": 5, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003772267, + "n_threads": 5, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002026583, + "n_threads": 5, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001485475, + "n_threads": 5, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001766963, + "n_threads": 5, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.6056e-5, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.9651e-5, + "n_threads": 5, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.3699e-5, + "n_threads": 5, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.00011661, + "n_threads": 5, + "method": "dynamic_rows_grain_1", + "dataset": 
"uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 4.0058e-5, + "n_threads": 5, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.0503e-5, + "n_threads": 5, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.9704e-5, + "n_threads": 5, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.9704e-5, + "n_threads": 5, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.017574168, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.004571, + "n_threads": 5, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002449066, + "n_threads": 5, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.0036248, + "n_threads": 5, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002847066, + "n_threads": 5, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002436183, + "n_threads": 5, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002348688, + "n_threads": 5, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002348761, + "n_threads": 5, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + 
"sparsity": 0.1 + } + }, + { + "time": 0.023977009, + "n_threads": 5, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.006771296, + "n_threads": 5, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.008518309, + "n_threads": 5, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.125873658, + "n_threads": 5, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.020248894, + "n_threads": 5, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.008140321, + "n_threads": 5, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.007933917, + "n_threads": 5, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.008153084, + "n_threads": 5, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_6_threads.json b/parallel/spmv/results/spmv_6_threads.json new file mode 100644 index 00000000..564d1b14 --- /dev/null +++ b/parallel/spmv/results/spmv_6_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009361234, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004478361, + "n_threads": 6, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003534819, + "n_threads": 6, + "method": "static_rows_equal", + 
"dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.100616344, + "n_threads": 6, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.013263877, + "n_threads": 6, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003124986, + "n_threads": 6, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003100511, + "n_threads": 6, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003088635, + "n_threads": 6, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000168818, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 6.1869e-5, + "n_threads": 6, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.6988e-5, + "n_threads": 6, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000822581, + "n_threads": 6, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000134858, + "n_threads": 6, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.1506e-5, + "n_threads": 6, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.2901e-5, + "n_threads": 6, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.2046e-5, + "n_threads": 6, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 
0.000471814, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000153087, + "n_threads": 6, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000137955, + "n_threads": 6, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.001063284, + "n_threads": 6, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000239853, + "n_threads": 6, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 9.811e-5, + "n_threads": 6, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 9.2632e-5, + "n_threads": 6, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 9.4824e-5, + "n_threads": 6, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008051642, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002614253, + "n_threads": 6, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00246389, + "n_threads": 6, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.010403887, + "n_threads": 6, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00393785, + "n_threads": 6, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001883809, + "n_threads": 6, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001164359, + "n_threads": 6, + 
"method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.0011666, + "n_threads": 6, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.5592e-5, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.3877e-5, + "n_threads": 6, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6638e-5, + "n_threads": 6, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 9.9549e-5, + "n_threads": 6, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.6346e-5, + "n_threads": 6, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.7678e-5, + "n_threads": 6, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6703e-5, + "n_threads": 6, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.7389e-5, + "n_threads": 6, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.017526908, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.00278057, + "n_threads": 6, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002054939, + "n_threads": 6, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + 
"size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.003030898, + "n_threads": 6, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002767055, + "n_threads": 6, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002009575, + "n_threads": 6, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001942518, + "n_threads": 6, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002004999, + "n_threads": 6, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.024166515, + "n_threads": 6, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.006199904, + "n_threads": 6, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.007344162, + "n_threads": 6, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.114795672, + "n_threads": 6, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.017183134, + "n_threads": 6, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005600838, + "n_threads": 6, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.007002531, + "n_threads": 6, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + 
"size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005885946, + "n_threads": 6, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_7_threads.json b/parallel/spmv/results/spmv_7_threads.json new file mode 100644 index 00000000..26e801f6 --- /dev/null +++ b/parallel/spmv/results/spmv_7_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009616459, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.005433496, + "n_threads": 7, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004611218, + "n_threads": 7, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.087252937, + "n_threads": 7, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.012298776, + "n_threads": 7, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003267433, + "n_threads": 7, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002727487, + "n_threads": 7, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003282652, + "n_threads": 7, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000176648, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.6046e-5, + "n_threads": 7, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.8348e-5, + "n_threads": 7, + "method": 
"static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000701823, + "n_threads": 7, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000114441, + "n_threads": 7, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.1847e-5, + "n_threads": 7, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.7e-5, + "n_threads": 7, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.404e-5, + "n_threads": 7, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.00047561, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000144885, + "n_threads": 7, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000134665, + "n_threads": 7, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000936077, + "n_threads": 7, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000222172, + "n_threads": 7, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 8.911e-5, + "n_threads": 7, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.9689e-5, + "n_threads": 7, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 8.7207e-5, + "n_threads": 7, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008107633, + "n_threads": 
7, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002707667, + "n_threads": 7, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002715647, + "n_threads": 7, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.010539581, + "n_threads": 7, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003588769, + "n_threads": 7, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001565399, + "n_threads": 7, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001062527, + "n_threads": 7, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001009851, + "n_threads": 7, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.6731e-5, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.1831e-5, + "n_threads": 7, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6577e-5, + "n_threads": 7, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 8.7477e-5, + "n_threads": 7, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.1671e-5, + "n_threads": 7, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6019e-5, + "n_threads": 7, + "method": "merge", + "dataset": "uniform", + "matrix": 
{ + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5882e-5, + "n_threads": 7, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.6186e-5, + "n_threads": 7, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.017520845, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.003367956, + "n_threads": 7, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001910576, + "n_threads": 7, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002621007, + "n_threads": 7, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002502208, + "n_threads": 7, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001824189, + "n_threads": 7, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001862839, + "n_threads": 7, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001873753, + "n_threads": 7, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.02395662, + "n_threads": 7, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.007852781, + "n_threads": 7, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + 
"size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004915653, + "n_threads": 7, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.098469177, + "n_threads": 7, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.015045842, + "n_threads": 7, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005972665, + "n_threads": 7, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005895998, + "n_threads": 7, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005981885, + "n_threads": 7, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_8_threads.json b/parallel/spmv/results/spmv_8_threads.json new file mode 100644 index 00000000..0a0247fd --- /dev/null +++ b/parallel/spmv/results/spmv_8_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009633471, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003509669, + "n_threads": 8, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003125268, + "n_threads": 8, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.075567052, + "n_threads": 8, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.010366105, + "n_threads": 8, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + 
"matrix": "Williams/webbase-1M" + }, + { + "time": 0.002788955, + "n_threads": 8, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002749097, + "n_threads": 8, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.002698756, + "n_threads": 8, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000167885, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.1249e-5, + "n_threads": 8, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.2164e-5, + "n_threads": 8, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000619043, + "n_threads": 8, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000107943, + "n_threads": 8, + "method": "dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.4571e-5, + "n_threads": 8, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.3739e-5, + "n_threads": 8, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.4989e-5, + "n_threads": 8, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000480932, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000142752, + "n_threads": 8, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000126561, + "n_threads": 8, + 
"method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000830785, + "n_threads": 8, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000201268, + "n_threads": 8, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.7033e-5, + "n_threads": 8, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.7338e-5, + "n_threads": 8, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.509e-5, + "n_threads": 8, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.007914173, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002474617, + "n_threads": 8, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002108031, + "n_threads": 8, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.010114297, + "n_threads": 8, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.003139314, + "n_threads": 8, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.00141751, + "n_threads": 8, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000897536, + "n_threads": 8, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000919506, + "n_threads": 8, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.6804e-5, + "n_threads": 8, + "method": 
"serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.7154e-5, + "n_threads": 8, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.2343e-5, + "n_threads": 8, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 7.893e-5, + "n_threads": 8, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 3.0141e-5, + "n_threads": 8, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.3056e-5, + "n_threads": 8, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.4862e-5, + "n_threads": 8, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.3545e-5, + "n_threads": 8, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.016180878, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002931903, + "n_threads": 8, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001838998, + "n_threads": 8, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002342052, + "n_threads": 8, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002232027, + "n_threads": 8, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + 
"matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001832037, + "n_threads": 8, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001836819, + "n_threads": 8, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001831811, + "n_threads": 8, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.022766545, + "n_threads": 8, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.006992848, + "n_threads": 8, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005595463, + "n_threads": 8, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.085630893, + "n_threads": 8, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.013449564, + "n_threads": 8, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005515747, + "n_threads": 8, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005354054, + "n_threads": 8, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005351433, + "n_threads": 8, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/results/spmv_9_threads.json 
b/parallel/spmv/results/spmv_9_threads.json new file mode 100644 index 00000000..43c4fd00 --- /dev/null +++ b/parallel/spmv/results/spmv_9_threads.json @@ -0,0 +1,466 @@ +[ + { + "time": 0.009908263, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.004814435, + "n_threads": 9, + "method": "finch_parallel", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003935865, + "n_threads": 9, + "method": "static_rows_equal", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.070765292, + "n_threads": 9, + "method": "dynamic_rows_grain_1", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.010500957, + "n_threads": 9, + "method": "dynamic_rows_grain_10", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003227291, + "n_threads": 9, + "method": "merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003207084, + "n_threads": 9, + "method": "graph_partition_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.003073351, + "n_threads": 9, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "Williams", + "matrix": "Williams/webbase-1M" + }, + { + "time": 0.000167905, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 5.0516e-5, + "n_threads": 9, + "method": "finch_parallel", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.4298e-5, + "n_threads": 9, + "method": "static_rows_equal", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000559596, + "n_threads": 9, + "method": "dynamic_rows_grain_1", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 9.8149e-5, + "n_threads": 9, + "method": 
"dynamic_rows_grain_10", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.1998e-5, + "n_threads": 9, + "method": "merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.0085e-5, + "n_threads": 9, + "method": "graph_partition_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 4.385e-5, + "n_threads": 9, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "vanHeukelum", + "matrix": "vanHeukelum/cage10" + }, + { + "time": 0.000473874, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000131775, + "n_threads": 9, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000117682, + "n_threads": 9, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000741204, + "n_threads": 9, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.000186258, + "n_threads": 9, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.6094e-5, + "n_threads": 9, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.0633e-5, + "n_threads": 9, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 7.1818e-5, + "n_threads": 9, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Da" + }, + { + "time": 0.008060779, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002075341, + "n_threads": 9, + "method": "finch_parallel", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001837394, + "n_threads": 
9, + "method": "static_rows_equal", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.009405833, + "n_threads": 9, + "method": "dynamic_rows_grain_1", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.002912078, + "n_threads": 9, + "method": "dynamic_rows_grain_10", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.001223185, + "n_threads": 9, + "method": "merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000803988, + "n_threads": 9, + "method": "graph_partition_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 0.000800697, + "n_threads": 9, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "FEMLAB", + "matrix": "FEMLAB/poisson3Db" + }, + { + "time": 9.6064e-5, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5906e-5, + "n_threads": 9, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.3975e-5, + "n_threads": 9, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 7.158e-5, + "n_threads": 9, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.8141e-5, + "n_threads": 9, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.5829e-5, + "n_threads": 9, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.3908e-5, + "n_threads": 9, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 2.4674e-5, + "n_threads": 9, + "method": 
"graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1024, + "sparsity": 0.1 + } + }, + { + "time": 0.017438506, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002011319, + "n_threads": 9, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001814129, + "n_threads": 9, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.002144707, + "n_threads": 9, + "method": "dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001850657, + "n_threads": 9, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.00178314, + "n_threads": 9, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001809725, + "n_threads": 9, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.001819882, + "n_threads": 9, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 8192, + "sparsity": 0.1 + } + }, + { + "time": 0.022221907, + "n_threads": 9, + "method": "serial_default_implementation", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.003929099, + "n_threads": 9, + "method": "finch_parallel", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005236155, + "n_threads": 9, + "method": "static_rows_equal", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.077467082, + "n_threads": 9, + "method": 
"dynamic_rows_grain_1", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.012714277, + "n_threads": 9, + "method": "dynamic_rows_grain_10", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.004486373, + "n_threads": 9, + "method": "merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.005008397, + "n_threads": 9, + "method": "graph_partition_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + }, + { + "time": 0.003937992, + "n_threads": 9, + "method": "graph_partition_weighted_reorder_merge", + "dataset": "uniform", + "matrix": { + "size": 1048576, + "sparsity": 3000000 + } + } +] diff --git a/parallel/spmv/run.sh b/parallel/spmv/run.sh new file mode 100755 index 00000000..8b5565de --- /dev/null +++ b/parallel/spmv/run.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +for (( t=1 ; t<=$1 ; t++)); +do + echo "Running run_spmv.jl with $t threads" + julia "--threads=$t" "run_spmv.jl" +done diff --git a/parallel/spmv/run_local.sh b/parallel/spmv/run_local.sh new file mode 100755 index 00000000..d4525b32 --- /dev/null +++ b/parallel/spmv/run_local.sh @@ -0,0 +1 @@ +source run.sh 4 diff --git a/parallel/spmv/run_slurm.sl b/parallel/spmv/run_slurm.sl new file mode 100644 index 00000000..b1e2eb37 --- /dev/null +++ b/parallel/spmv/run_slurm.sl @@ -0,0 +1,12 @@ +#!/bin/bash +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=24 +#SBATCH --exclusive +#SBATCH -t 12:00:00 +#SBATCH --partition=lanka-v3 +#SBATCH --qos=commit-main +#SBATCH --mem 102400 +cd /data/scratch/paramuth/FinchBenchmarks/parallel/spmv +source run.sh 12 + diff --git a/parallel/spmv/run_spmv.jl b/parallel/spmv/run_spmv.jl new file mode 100644 index 00000000..5531e2ad --- /dev/null +++ b/parallel/spmv/run_spmv.jl @@ -0,0 +1,145 @@ +using Base: nothing_sentinel +#!/usr/bin/env julia +if 
abspath(PROGRAM_FILE) == @__FILE__ + using Pkg + Pkg.activate(dirname(@__DIR__)) + Pkg.instantiate() +end +include("../../deps/diagnostics.jl") +print_diagnostics() + +using MatrixDepot +using BenchmarkTools +using ArgParse +using DataStructures +using JSON +using LinearAlgebra +using Random + +Random.seed!(1234) + +using ThreadPinning +pinthreads(numa(1)) + +# Parsing Arguments +s = ArgParseSettings("Run Parallel SpMV Experiments.") +@add_arg_table! s begin + "--output", "-o" + arg_type = String + help = "output file path" + "--dataset", "-d" + arg_type = String + help = "dataset keyword" + "--method", "-m" + arg_type = String + help = "method keyword" + "--accuracy-check", "-a" + action = :store_true + help = "check method accuracy" +end +parsed_args = parse_args(ARGS, s) + +# Mapping from dataset types to datasets +datasets = Dict( + "uniform" => [ + OrderedDict("size" => 2^10, "sparsity" => 0.1), + OrderedDict("size" => 2^13, "sparsity" => 0.1), + OrderedDict("size" => 2^20, "sparsity" => 3_000_000) + ], + "FEMLAB" => [ + "FEMLAB/poisson3Da", + "FEMLAB/poisson3Db", + ], + "vanHeukelum" => [ + "vanHeukelum/cage10", + "vanHeukelum/cage11", + "vanHeukelum/cage12", + ], + "Williams" => [ + "Williams/webbase-1M", + ], +) + +# Mapping from method keywords to methods +include("serial_default_implementation.jl") +# include("finch_parallel.jl") +include("static_rows_equal.jl") +# include("dynamic_rows_grain.jl") +include("merge.jl") +# include("graph_partition_reorder_merge.jl") +include("graph_partition_weighted_reorder_merge.jl") + +methods = OrderedDict( + "serial_default_implementation" => serial_default_implementation_mul, + # "finch_parallel" => finch_parallel, + "static_rows_equal" => static_rows_equal, + # "dynamic_rows_grain_1" => dynamic_rows_grain_generator(1), + # "dynamic_rows_grain_10" => dynamic_rows_grain_generator(10), + "merge" => merge, + # "graph_partition_reorder_merge" => graph_partition_reorder_merge, + "graph_partition_weighted_reorder_merge" => 
graph_partition_weighted_reorder_merge, +) + +if !isnothing(parsed_args["method"]) + method_name = parsed_args["method"] + @assert haskey(methods, method_name) "Unrecognize method for $method_name" + methods = OrderedDict( + method_name => methods[method_name] + ) +end + +function calculate_results(dataset, mtxs, results) + for mtx in mtxs + # Get relevant matrix + if dataset == "uniform" + A = fsprand(mtx["size"], mtx["size"], mtx["sparsity"]) + else + A = matrixdepot(mtx) + end + + (num_rows, num_cols) = size(A) + # x is a dense vector + x = rand(num_cols) + # y is the result vector + y = zeros(num_rows) + + for (key, method) in methods + result = method(y, A, x) + + if parsed_args["accuracy-check"] + # Check the result of the multiplication + serial_default_implementation_result = serial_default_implementation_mul(y, A, x) + @assert norm(result.y - serial_default_implementation_result.y) / norm(serial_default_implementation_result.y) < 0.01 "Incorrect result for $key" + end + + # Write result + time = result.time + @info "result for $key on $mtx" time + push!(results, OrderedDict( + "time" => time, + "n_threads" => Threads.nthreads(), + "method" => key, + "dataset" => dataset, + "matrix" => mtx, + )) + if isnothing(parsed_args["output"]) + write("results/spmv_$(Threads.nthreads())_threads.json", JSON.json(results, 4)) + else + write(parsed_args["output"], JSON.json(results, 4)) + end + end + end +end + +results = [] +if isnothing(parsed_args["dataset"]) + for (dataset, mtxs) in datasets + calculate_results(dataset, mtxs, results) + end +else + dataset = parsed_args["dataset"] + mtxs = datasets[dataset] + calculate_results(dataset, mtxs, results) +end + + diff --git a/parallel/spmv/serial_default_implementation.jl b/parallel/spmv/serial_default_implementation.jl new file mode 100644 index 00000000..d4070a8a --- /dev/null +++ b/parallel/spmv/serial_default_implementation.jl @@ -0,0 +1,19 @@ +using Finch +using BenchmarkTools + + +function 
serial_default_implementation_mul(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (_y, _A, _x) = $(_y, _A, _x) + @finch mode = :fast begin + _y .= 0 + for j = _, i = _ + _y[i] += _A[i, j] * _x[j] + end + end + end + return (; time=time, y=_y) +end diff --git a/parallel/spmv/static_rows_equal.jl b/parallel/spmv/static_rows_equal.jl new file mode 100644 index 00000000..cc91a507 --- /dev/null +++ b/parallel/spmv/static_rows_equal.jl @@ -0,0 +1,48 @@ +using Finch +using BenchmarkTools + +function static_rows_equal_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}) + @inbounds @fastmath(begin + y_lvl = y.lvl + y_lvl_val = y_lvl.lvl.val + tns_lvl = A.body.lvl + tns_lvl_2 = tns_lvl.lvl + tns_lvl_ptr = tns_lvl_2.ptr + tns_lvl_idx = tns_lvl_2.idx + tns_lvl_2_val = tns_lvl_2.lvl.val + x_lvl = x.lvl + x_lvl_val = x_lvl.lvl.val + x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))")) + Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape) + val = y_lvl_val + y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads())) + x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads())) + tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads())) + tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads())) + tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads())) + + Threads.@threads for t = 1:Threads.nthreads() + Finch.@barrier begin + @inbounds @fastmath(begin + for i = 1+div((t - 1) * tns_lvl.shape, Threads.nthreads()):div(t * tns_lvl.shape, 
Threads.nthreads()) + for ptr = tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1 + val[i] += tns_lvl_2_val[ptr] * x_lvl_val[tns_lvl_idx[ptr]] + end + end + end) + end + end + + resize!(val, tns_lvl.shape) + end) +end + +function static_rows_equal(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + + time = @belapsed static_rows_equal_helper($_y, $_A, $_x) + return (; time=time, y=_y) +end diff --git a/parallel/spmv/unused/finch_kernel_parallel.jl b/parallel/spmv/unused/finch_kernel_parallel.jl new file mode 100644 index 00000000..67e95e67 --- /dev/null +++ b/parallel/spmv/unused/finch_kernel_parallel.jl @@ -0,0 +1,88 @@ +using Finch +using BenchmarkTools + +function finch_kernel_parallel_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}) + @inbounds @fastmath(begin + y_lvl = y.lvl + y_lvl_val = y_lvl.lvl.val + tns_lvl = A.body.lvl + tns_lvl_2 = tns_lvl.lvl + tns_lvl_ptr = tns_lvl_2.ptr + tns_lvl_idx = tns_lvl_2.idx + tns_lvl_2_val = tns_lvl_2.lvl.val + x_lvl = x.lvl + x_lvl_val = x_lvl.lvl.val + x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))")) + Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape) + val = y_lvl_val + y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads())) + x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads())) + tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads())) + tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads())) + tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads())) + Threads.@threads for i_4 
= 1:Threads.nthreads() + Finch.@barrier begin + @inbounds @fastmath(begin + phase_start_2 = max(1, 1 + fld(tns_lvl.shape * (i_4 + -1), Threads.nthreads())) + phase_stop_2 = min(tns_lvl.shape, fld(tns_lvl.shape * i_4, Threads.nthreads())) + if phase_stop_2 >= phase_start_2 + for i_7 = phase_start_2:phase_stop_2 + y_lvl_q = (1 - 1) * tns_lvl.shape + i_7 + tns_lvl_q = (1 - 1) * tns_lvl.shape + i_7 + tns_lvl_2_q = tns_lvl_ptr[tns_lvl_q] + tns_lvl_2_q_stop = tns_lvl_ptr[tns_lvl_q+1] + if tns_lvl_2_q < tns_lvl_2_q_stop + tns_lvl_2_i1 = tns_lvl_idx[tns_lvl_2_q_stop-1] + else + tns_lvl_2_i1 = 0 + end + phase_stop_3 = min(x_lvl.shape, tns_lvl_2_i1) + if phase_stop_3 >= 1 + if tns_lvl_idx[tns_lvl_2_q] < 1 + tns_lvl_2_q = Finch.scansearch(tns_lvl_idx, 1, tns_lvl_2_q, tns_lvl_2_q_stop - 1) + end + while true + tns_lvl_2_i = tns_lvl_idx[tns_lvl_2_q] + if tns_lvl_2_i < phase_stop_3 + tns_lvl_3_val = tns_lvl_2_val[tns_lvl_2_q] + x_lvl_q = (1 - 1) * x_lvl.shape + tns_lvl_2_i + x_lvl_2_val = x_lvl_val[x_lvl_q] + y_lvl_val[y_lvl_q] = tns_lvl_3_val * x_lvl_2_val + y_lvl_val[y_lvl_q] + tns_lvl_2_q += 1 + else + phase_stop_5 = min(phase_stop_3, tns_lvl_2_i) + if tns_lvl_2_i == phase_stop_5 + tns_lvl_3_val = tns_lvl_2_val[tns_lvl_2_q] + x_lvl_q = (1 - 1) * x_lvl.shape + phase_stop_5 + x_lvl_2_val_2 = x_lvl_val[x_lvl_q] + y_lvl_val[y_lvl_q] += tns_lvl_3_val * x_lvl_2_val_2 + tns_lvl_2_q += 1 + end + break + end + end + end + end + end + phase_start_6 = max(1, 1 + fld(tns_lvl.shape * i_4, Threads.nthreads())) + phase_stop_7 = tns_lvl.shape + if phase_stop_7 >= phase_start_6 + phase_stop_7 + 1 + end + end) + nothing + end + end + resize!(val, tns_lvl.shape) + end) +end + +function finch_kernel_parallel(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + + time = @belapsed finch_kernel_parallel_helper($_y, $_A, $_x) + return (; time=time, y=_y) +end diff --git 
a/parallel/spmv/unused/graph_partition.jl b/parallel/spmv/unused/graph_partition.jl new file mode 100644 index 00000000..3464bec5 --- /dev/null +++ b/parallel/spmv/unused/graph_partition.jl @@ -0,0 +1,78 @@ +using Finch +using BenchmarkTools +using Graphs +using Metis + +function create_partitions(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}) + tns_lvl = A.body.lvl + tns_lvl_2 = tns_lvl.lvl + tns_lvl_ptr = tns_lvl_2.ptr + tns_lvl_idx = tns_lvl_2.idx + + tns_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(tns_lvl.shape) != $(tns_lvl_2.shape))")) + + graph = SimpleGraph(tns_lvl.shape) + for v in 1:tns_lvl.shape + for ptr in tns_lvl_ptr[v]:tns_lvl_ptr[v+1]-1 + add_edge!(graph, v, tns_lvl_idx[ptr]) + end + end + + # Partition the graph + positions = Metis.partition(graph, Threads.nthreads(); alg=:KWAY) + partitions = [findall(positions .== i) for i in 1:Threads.nthreads()] + + return partitions +end + +function graph_partition_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, partitions::Vector{Vector{Int64}}) + @inbounds @fastmath(begin + y_lvl = y.lvl + y_lvl_val = y_lvl.lvl.val + tns_lvl = A.body.lvl + tns_lvl_2 = tns_lvl.lvl + tns_lvl_ptr = tns_lvl_2.ptr + tns_lvl_idx = tns_lvl_2.idx + tns_lvl_2_val = tns_lvl_2.lvl.val + x_lvl = x.lvl + x_lvl_val = x_lvl.lvl.val + x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))")) + Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape) + val = y_lvl_val + + y_lvl_val = (Finch).moveto(y_lvl_val, 
CPU(Threads.nthreads())) + x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads())) + tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads())) + tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads())) + tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads())) + partitions = (Finch).moveto(partitions, CPU(Threads.nthreads())) + + Threads.@threads for i = 1:Threads.nthreads() + Finch.@barrier begin + @inbounds @fastmath(begin + for row in partitions[i] + for ptr = tns_lvl_ptr[row]:tns_lvl_ptr[row+1]-1 + col = tns_lvl_idx[ptr] + A_val = tns_lvl_2_val[ptr] + y_lvl_val[row] += A_val * x_lvl_val[col] + end + end + end) + end + end + + resize!(val, tns_lvl.shape) + end) +end + +function graph_partition(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) + _x = Tensor(Dense(Element(0.0)), x) + partitions = create_partitions(_A) + + time = @belapsed graph_partition_helper($_y, $_A, $_x, $partitions) + return (; time=time, y=_y) +end + diff --git a/parallel/spmv/unused/split_cols_dynamic_grain_scratchspace.jl b/parallel/spmv/unused/split_cols_dynamic_grain_scratchspace.jl new file mode 100644 index 00000000..0e9b6efe --- /dev/null +++ b/parallel/spmv/unused/split_cols_dynamic_grain_scratchspace.jl @@ -0,0 +1,74 @@ +using Finch +using BenchmarkTools +using Base.Threads + + +function split_cols_dynamic_grain_scratchspace_mul(grain_size) + return (y, A, x) -> split_cols_dynamic_grain_scratchspace_helper(grain_size, y, A, x) +end +function split_cols_dynamic_grain_scratchspace_helper(grain_size, y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (grain_size, _y, _A, _x) = $(grain_size, _y, _A, _x) + split_cols_dynamic_grain_scratchspace(grain_size, _y, _A, _x) + end + return (; time=time, y=_y) +end + +function 
split_cols_dynamic_grain_scratchspace(grain_size::Int64, y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}) + @inbounds @fastmath(begin + y_lvl = y.lvl # DenseLevel + # y_lvl_2 = y_lvl.lvl # ElementLevel + y_lvl_val = y_lvl.lvl.val # Vector{Float64} + + A_lvl = A.lvl # DenseLevel + A_lvl_2 = A_lvl.lvl # SparseListLevel + A_lvl_ptr = A_lvl_2.ptr # Vector{Int64} + A_lvl_idx = A_lvl_2.idx # Vector{Int64} + # A_lvl_3 = A_lvl_2.lvl # ElementLevel + A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64} + + x_lvl = x.lvl # DenseLevel + # x_lvl_2 = x_lvl.lvl # ElementLevel + x_lvl_val = x_lvl.lvl.val # Vector{Float64} + + x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))")) + Finch.resize_if_smaller!(y_lvl_val, A_lvl_2.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, A_lvl_2.shape) + + num_threads = Threads.nthreads() + y_temps = [zeros(Float64, y_lvl.shape) for _ in 1:num_threads] + + cap_size = div(A_lvl.shape, grain_size) * grain_size + + Threads.@threads for group = 1:grain_size:cap_size + y_temp = y_temps[Threads.threadid()] + for j in group:group+grain_size-1 + for q in A_lvl_ptr[j]:A_lvl_ptr[j+1]-1 + i = A_lvl_idx[q] + temp_val = A_lvl_2_val[q] * x_lvl_val[j] + y_temp[i] += temp_val + end + end + end + + Threads.@threads for j = cap_size+1:A_lvl.shape + y_temp = y_temps[Threads.threadid()] + for q in A_lvl_ptr[j]:A_lvl_ptr[j+1]-1 + i = A_lvl_idx[q] + temp_val = A_lvl_2_val[q] * x_lvl_val[j] + y_temp[i] += temp_val + end + end + + Threads.@threads for k = 1:num_threads + for j = 1:num_threads + for i = 1+div((k - 1) * y_lvl.shape, num_threads):div(k * y_lvl.shape, num_threads) + y_lvl_val[i] += y_temps[j][i] + end + end + end + end) +end diff --git 
a/parallel/spmv/unused/split_cols_finch_parallel_atomics.jl b/parallel/spmv/unused/split_cols_finch_parallel_atomics.jl new file mode 100644 index 00000000..d0c34c20 --- /dev/null +++ b/parallel/spmv/unused/split_cols_finch_parallel_atomics.jl @@ -0,0 +1,19 @@ +using Finch +using BenchmarkTools + + +function split_cols_finch_parallel_atomics_mul(y, A, x) + _y = Tensor(Dense(AtomicElement(0.0)), y) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (_y, _A, _x) = $(_y, _A, _x) + @finch mode = :fast begin + _y .= 0 + for j = parallel(_), i = _ + _y[i] += _A[i, j] * _x[j] + end + end + end + return (; time=time, y=_y) +end diff --git a/parallel/spmv/unused/split_cols_finch_parallel_mutex.jl b/parallel/spmv/unused/split_cols_finch_parallel_mutex.jl new file mode 100644 index 00000000..3d020338 --- /dev/null +++ b/parallel/spmv/unused/split_cols_finch_parallel_mutex.jl @@ -0,0 +1,19 @@ +using Finch +using BenchmarkTools + + +function split_cols_finch_parallel_mutex_mul(y, A, x) + _y = Tensor(Dense(Mutex(Element(0.0))), y) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (_y, _A, _x) = $(_y, _A, _x) + @finch mode = :fast begin + _y .= 0 + for j = parallel(_), i = _ + _y[i] += _A[i, j] * _x[j] + end + end + end + return (; time=time, y=_y) +end diff --git a/parallel/spmv/unused/split_cols_static_scratchspace.jl b/parallel/spmv/unused/split_cols_static_scratchspace.jl new file mode 100644 index 00000000..2195031f --- /dev/null +++ b/parallel/spmv/unused/split_cols_static_scratchspace.jl @@ -0,0 +1,59 @@ +using Finch +using BenchmarkTools +using Base.Threads + + +function split_cols_static_scratchspace_mul(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (_y, _A, _x) = $(_y, _A, _x) + split_cols_static_scratchspace(_y, _A, _x) + end + 
return (; time=time, y=_y) +end + +function split_cols_static_scratchspace(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}) + @inbounds @fastmath(begin + y_lvl = y.lvl # DenseLevel + # y_lvl_2 = y_lvl.lvl # ElementLevel + y_lvl_val = y_lvl.lvl.val # Vector{Float64} + + A_lvl = A.lvl # DenseLevel + A_lvl_2 = A_lvl.lvl # SparseListLevel + A_lvl_ptr = A_lvl_2.ptr # Vector{Int64} + A_lvl_idx = A_lvl_2.idx # Vector{Int64} + # A_lvl_3 = A_lvl_2.lvl # ElementLevel + A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64} + + x_lvl = x.lvl # DenseLevel + # x_lvl_2 = x_lvl.lvl # ElementLevel + x_lvl_val = x_lvl.lvl.val # Vector{Float64} + + x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))")) + Finch.resize_if_smaller!(y_lvl_val, A_lvl_2.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, A_lvl_2.shape) + + num_threads = Threads.nthreads() + y_temps = [zeros(Float64, y_lvl.shape) for _ in 1:num_threads] + + Threads.@threads for k = 1:num_threads + for j = 1+div((k - 1) * A_lvl.shape, num_threads):div(k * A_lvl.shape, num_threads) + for q in A_lvl_ptr[j]:A_lvl_ptr[j+1]-1 + i = A_lvl_idx[q] + temp_val = A_lvl_2_val[q] * x_lvl_val[j] + y_temps[k][i] += temp_val + end + end + end + + Threads.@threads for k = 1:num_threads + for j = 1:num_threads + for i = 1+div((k - 1) * y_lvl.shape, num_threads):div(k * y_lvl.shape, num_threads) + y_lvl_val[i] += y_temps[j][i] + end + end + end + end) +end diff --git a/parallel/spmv/unused/split_nonzeros_dynamic_grain_scratchspace.jl b/parallel/spmv/unused/split_nonzeros_dynamic_grain_scratchspace.jl new file mode 100644 index 00000000..11c844e9 --- /dev/null +++ b/parallel/spmv/unused/split_nonzeros_dynamic_grain_scratchspace.jl @@ -0,0 +1,87 @@ +using Finch 
+using BenchmarkTools +using Base.Threads + +function split_nonzeros_dynamic_grain_scratchspace_mul(grain_size) + return (y, A, x) -> split_nonzeros_dynamic_grain_scratchspace_helper(grain_size, y, A, x) +end + + +function split_nonzeros_dynamic_grain_scratchspace_helper(grain_size, y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (grain_size, _y, _A, _x) = $(grain_size, _y, _A, _x) + split_nonzeros_dynamic_grain_scratchspace(grain_size, _y, _A, _x) + end + return (; time=time, y=_y) +end + +function split_nonzeros_dynamic_grain_scratchspace(grain_size::Int64, y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}) + @inbounds @fastmath(begin + y_lvl = y.lvl # DenseLevel + # y_lvl_2 = y_lvl.lvl # ElementLevel + y_lvl_val = y_lvl.lvl.val # Vector{Float64} + + A_lvl = A.lvl # DenseLevel + A_lvl_2 = A_lvl.lvl # SparseListLevel + A_lvl_ptr = A_lvl_2.ptr # Vector{Int64} + A_lvl_idx = A_lvl_2.idx # Vector{Int64} + # A_lvl_3 = A_lvl_2.lvl # ElementLevel + A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64} + + x_lvl = x.lvl # DenseLevel + # x_lvl_2 = x_lvl.lvl # ElementLevel + x_lvl_val = x_lvl.lvl.val # Vector{Float64} + + x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))")) + Finch.resize_if_smaller!(y_lvl_val, A_lvl_2.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, A_lvl_2.shape) + + num_threads = Threads.nthreads() + y_temps = [zeros(Float64, y_lvl.shape) for _ in 1:num_threads] + + # Load Balancing + num_nz = A_lvl_ptr[A_lvl.shape+1] - 1 + num_iter = div(num_nz, grain_size, RoundUp) + + start_indices = [1 + grain_size * k for k in 0:num_iter] + start_cols = zeros(Int64, 
num_iter + 1) + + col = 2 + target_pos = 1 + target_index = start_indices[target_pos] + while (col <= A_lvl.shape + 1 && target_pos <= num_iter) + if (A_lvl_ptr[col] > target_index) + start_cols[target_pos] = col - 1 + target_pos += 1 + target_index = start_indices[target_pos] + else + col += 1 + end + end + start_cols[num_iter+1] = A_lvl.shape + + Threads.@threads for k = 1:num_iter + start_index = start_indices[k] + end_index = start_indices[k+1] - 1 + y_temp = y_temps[Threads.threadid()] + for j = start_cols[k]:start_cols[k+1] + for q in max(A_lvl_ptr[j], start_index):min(A_lvl_ptr[j+1] - 1, end_index) + i = A_lvl_idx[q] + temp_val = A_lvl_2_val[q] * x_lvl_val[j] + y_temp[i] += temp_val + end + end + end + + Threads.@threads for k = 1:num_threads + for j = 1:num_threads + for i = 1+div((k - 1) * y_lvl.shape, num_threads):div(k * y_lvl.shape, num_threads) + y_lvl_val[i] += y_temps[j][i] + end + end + end + end) +end diff --git a/parallel/spmv/unused/split_nonzeros_static_scratchspace.jl b/parallel/spmv/unused/split_nonzeros_static_scratchspace.jl new file mode 100644 index 00000000..349c0ead --- /dev/null +++ b/parallel/spmv/unused/split_nonzeros_static_scratchspace.jl @@ -0,0 +1,79 @@ +using Finch +using BenchmarkTools +using Base.Threads + +function split_nonzeros_static_scratchspace_mul(y, A, x) + _y = Tensor(Dense(Element(0.0)), y) + _A = Tensor(Dense(SparseList(Element(0.0))), A) + _x = Tensor(Dense(Element(0.0)), x) + time = @belapsed begin + (_y, _A, _x) = $(_y, _A, _x) + split_nonzeros_static_scratchspace(_y, _A, _x) + end + return (; time=time, y=_y) +end + +function split_nonzeros_static_scratchspace(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}) + @inbounds @fastmath(begin + y_lvl = y.lvl # DenseLevel + # y_lvl_2 = 
y_lvl.lvl # ElementLevel + y_lvl_val = y_lvl.lvl.val # Vector{Float64} + + A_lvl = A.lvl # DenseLevel + A_lvl_2 = A_lvl.lvl # SparseListLevel + A_lvl_ptr = A_lvl_2.ptr # Vector{Int64} + A_lvl_idx = A_lvl_2.idx # Vector{Int64} + # A_lvl_3 = A_lvl_2.lvl # ElementLevel + A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64} + + x_lvl = x.lvl # DenseLevel + # x_lvl_2 = x_lvl.lvl # ElementLevel + x_lvl_val = x_lvl.lvl.val # Vector{Float64} + + x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))")) + Finch.resize_if_smaller!(y_lvl_val, A_lvl_2.shape) + Finch.fill_range!(y_lvl_val, 0.0, 1, A_lvl_2.shape) + + num_threads = Threads.nthreads() + y_temps = [zeros(Float64, y_lvl.shape) for _ in 1:num_threads] + + # Load Balancing + num_nz = A_lvl_ptr[A_lvl.shape+1] - 1 + start_indices = [1 + div(k * num_nz, num_threads) for k in 0:num_threads] + start_cols = zeros(Int64, num_threads + 1) + + col = 2 + target_pos = 1 + target_index = start_indices[target_pos] + while (col <= A_lvl.shape + 1 && target_pos <= num_threads) + if (A_lvl_ptr[col] > target_index) + start_cols[target_pos] = col - 1 + target_pos += 1 + target_index = start_indices[target_pos] + else + col += 1 + end + end + start_cols[num_threads+1] = A_lvl.shape + + Threads.@threads for k = 1:num_threads + start_index = start_indices[k] + end_index = start_indices[k+1] - 1 + for j = start_cols[k]:start_cols[k+1] + for q in max(A_lvl_ptr[j], start_index):min(A_lvl_ptr[j+1] - 1, end_index) + i = A_lvl_idx[q] + temp_val = A_lvl_2_val[q] * x_lvl_val[j] + y_temps[k][i] += temp_val + end + end + end + + Threads.@threads for k = 1:num_threads + for j = 1:num_threads + for i = 1+div((k - 1) * y_lvl.shape, num_threads):div(k * y_lvl.shape, num_threads) + y_lvl_val[i] += y_temps[j][i] + end + end + end + end) +end diff --git a/parallel/spmv/unused/transpose_split_rows_dynamic_grain.jl b/parallel/spmv/unused/transpose_split_rows_dynamic_grain.jl new file mode 100644 
"""
    transpose_split_rows_dynamic_grain_mul(grain_size)

Return a three-argument closure `(y, A, x) -> ...` that forwards to
`transpose_split_rows_dynamic_grain_helper` with `grain_size` fixed.
"""
function transpose_split_rows_dynamic_grain_mul(grain_size)
    return (y, A, x) -> transpose_split_rows_dynamic_grain_helper(grain_size, y, A, x)
end

"""
    transpose_split_rows_dynamic_grain_helper(grain_size, y, A, x)

Benchmark driver for `transpose_split_rows_dynamic_grain`.

Copies `y` and `x` into `Dense(Element(0.0))` tensors, stores `permutedims(A)` as
a `Dense(SparseList(Element(0.0)))` tensor wrapped in `swizzle(_, 2, 1)`, times
the kernel with `@belapsed`, and returns `(; time, y)` with the `@belapsed`
result and the output tensor.
"""
function transpose_split_rows_dynamic_grain_helper(grain_size, y, A, x)
    _y = Tensor(Dense(Element(0.0)), y)
    _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1)
    _x = Tensor(Dense(Element(0.0)), x)
    time = @belapsed begin
        (grain_size, _y, _A, _x) = $(grain_size, _y, _A, _x)
        transpose_split_rows_dynamic_grain(grain_size, _y, _A, _x)
    end
    return (; time=time, y=_y)
end

"""
    transpose_split_rows_dynamic_grain(grain_size, y, A, x)

Row-parallel sparse matrix-vector kernel on a swizzled tensor.

For every outer position `i` of `A.body`, sets
`y[i] = sum(val[q] * x[idx[q]] for q in ptr[i]:ptr[i+1]-1)`. Outer positions are
handed to `Threads.@threads` in groups of `grain_size`; the positions left over
after the last full group are handed out one per iteration.

# Arguments
- `grain_size::Int64`: number of consecutive outer positions per parallel task;
  must be at least 1.
- `y`: output vector; grown to the outer extent of `A.body` if needed and
  zero-filled first.
- `A`: `(2, 1)`-swizzled `Dense(SparseList(Element))` matrix.
- `x`: input vector whose extent must equal the inner extent of `A.body`.

# Throws
- `ArgumentError` if `grain_size < 1`.
- `DimensionMismatch` if `x.lvl.shape` differs from the inner extent of `A.body`.
"""
function transpose_split_rows_dynamic_grain(
    grain_size::Int64,
    y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}},
    A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}},
    x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}},
)
    # A zero grain used to surface as a bare DivideError, and a negative grain
    # produced an empty stepped range, so most rows were silently left at zero.
    grain_size >= 1 || throw(ArgumentError("grain_size must be at least 1, got $grain_size"))

    @inbounds @fastmath(begin
        y_lvl = y.lvl
        y_lvl_val = y_lvl.lvl.val
        tns_lvl = A.body.lvl
        tns_lvl_2 = tns_lvl.lvl
        tns_lvl_ptr = tns_lvl_2.ptr
        tns_lvl_idx = tns_lvl_2.idx
        tns_lvl_2_val = tns_lvl_2.lvl.val
        x_lvl = x.lvl
        x_lvl_val = x_lvl.lvl.val
        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape)

        # Largest multiple of grain_size that fits: rows 1:cap_size form full groups.
        cap_size = div(tns_lvl.shape, grain_size) * grain_size

        # NOTE(review): the Finch.@barrier invocations are kept exactly as
        # written; their semantics are defined by Finch and are not visible here.
        Threads.@threads for group = 1:grain_size:cap_size
            Finch.@barrier group grain_size tns_lvl_ptr tns_lvl_idx y_lvl_val tns_lvl_2_val x_lvl_val begin
                for i = group:group+grain_size-1
                    for q in tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1
                        j = tns_lvl_idx[q]
                        y_lvl_val[i] += tns_lvl_2_val[q] * x_lvl_val[j]
                    end
                end
            end
        end

        # Remainder rows that do not fill a whole group.
        Threads.@threads for i = cap_size+1:tns_lvl.shape
            Finch.@barrier tns_lvl_ptr tns_lvl_idx y_lvl_val tns_lvl_2_val x_lvl_val i begin
                for q in tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1
                    j = tns_lvl_idx[q]
                    y_lvl_val[i] += tns_lvl_2_val[q] * x_lvl_val[j]
                end
            end
        end
    end)
end
"""
    merge_path_search(diagonal::Int64, num_rows::Int64, num_nzs::Int64, row_ptr::Vector{Int64})

Locate the point where the merge path crosses a diagonal.

Binary-searches the row coordinate along the line `row + nz == diagonal` and
returns the tuple `(row, nz)`. The row component is clamped to `num_rows + 1`;
the nonzero component is `diagonal` minus the unclamped row found by the search.

# Arguments
- `diagonal::Int64`: the line s.t. row_idx + nz_idx = diagonal (diagonal >= 2)
- `num_rows::Int64`: the number of rows
- `num_nzs::Int64`: the number of nonzeros
- `row_ptr::Vector{Int64}`: the row ptr representing a cumulative number of nonzero elements
"""
function merge_path_search(diagonal::Int64, num_rows::Int64, num_nzs::Int64, row_ptr::Vector{Int64})
    row_limit = num_rows + 1
    lo = max(1, diagonal - num_nzs - 1)
    hi = min(row_limit, diagonal - 1)

    while lo < hi
        mid = (lo + hi) >> 1
        # Nonzero coordinate just before the diagonal point at row `mid`.
        nz_before = diagonal - mid - 1
        if row_ptr[mid+1] > nz_before
            # Row `mid` still has entries at or past that coordinate: the
            # crossing is at `mid` or an earlier row.
            hi = mid
        else
            # Row `mid` is fully consumed: the crossing lies in a later row.
            lo = mid + 1
        end
    end

    return (min(lo, row_limit), diagonal - lo)
end
# Shared merge-path SpMV kernel behind `merge_swizzle_helper` and `merge_helper`.
#
# `tns_lvl` is the outer Dense level whose child is the SparseList level being
# traversed. For every outer position `r`, the kernel stores
# `sum(val[q] * x[idx[q]] for q in ptr[r]:ptr[r+1]-1)` into `y`'s value buffer.
# The combined list of (outer positions + stored entries) is split into
# `Threads.nthreads()` equal diagonals of the merge path, so each task handles
# the same number of merge items regardless of how entries are distributed.
# Returns `y`'s value buffer after `resize!`-ing it to the outer extent.
function _merge_spmv!(y, tns_lvl, x)
    @inbounds @fastmath(begin
        y_lvl = y.lvl
        y_lvl_val = y_lvl.lvl.val
        tns_lvl_2 = tns_lvl.lvl
        tns_lvl_ptr = tns_lvl_2.ptr
        tns_lvl_idx = tns_lvl_2.idx
        tns_lvl_2_val = tns_lvl_2.lvl.val
        x_lvl = x.lvl
        x_lvl_val = x_lvl.lvl.val
        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape)
        # All writes below go through this alias of the output buffer.
        val = y_lvl_val

        num_threads = Threads.nthreads()

        # NOTE(review): the moveto calls (including the one whose result is not
        # read again) are kept as in the original; whether Finch.moveto has side
        # effects is not visible from this file.
        y_lvl_val = (Finch).moveto(y_lvl_val, CPU(num_threads))
        x_lvl_val = (Finch).moveto(x_lvl_val, CPU(num_threads))
        tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(num_threads))
        tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(num_threads))
        tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(num_threads))

        num_rows = tns_lvl.shape
        num_nzs = last(tns_lvl_ptr) - 1
        num_merge_items = num_rows + num_nzs # number of rows + number of nonzeros
        items_per_thread = cld(num_merge_items, num_threads)
        # Partial sum of the row each task stopped in the middle of, and that row.
        row_carry_out = Vector{Int64}(undef, num_threads)
        value_carry_out = Vector{Float64}(undef, num_threads)

        row_carry_out = (Finch).moveto(row_carry_out, CPU(num_threads))
        value_carry_out = (Finch).moveto(value_carry_out, CPU(num_threads))

        Threads.@threads for i_4 = 1:num_threads
            Finch.@barrier begin
                @inbounds @fastmath(begin
                    # Diagonals are clamped so surplus tasks get an empty range.
                    diagonal = min(items_per_thread * (i_4 - 1) + 2, num_merge_items + 2)
                    diagonal_end = min(diagonal + items_per_thread, num_merge_items + 2)
                    x_coord, y_coord = merge_path_search(diagonal, num_rows, num_nzs, tns_lvl_ptr)
                    x_coord_end, y_coord_end = merge_path_search(diagonal_end, num_rows, num_nzs, tns_lvl_ptr)

                    # Rows that end inside this task's range are written directly.
                    running_total = 0.0
                    while x_coord < x_coord_end
                        while y_coord < tns_lvl_ptr[x_coord+1]
                            running_total += tns_lvl_2_val[y_coord] * x_lvl_val[tns_lvl_idx[y_coord]]
                            y_coord += 1
                        end
                        val[x_coord] = running_total
                        running_total = 0.0
                        x_coord += 1
                    end

                    # Leading part of the row that continues into the next task.
                    while y_coord < y_coord_end
                        running_total += tns_lvl_2_val[y_coord] * x_lvl_val[tns_lvl_idx[y_coord]]
                        y_coord += 1
                    end

                    row_carry_out[i_4] = x_coord_end
                    value_carry_out[i_4] = running_total
                end)
            end
        end

        # Sequential fix-up after all tasks finished: add each carried partial
        # sum to the row it belongs to (a carry row of num_rows + 1 means none).
        for i = 1:num_threads
            if row_carry_out[i] < num_rows + 1
                val[row_carry_out[i]] += value_carry_out[i]
            end
        end
        resize!(val, tns_lvl.shape)
    end)
end

"""
    merge_swizzle_helper(y, A, x)

MergeSpMV on swizzle array

Runs the merge-path kernel over `A.body`: for every outer position `r` of
`A.body`, `y[r]` becomes the sum of `val[q] * x[idx[q]]` over the entries stored
at `r`.

# Arguments
- `y`: output vector; grown to the outer extent of `A.body` if needed, zero-filled,
  and finally resized to exactly that extent.
- `A`: `(2, 1)`-swizzled `Dense(SparseList(Element))` matrix.
- `x`: input vector whose extent must equal the inner extent of `A.body`
  (`DimensionMismatch` otherwise).
"""
function merge_swizzle_helper(
    y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}},
    A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}},
    x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}},
)
    return _merge_spmv!(y, A.body.lvl, x)
end

"""
    merge_helper(y, A, x)

MergeSpMV on array

Runs the merge-path kernel directly over `A`: for every outer position `r` of
`A`, `y[r]` becomes the sum of `val[q] * x[idx[q]]` over the entries stored at `r`.

# Arguments
- `y`: output vector; grown to the outer extent of `A` if needed, zero-filled, and
  finally resized to exactly that extent.
- `A`: `Dense(SparseList(Element))` matrix.
- `x`: input vector whose extent must equal the inner extent of `A`
  (`DimensionMismatch` otherwise).
"""
function merge_helper(
    y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}},
    A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}},
    x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}},
)
    return _merge_spmv!(y, A.lvl, x)
end
"""
    create_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})

Return a permutation that groups the rows/columns of `A` by Metis partition.

The sparsity pattern of `A.body` is symmetrised into an undirected graph (one
vertex per outer position, one edge per stored off-diagonal entry), the graph is
split into `Threads.nthreads()` parts with `Metis.partition(...; alg=:KWAY)`, and
the vertices are ordered by part number with `sortperm`.

# Arguments
- `A`: a matrix to create permutation on, the matrix must be NxN (rows == columns)

# Throws
- `DimensionMismatch` if the outer and inner extents of `A.body` differ.
"""
function create_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})
    tns_lvl = A.body.lvl
    tns_lvl_2 = tns_lvl.lvl
    tns_lvl_ptr = tns_lvl_2.ptr
    tns_lvl_idx = tns_lvl_2.idx

    tns_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(tns_lvl.shape) != $(tns_lvl_2.shape))"))

    num_vertices = tns_lvl.shape
    neighbors = [idx_t[] for _ in 1:num_vertices]

    for v in 1:num_vertices
        for q in tns_lvl_ptr[v]:tns_lvl_ptr[v+1]-1
            u = tns_lvl_idx[q]
            # Diagonal entries would become self-loops, which the graph handed
            # to Metis must not contain.
            u == v && continue
            push!(neighbors[v], u)
            push!(neighbors[u], v)
        end
    end
    # An entry stored in both (u, v) and (v, u) was pushed twice on each side;
    # keep every undirected edge exactly once per endpoint.
    for list in neighbors
        unique!(sort!(list))
    end

    # CSR-style offsets (1-based) into the flattened adjacency array.
    xadj = Vector{idx_t}(undef, num_vertices + 1)
    xadj[1] = 1
    for v in 1:num_vertices
        xadj[v+1] = xadj[v] + length(neighbors[v])
    end

    # Flatten without splatting one argument per vertex into vcat; this also
    # yields a correctly typed empty array when there are no edges.
    adjncy = Vector{idx_t}(undef, xadj[end] - 1)
    for v in 1:num_vertices
        copyto!(adjncy, xadj[v], neighbors[v], 1, length(neighbors[v]))
    end

    graph = Metis.Graph(convert(idx_t, num_vertices), xadj, adjncy)

    # Partition the graph
    positions = Metis.partition(graph, Threads.nthreads(); alg=:KWAY)

    # Vertices ordered by the part they were assigned to
    return sortperm(positions)
end
"""
    create_weighted_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})

Return a permutation that groups the rows/columns of `A` by a vertex-weighted
Metis partition.

The sparsity pattern of `A.body` is symmetrised into an undirected graph (one
vertex per outer position, one edge per stored off-diagonal entry). Each vertex
is weighted by the number of entries stored at that position plus one, the graph
is split into `Threads.nthreads()` parts with `Metis.partition(...; alg=:KWAY)`,
and the vertices are ordered by part number with `sortperm`.

# Arguments
- `A`: a matrix to create permutation on, the matrix must be NxN (rows == columns)

# Throws
- `DimensionMismatch` if the outer and inner extents of `A.body` differ.
"""
function create_weighted_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})
    tns_lvl = A.body.lvl
    tns_lvl_2 = tns_lvl.lvl
    tns_lvl_ptr = tns_lvl_2.ptr
    tns_lvl_idx = tns_lvl_2.idx

    tns_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(tns_lvl.shape) != $(tns_lvl_2.shape))"))

    num_vertices = tns_lvl.shape
    neighbors = [idx_t[] for _ in 1:num_vertices]
    vwgt = zeros(idx_t, num_vertices)

    for v in 1:num_vertices
        # Weight counts every stored entry (diagonal included); the last 1 is
        # for the row itself.
        vwgt[v] = tns_lvl_ptr[v+1] - tns_lvl_ptr[v] + 1
        for q in tns_lvl_ptr[v]:tns_lvl_ptr[v+1]-1
            u = tns_lvl_idx[q]
            # Diagonal entries would become self-loops, which the graph handed
            # to Metis must not contain.
            u == v && continue
            push!(neighbors[v], u)
            push!(neighbors[u], v)
        end
    end
    # An entry stored in both (u, v) and (v, u) was pushed twice on each side;
    # keep every undirected edge exactly once per endpoint.
    for list in neighbors
        unique!(sort!(list))
    end

    # CSR-style offsets (1-based) into the flattened adjacency array.
    xadj = Vector{idx_t}(undef, num_vertices + 1)
    xadj[1] = 1
    for v in 1:num_vertices
        xadj[v+1] = xadj[v] + length(neighbors[v])
    end

    # Flatten without splatting one argument per vertex into vcat; this also
    # yields a correctly typed empty array when there are no edges.
    adjncy = Vector{idx_t}(undef, xadj[end] - 1)
    for v in 1:num_vertices
        copyto!(adjncy, xadj[v], neighbors[v], 1, length(neighbors[v]))
    end

    graph = Metis.Graph(convert(idx_t, num_vertices), xadj, adjncy, vwgt)

    # Partition the graph
    positions = Metis.partition(graph, Threads.nthreads(); alg=:KWAY)

    # Vertices ordered by the part they were assigned to
    return sortperm(positions)
end
"""
    vector_permutation(v::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, perm::Vector{Int64})

Create a permutation of a vector

Indexes the value buffer of `v` with `perm` and wraps the result in a new
`Dense(Element(0.0))` tensor; `v` itself is not modified.

# Arguments
- `v`: vector to be permuted
- `perm::Vector{Int64}`: permutation vector, must have size at most length(v)
"""
function vector_permutation(v::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, perm::Vector{Int64})
    permuted = v.lvl.lvl.val[perm]
    return Tensor(Dense(Element(0.0)), permuted)
end

"""
    matrix_col_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, perm::Vector{Int64})

Create a column permutation of a matrix

Views the buffers of `A.body` as a `SparseMatrixCSC` (outer Dense level =
columns, inner SparseList level = rows), selects its columns in the order given
by `perm`, and returns the result as a new `Dense(SparseList(Element(0.0)))`
tensor. The returned tensor is not wrapped in a swizzle.

# Arguments
- `A`: matrix to be permuted
- `perm::Vector{Int64}`: permutation vector, must have size at most number of columns of A
"""
function matrix_col_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, perm::Vector{Int64})
    tns_lvl = A.body.lvl
    tns_lvl_2 = tns_lvl.lvl

    # SparseMatrixCSC(m, n, colptr, rowval, nzval) takes the row count first and
    # expects length(colptr) == n + 1. `ptr` has one slot per outer position and
    # `idx` holds inner indices, so the inner extent is m and the outer extent
    # is n. The previous argument order only worked for square matrices.
    stored = SparseMatrixCSC(tns_lvl_2.shape, tns_lvl.shape, tns_lvl_2.ptr, tns_lvl_2.idx, tns_lvl_2.lvl.val)
    return Tensor(Dense(SparseList(Element(0.0))), stored[:, perm])
end