diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index e942ab6c..00000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/.gitmodules b/.gitmodules
index f1abbe18..b09492e6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "deps/SparseRooflineBenchmark"]
 	path = deps/SparseRooflineBenchmark
-	url = https://github.com/SparseRooflineBenchmark/SparseRooflineBenchmark
+	url = git@github.com:Paramuths/SparseRooflineBenchmark.git
 [submodule "deps/taco"]
 	path = deps/taco
 	url = https://github.com/tensor-compiler/taco
diff --git a/deps/SparseRooflineBenchmark b/deps/SparseRooflineBenchmark
index 68d595bc..247652ae 160000
--- a/deps/SparseRooflineBenchmark
+++ b/deps/SparseRooflineBenchmark
@@ -1 +1 @@
-Subproject commit 68d595bc3b7bb6cc72a047b3bf15896c53948c51
+Subproject commit 247652ae192d84b1ad562fff72e4769d6fd68977
diff --git a/parallel/.gitignore b/parallel/.gitignore
new file mode 100644
index 00000000..ba39cc53
--- /dev/null
+++ b/parallel/.gitignore
@@ -0,0 +1 @@
+Manifest.toml
diff --git a/parallel/Project.toml b/parallel/Project.toml
new file mode 100644
index 00000000..59770062
--- /dev/null
+++ b/parallel/Project.toml
@@ -0,0 +1,15 @@
+[deps]
+ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
+Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+Finch = "9177782c-1635-4eb9-9bfb-d9dfa25e6bce"
+Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
+IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MatrixDepot = "b51810bb-c9f3-55da-ae3c-350fc1fbce05"
+Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b"
+SuiteSparseGraphBLAS = "c2e53296-7b14-11e9-1210-bddfa8111e1d"
+TensorMarket = "8b7d4fe7-0b45-4d0d-9dd8-5cc9b23b4b77"
+ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042"
diff --git a/parallel/spadd/.gitignore b/parallel/spadd/.gitignore
new file mode 100644
index 00000000..4902375f
--- /dev/null
+++ b/parallel/spadd/.gitignore
@@ -0,0 +1,2 @@
+Manifest.toml
+slurm*
diff --git a/parallel/spadd/concat.jl b/parallel/spadd/concat.jl
new file mode 100644
index 00000000..743e3098
--- /dev/null
+++ b/parallel/spadd/concat.jl
@@ -0,0 +1,66 @@
+using Finch
+using Base.Threads
+
+"""
+    concat(A, B)
+
+Build a new tensor that stores the contents of `A` followed by those of `B` along the
+outer `Dense` level. Throws `DimensionMismatch` when the two inner `SparseList`
+shapes differ. The arrays of `A` and `B` are only read, never modified.
+"""
+function concat(A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, B::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}})
+    @inbounds @fastmath begin
+        outer_a, outer_b = A.lvl, B.lvl
+        sparse_a, sparse_b = outer_a.lvl, outer_b.lvl
+
+        # Stored values: everything from A first, then everything from B.
+        merged_val = vcat(sparse_a.lvl.val, sparse_b.lvl.val)
+        merged_elem = Element{0.0,Float64,Int64}(merged_val)
+
+        # Both operands must agree on the inner (sparse) dimension.
+        sparse_a.shape == sparse_b.shape || throw(DimensionMismatch("mismatched dimension limits ($(sparse_a.shape) != $(sparse_b.shape))"))
+        inner_shape = sparse_a.shape
+
+        # B's first pointer entry is dropped (`2:end`); the remaining entries are
+        # shifted by `last(ptr_a) - 1` before being appended to A's pointers.
+        ptr_a = sparse_a.ptr
+        ptr_b_shifted = sparse_b.ptr[2:end] .+ (last(ptr_a) - 1)
+        merged_ptr = vcat(ptr_a, ptr_b_shifted)
+
+        # Index arrays are joined in the same A-then-B order as the values.
+        merged_idx = vcat(sparse_a.idx, sparse_b.idx)
+
+        # Rebuild the level hierarchy from the innermost level outwards.
+        merged_sparse = SparseList{Int64}(merged_elem, inner_shape, merged_ptr, merged_idx)
+        merged_dense = Dense{Int64}(merged_sparse, outer_a.shape + outer_b.shape)
+        return Tensor(merged_dense)
+    end
+end
+
+"""
+    concat_vec(V, nonzero_offset, columns)
+
+Assemble the tensors in `V` into one tensor: chunk `i` fills stored entries
+`nonzero_offset[i]+1:nonzero_offset[i+1]` and pointer slots `columns[i]+2:columns[i+1]+1`; the inner shape is taken from `V[1]`.
+Throws `ArgumentError` if `V` is empty or an offset vector has fewer than `length(V) + 1` entries.
+"""
+function concat_vec(V::Vector{Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, nonzero_offset::Vector{Int64}, columns::Vector{Int64})
+    # Validate before the `@inbounds` region, where out-of-range indexing goes unchecked.
+    isempty(V) && throw(ArgumentError("concat_vec requires at least one tensor"))
+    min(length(nonzero_offset), length(columns)) > length(V) || throw(ArgumentError("nonzero_offset and columns must each have at least length(V) + 1 entries"))
+    @inbounds @fastmath(begin
+        B_lvl_2_val = Vector{Float64}(undef, last(nonzero_offset))
+        B_lvl_idx = Vector{Int64}(undef, last(nonzero_offset))
+        B_lvl_ptr = Vector{Int64}(undef, last(columns) + 1)
+        B_lvl_ptr[1] = 1
+        # Chunks write disjoint ranges as long as both offset vectors are non-decreasing.
+        Threads.@threads for i in eachindex(V)
+            B_lvl_2_val[nonzero_offset[i]+1:nonzero_offset[i+1]] .= V[i].lvl.lvl.lvl.val
+            B_lvl_idx[nonzero_offset[i]+1:nonzero_offset[i+1]] .= V[i].lvl.lvl.idx
+            B_lvl_ptr[columns[i]+2:columns[i+1]+1] = V[i].lvl.lvl.ptr[2:end] .+ nonzero_offset[i]
+        end
+        B_lvl_2 = SparseList{Int64}(Element{0.0,Float64,Int64}(B_lvl_2_val), V[1].lvl.lvl.shape, B_lvl_ptr, B_lvl_idx)
+        return Tensor(Dense{Int64}(B_lvl_2, mapreduce(A -> A.lvl.shape, +, V)))
+    end)
+end
+
diff --git a/parallel/spadd/graph.py b/parallel/spadd/graph.py
new file mode 100644
index 00000000..b627dce0
--- /dev/null
+++ b/parallel/spadd/graph.py
@@ -0,0 +1,116 @@
+import json
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+
+GRAPH_FOLDER = "graph"
+SPEEDUP_FOLDER = "speedup"
+RUNTIME_FOLDER = "runtime"
+RESULTS_FOLDER = "results"
+
+# Thread counts whose result files are loaded and plotted (1 through 12).
+NTHREADS = list(range(1, 13))
+
+DEFAULT_METHOD = "serial_default_implementation"
+# Methods to plot; "parallel_col_separate_sparselist_results" is currently left out.
+METHODS = [DEFAULT_METHOD, "separated_memory_concatenate_results"]
+
+# One single-key mapping per dataset: dataset name -> matrix keys to plot.
+DATASETS = [
+    {"uniform": ["1000-0.1", "10000-0.1", "1000000-3000000"]},
+    {"FEMLAB": ["FEMLAB-poisson3Da", "FEMLAB-poisson3Db"]},
+]
+
+# Line colours, paired positionally with METHODS.
+COLORS = ["gray", "cadetblue", "saddlebrown", "navy", "black"]
+
+
+def load_json():
+    """Merge the per-thread-count result files into one nested mapping.
+
+    Returns ``results[dataset][matrix][method][n_threads] = time``.
+    """
+    combine_results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
+    for n_thread in NTHREADS:
+        # Context manager so each results file is closed as soon as it is parsed.
+        with open(f"{RESULTS_FOLDER}/spadd_{n_thread}_threads.json", "r") as results_file:
+            results_json = json.load(results_file)
+        for result in results_json:
+            if result["dataset"] == "uniform":
+                matrix = f"{result['matrix']['size']}-{result['matrix']['sparsity']}"
+            else:
+                matrix = result["matrix"].replace("/", "-")
+            per_method = combine_results[result["dataset"]][matrix][result["method"]]
+            per_method[result["n_threads"]] = result["time"]
+    return combine_results
+
+
+def plot_speedup_result(results, dataset, matrix, save_location):
+    """Plot each method's speedup over ``DEFAULT_METHOD`` against the thread count.
+
+    ``results`` is indexed as ``results[dataset][matrix][method][n_thread]``; the
+    figure is written to ``save_location`` and then closed.
+    """
+    per_method = results[dataset][matrix]
+    fig = plt.figure(figsize=(10, 6))
+    for method, color in zip(METHODS, COLORS):
+        # Speedup = baseline runtime / method runtime at the same thread count.
+        speedups = [
+            per_method[DEFAULT_METHOD][n_thread] / per_method[method][n_thread]
+            for n_thread in NTHREADS
+        ]
+        plt.plot(
+            NTHREADS, speedups, label=method, color=color, marker="o", linestyle="-", linewidth=1
+        )
+
+    plt.title(
+        f"SpAdd - Speedup for {dataset}: {matrix} (with respect to {DEFAULT_METHOD})"
+    )
+    plt.xticks(NTHREADS)
+    plt.xlabel("Number of Threads")
+    plt.ylabel("Speedup")
+    plt.legend()
+    plt.savefig(save_location)
+    plt.close(fig)  # pyplot keeps every figure alive until it is explicitly closed
+
+
+def plot_runtime_result(results, dataset, matrix, save_location):
+    """Plot each method's runtime against the thread count and save the figure.
+
+    ``results`` is indexed as ``results[dataset][matrix][method][n_thread]``; the
+    figure is written to ``save_location`` and then closed.
+    """
+    per_method = results[dataset][matrix]
+    fig = plt.figure(figsize=(10, 6))
+    for method, color in zip(METHODS, COLORS):
+        runtimes = [per_method[method][n_thread] for n_thread in NTHREADS]
+        plt.plot(
+            NTHREADS, runtimes, label=method, color=color, marker="o", linestyle="-", linewidth=1
+        )
+
+    plt.title(f"SpAdd - Runtime for {dataset}: {matrix}")
+    plt.xticks(NTHREADS)
+    plt.xlabel("Number of Threads")
+    plt.ylabel("Runtime (in seconds)")
+    plt.legend()
+    plt.savefig(save_location)
+    plt.close(fig)  # pyplot keeps every figure alive until it is explicitly closed
+
+
+if __name__ == "__main__":
+    import os
+
+    results = load_json()
+    # savefig does not create missing folders, so make sure both exist first.
+    for folder in (SPEEDUP_FOLDER, RUNTIME_FOLDER):
+        os.makedirs(f"{GRAPH_FOLDER}/{folder}", exist_ok=True)
+    for datasets in DATASETS:
+        for dataset, matrices in datasets.items():
+            for matrix in matrices:
+                file_name = f"{dataset}-{matrix}.png"
+                plot_speedup_result(
+                    results, dataset, matrix, f"{GRAPH_FOLDER}/{SPEEDUP_FOLDER}/{file_name}"
+                )
+                plot_runtime_result(
+                    results, dataset, matrix, f"{GRAPH_FOLDER}/{RUNTIME_FOLDER}/{file_name}"
+                )
diff --git a/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png
new file mode 100644
index 00000000..0fa39be8
Binary files /dev/null and b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png differ
diff --git a/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png
new file mode 100644
index 00000000..fe76ad30
Binary files /dev/null and b/parallel/spadd/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png differ
diff --git a/parallel/spadd/graph/runtime/uniform-1000-0.1.png b/parallel/spadd/graph/runtime/uniform-1000-0.1.png
new file mode 100644
index 00000000..97344036
Binary files /dev/null and b/parallel/spadd/graph/runtime/uniform-1000-0.1.png differ
diff --git a/parallel/spadd/graph/runtime/uniform-10000-0.1.png b/parallel/spadd/graph/runtime/uniform-10000-0.1.png
new file mode 100644
index 00000000..5ffd7f04
Binary files /dev/null and b/parallel/spadd/graph/runtime/uniform-10000-0.1.png differ
diff --git a/parallel/spadd/graph/runtime/uniform-1000000-3000000.png b/parallel/spadd/graph/runtime/uniform-1000000-3000000.png
new file mode 100644
index 00000000..1cb14e7e
Binary files /dev/null and b/parallel/spadd/graph/runtime/uniform-1000000-3000000.png differ
diff --git a/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png
new file mode 100644
index 00000000..6ef958a6
Binary files /dev/null and b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png differ
diff --git a/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png
new file mode 100644
index 00000000..d6689cc9
Binary files /dev/null and b/parallel/spadd/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png differ
diff --git a/parallel/spadd/graph/speedup/uniform-1000-0.1.png b/parallel/spadd/graph/speedup/uniform-1000-0.1.png
new file mode 100644
index 00000000..363ad8f3
Binary files /dev/null and b/parallel/spadd/graph/speedup/uniform-1000-0.1.png differ
diff --git a/parallel/spadd/graph/speedup/uniform-10000-0.1.png b/parallel/spadd/graph/speedup/uniform-10000-0.1.png
new file mode 100644
index 00000000..639e8141
Binary files /dev/null and b/parallel/spadd/graph/speedup/uniform-10000-0.1.png differ
diff --git a/parallel/spadd/graph/speedup/uniform-1000000-3000000.png b/parallel/spadd/graph/speedup/uniform-1000000-3000000.png
new file mode 100644
index 00000000..57634862
Binary files /dev/null and b/parallel/spadd/graph/speedup/uniform-1000000-3000000.png differ
diff --git a/parallel/spadd/parallel_col_separate_sparselist_results.jl b/parallel/spadd/parallel_col_separate_sparselist_results.jl
new file mode 100644
index 00000000..ac358383
--- /dev/null
+++ b/parallel/spadd/parallel_col_separate_sparselist_results.jl
@@ -0,0 +1,19 @@
+using Finch
+using BenchmarkTools
+
+
+function parallel_col_separate_sparselist_results_add(A, B)  # Benchmark C = A + B with a parallel outer loop; returns (time, C).
+        _A = Tensor(Dense(SparseList(Element(0.0))), A)  # convert both operands to Dense(SparseList(Element(0.0)))
+        _B = Tensor(Dense(SparseList(Element(0.0))), B)
+        time = @belapsed begin  # everything in this block is part of the timed expression
+                (_A, _B) = $(_A, _B)  # `$` interpolates the converted operands into the benchmark
+                global _C = Tensor(Dense(Separate(SparseList(Element(0.0)))))  # output built inside the timed block; global, presumably so the `return` below can read it
+                @finch mode = :fast begin
+                        _C .= 0
+                        for j = parallel(_), i = _  # `parallel(_)` marks the outer `j` loop for parallel execution
+                                _C[i, j] = _A[i, j] + _B[i, j]
+                        end
+                end
+        end
+        return (; time=time, C=_C)  # named tuple: the `@belapsed` measurement and the global result tensor
+end
diff --git a/parallel/spadd/results/spadd_10_threads.json b/parallel/spadd/results/spadd_10_threads.json
new file mode 100644
index 00000000..6a617ee0
--- /dev/null
+++ b/parallel/spadd/results/spadd_10_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.00759134,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.048477826,
+        "n_threads": 10,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.003060656,
+        "n_threads": 10,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.140822392,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.343739416,
+        "n_threads": 10,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.025895345,
+        "n_threads": 10,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002042569,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004510134,
+        "n_threads": 10,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000682558,
+        "n_threads": 10,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.671362515,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.122503556,
+        "n_threads": 10,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.059856956,
+        "n_threads": 10,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.142538191,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.792686368,
+        "n_threads": 10,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.036291917,
+        "n_threads": 10,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_11_threads.json b/parallel/spadd/results/spadd_11_threads.json
new file mode 100644
index 00000000..165c87cf
--- /dev/null
+++ b/parallel/spadd/results/spadd_11_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007813918,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.047563831,
+        "n_threads": 11,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.002597417,
+        "n_threads": 11,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.136202874,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.33770435,
+        "n_threads": 11,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.025623614,
+        "n_threads": 11,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002113507,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004420404,
+        "n_threads": 11,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00067307,
+        "n_threads": 11,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.674409652,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.108030634,
+        "n_threads": 11,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.064515586,
+        "n_threads": 11,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.158126594,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.636382679,
+        "n_threads": 11,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.031231612,
+        "n_threads": 11,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_12_threads.json b/parallel/spadd/results/spadd_12_threads.json
new file mode 100644
index 00000000..ed6fb321
--- /dev/null
+++ b/parallel/spadd/results/spadd_12_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.00775952,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.047737394,
+        "n_threads": 12,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.002776356,
+        "n_threads": 12,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.117517579,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.340512482,
+        "n_threads": 12,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.016165751,
+        "n_threads": 12,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002104756,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004340178,
+        "n_threads": 12,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000680325,
+        "n_threads": 12,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.660803585,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.117194109,
+        "n_threads": 12,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.068447556,
+        "n_threads": 12,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.146184782,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.830222054,
+        "n_threads": 12,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.03003548,
+        "n_threads": 12,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_1_threads.json b/parallel/spadd/results/spadd_1_threads.json
new file mode 100644
index 00000000..769e9158
--- /dev/null
+++ b/parallel/spadd/results/spadd_1_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007658117,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.062250052,
+        "n_threads": 1,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.010286825,
+        "n_threads": 1,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.133347393,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.505824712,
+        "n_threads": 1,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.119929662,
+        "n_threads": 1,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00200161,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.007409245,
+        "n_threads": 1,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002621693,
+        "n_threads": 1,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.67302425,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.411521742,
+        "n_threads": 1,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.398239441,
+        "n_threads": 1,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.093154071,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 4.521424105,
+        "n_threads": 1,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.217809633,
+        "n_threads": 1,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_2_threads.json b/parallel/spadd/results/spadd_2_threads.json
new file mode 100644
index 00000000..e3809667
--- /dev/null
+++ b/parallel/spadd/results/spadd_2_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.008053671,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.058495541,
+        "n_threads": 2,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.005601081,
+        "n_threads": 2,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.134318176,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.413190171,
+        "n_threads": 2,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.049217263,
+        "n_threads": 2,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002080681,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.007421289,
+        "n_threads": 2,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001444168,
+        "n_threads": 2,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.660950071,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.256817776,
+        "n_threads": 2,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.274736245,
+        "n_threads": 2,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.089100803,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.755418452,
+        "n_threads": 2,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.093467113,
+        "n_threads": 2,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_3_threads.json b/parallel/spadd/results/spadd_3_threads.json
new file mode 100644
index 00000000..d553cac4
--- /dev/null
+++ b/parallel/spadd/results/spadd_3_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007815171,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.057860099,
+        "n_threads": 3,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.003969577,
+        "n_threads": 3,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.132775556,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.370948929,
+        "n_threads": 3,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.029833378,
+        "n_threads": 3,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002027846,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.006872266,
+        "n_threads": 3,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00130002,
+        "n_threads": 3,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.672536501,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.195648383,
+        "n_threads": 3,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.230772324,
+        "n_threads": 3,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.142210671,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.74732249,
+        "n_threads": 3,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.061282212,
+        "n_threads": 3,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_4_threads.json b/parallel/spadd/results/spadd_4_threads.json
new file mode 100644
index 00000000..9b27cbb0
--- /dev/null
+++ b/parallel/spadd/results/spadd_4_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007712467,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.054351314,
+        "n_threads": 4,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.004878618,
+        "n_threads": 4,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.132065505,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.374797471,
+        "n_threads": 4,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.032860422,
+        "n_threads": 4,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002002598,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.005143974,
+        "n_threads": 4,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000868699,
+        "n_threads": 4,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.669091315,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.161954816,
+        "n_threads": 4,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.136907266,
+        "n_threads": 4,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.15480946,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.67523947,
+        "n_threads": 4,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.043427596,
+        "n_threads": 4,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_5_threads.json b/parallel/spadd/results/spadd_5_threads.json
new file mode 100644
index 00000000..2d350891
--- /dev/null
+++ b/parallel/spadd/results/spadd_5_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007722766,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.052393424,
+        "n_threads": 5,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.004978728,
+        "n_threads": 5,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.117776194,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.34473256,
+        "n_threads": 5,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.027384202,
+        "n_threads": 5,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002031694,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.005564533,
+        "n_threads": 5,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000879534,
+        "n_threads": 5,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.64254676,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.158092057,
+        "n_threads": 5,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.129361777,
+        "n_threads": 5,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.156580994,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.625150086,
+        "n_threads": 5,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.053426307,
+        "n_threads": 5,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_6_threads.json b/parallel/spadd/results/spadd_6_threads.json
new file mode 100644
index 00000000..f8802068
--- /dev/null
+++ b/parallel/spadd/results/spadd_6_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007953295,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.051207333,
+        "n_threads": 6,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.004370187,
+        "n_threads": 6,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.137860884,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.340840468,
+        "n_threads": 6,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.030216199,
+        "n_threads": 6,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002081867,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00526826,
+        "n_threads": 6,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001013415,
+        "n_threads": 6,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.67727586,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.141259518,
+        "n_threads": 6,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.108764424,
+        "n_threads": 6,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.138605195,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.624903508,
+        "n_threads": 6,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.035666361,
+        "n_threads": 6,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_7_threads.json b/parallel/spadd/results/spadd_7_threads.json
new file mode 100644
index 00000000..70631b6a
--- /dev/null
+++ b/parallel/spadd/results/spadd_7_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007921264,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.049803029,
+        "n_threads": 7,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.003912309,
+        "n_threads": 7,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.134428848,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.341892755,
+        "n_threads": 7,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.031928255,
+        "n_threads": 7,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002088799,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004972098,
+        "n_threads": 7,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000896116,
+        "n_threads": 7,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.681039133,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.126025398,
+        "n_threads": 7,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.102670088,
+        "n_threads": 7,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.172220199,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.598130947,
+        "n_threads": 7,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.035217355,
+        "n_threads": 7,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_8_threads.json b/parallel/spadd/results/spadd_8_threads.json
new file mode 100644
index 00000000..ba4fafad
--- /dev/null
+++ b/parallel/spadd/results/spadd_8_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007804556,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.049307963,
+        "n_threads": 8,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.003644985,
+        "n_threads": 8,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.119541293,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.352797055,
+        "n_threads": 8,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.024231367,
+        "n_threads": 8,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002077453,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004788928,
+        "n_threads": 8,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000702856,
+        "n_threads": 8,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.655951213,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.14574732,
+        "n_threads": 8,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.127390267,
+        "n_threads": 8,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.156350457,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.643876252,
+        "n_threads": 8,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.040533781,
+        "n_threads": 8,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/results/spadd_9_threads.json b/parallel/spadd/results/spadd_9_threads.json
new file mode 100644
index 00000000..9fed7dfa
--- /dev/null
+++ b/parallel/spadd/results/spadd_9_threads.json
@@ -0,0 +1,134 @@
+[
+    {
+        "time": 0.007863298,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.048378622,
+        "n_threads": 9,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.003199436,
+        "n_threads": 9,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.132222928,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.338526563,
+        "n_threads": 9,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.026502123,
+        "n_threads": 9,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002097927,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004664288,
+        "n_threads": 9,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000671842,
+        "n_threads": 9,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.680030172,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.116336138,
+        "n_threads": 9,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.073227472,
+        "n_threads": 9,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 10000,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.154322425,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 3.624687224,
+        "n_threads": 9,
+        "method": "parallel_col_separate_sparselist_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.03488489,
+        "n_threads": 9,
+        "method": "separated_memory_concatenate_results",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1000000,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spadd/run.sh b/parallel/spadd/run.sh
new file mode 100755
index 00000000..170c1259
--- /dev/null
+++ b/parallel/spadd/run.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Usage: run.sh MAX_THREADS -- runs run_spadd.jl once per thread count 1..MAX_THREADS.
+[[ ${1-} =~ ^[1-9][0-9]*$ ]] || { echo "usage: run.sh MAX_THREADS" >&2; return 1 2>/dev/null || exit 1; }
+for (( t = 1; t <= $1; t++ )); do
+    echo "Running run_spadd.jl with $t threads"
+    julia "--threads=$t" "run_spadd.jl"
+done
diff --git a/parallel/spadd/run_local.sh b/parallel/spadd/run_local.sh
new file mode 100755
index 00000000..d4525b32
--- /dev/null
+++ b/parallel/spadd/run_local.sh
@@ -0,0 +1 @@
+source run.sh 4
diff --git a/parallel/spadd/run_slurm.sl b/parallel/spadd/run_slurm.sl
new file mode 100644
index 00000000..8b3de32f
--- /dev/null
+++ b/parallel/spadd/run_slurm.sl
@@ -0,0 +1,12 @@
+#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=24
+#SBATCH --exclusive
+#SBATCH -t 12:00:00
+#SBATCH --partition=lanka-v3
+#SBATCH --qos=commit-main
+#SBATCH --mem 102400
+# Abort if the checkout is missing, so run.sh is never sourced from the wrong directory.
+cd /data/scratch/paramuth/FinchBenchmarks/parallel/spadd || exit 1
+source run.sh 12
diff --git a/parallel/spadd/run_spadd.jl b/parallel/spadd/run_spadd.jl
new file mode 100644
index 00000000..02e59158
--- /dev/null
+++ b/parallel/spadd/run_spadd.jl
@@ -0,0 +1,125 @@
+#!/usr/bin/env julia
+using Base: nothing_sentinel  # NOTE(review): not referenced in this file; confirm and drop
+if abspath(PROGRAM_FILE) == @__FILE__  # only when executed as a script, not when include()d
+    using Pkg
+    Pkg.activate(dirname(@__DIR__))  # the project lives in the parent of this script's directory
+    Pkg.instantiate()
+end
+include("../../deps/diagnostics.jl")
+print_diagnostics()
+
+using MatrixDepot
+using BenchmarkTools
+using ArgParse
+using DataStructures
+using JSON
+using Random
+
+Random.seed!(1234)
+
+# Command-line options; every option is optional and read back through parsed_args["<name>"]
+s = ArgParseSettings("Run Parallel SpAdd Experiments.")
+@add_arg_table! s begin
+    "--output", "-o"  # default when omitted: results/spadd_<nthreads>_threads.json
+    arg_type = String
+    help = "output file path"
+    "--dataset", "-d"  # a key of `datasets`; when omitted every dataset is run
+    arg_type = String
+    help = "dataset keyword"
+    "--method", "-m"  # a key of `methods`; when omitted every registered method is run
+    arg_type = String
+    help = "method keyword"
+    "--accuracy-check", "-a"  # compare each result with serial_default_implementation_add
+    action = :store_true
+    help = "check method accuracy"
+end
+parsed_args = parse_args(ARGS, s)
+
+# Benchmark inputs, keyed by dataset keyword (the value accepted by --dataset)
+datasets = Dict(
+    "uniform" => [  # each entry supplies the fsprand arguments used for both A and B
+        OrderedDict("size" => 1_000, "sparsity" => 0.1),
+        OrderedDict("size" => 10_000, "sparsity" => 0.1),
+        OrderedDict("size" => 1_000_000, "sparsity" => 3_000_000),  # NOTE(review): integer value — presumably a nonzero count, not a density; confirm against fsprand
+    ],
+    "FEMLAB" => [  # names handed to matrixdepot(); B is a row/column-permuted A
+        "FEMLAB/poisson3Da",
+        "FEMLAB/poisson3Db",
+    ],
+)
+
+# Registry of benchmarked implementations, keyed by the keyword accepted by --method
+include("serial_default_implementation.jl")
+# include("parallel_col_separate_sparselist_results.jl")
+include("separated_memory_concatenate_results.jl")
+
+methods = OrderedDict(
+    "serial_default_implementation" => serial_default_implementation_add,
+    # "parallel_col_separate_sparselist_results" => parallel_col_separate_sparselist_results_add,
+    "separated_memory_concatenate_results" => separated_memory_concatenate_results_add,
+)
+
+# --method restricts the run to one implementation. Validate with an explicit exception:
+# @assert is a debugging aid that may be compiled out, so it must not guard user input.
+if !isnothing(parsed_args["method"])
+    method_name = parsed_args["method"]
+    haskey(methods, method_name) || throw(ArgumentError("Unrecognized method: $method_name"))
+    methods = OrderedDict(method_name => methods[method_name])
+end
+
+# Benchmark every entry of `methods` on each matrix in `mtxs` (belonging to `dataset`), push
+# one record per run onto `results`, and rewrite the JSON output file after every run.
+function calculate_results(dataset, mtxs, results)
+    output = something(parsed_args["output"], "results/spadd_$(Threads.nthreads())_threads.json")
+    for mtx in mtxs
+        # Build the operand pair (A, B) for this entry
+        if dataset == "uniform"
+            A = fsprand(mtx["size"], mtx["size"], mtx["sparsity"])
+            B = fsprand(mtx["size"], mtx["size"], mtx["sparsity"])
+        elseif dataset == "FEMLAB"
+            A = matrixdepot(mtx)
+            row_permutation = randperm(size(A, 1))
+            col_permutation = randperm(size(A, 2))
+            B = A[row_permutation, col_permutation]
+        else
+            throw(ArgumentError("Cannot recognize dataset: $dataset"))
+        end
+
+        for (key, method) in methods
+            result = method(A, B)
+
+            if parsed_args["accuracy-check"]
+                # Compare with the serial reference; error() (unlike @assert) is never elided
+                reference = serial_default_implementation_add(A, B)
+                result.C == reference.C || error("Incorrect result for $key")
+            end
+
+            # Record this run
+            time = result.time
+            @info "result for $key on $mtx" time
+            push!(results, OrderedDict(
+                "time" => time,
+                "n_threads" => Threads.nthreads(),
+                "method" => key,
+                "dataset" => dataset,
+                "matrix" => mtx,
+            ))
+
+            # Rewrite the whole file each time, so it always holds every run finished so far
+            write(output, JSON.json(results, 4))
+        end
+    end
+end
+
+results = []  # benchmark records; filled in and written to disk by calculate_results
+if isnothing(parsed_args["dataset"])
+    for (dataset, mtxs) in datasets
+        calculate_results(dataset, mtxs, results)
+    end
+else
+    dataset = parsed_args["dataset"]
+    haskey(datasets, dataset) || throw(ArgumentError("Cannot recognize dataset: $dataset"))
+    calculate_results(dataset, datasets[dataset], results)
+end
+
+
diff --git a/parallel/spadd/separated_memory_concatenate_results.jl b/parallel/spadd/separated_memory_concatenate_results.jl
new file mode 100644
index 00000000..9b990f2b
--- /dev/null
+++ b/parallel/spadd/separated_memory_concatenate_results.jl
@@ -0,0 +1,108 @@
+using Finch
+using BenchmarkTools
+using Base.Threads
+
+include("concat.jl")
+
+# Benchmark A + B: each of Threads.nthreads() loop iterations adds one contiguous
+# column range with partial_add, then concat_vec joins the partial tensors.
+# Returns (; time, C): the @belapsed time and the result left in the global _C.
+function separated_memory_concatenate_results_add(A, B)
+    _A = Tensor(Dense(SparseList(Element(0.0))), A)
+    _B = Tensor(Dense(SparseList(Element(0.0))), B)
+    time = @belapsed begin
+        (_A, _B) = $(_A, _B)
+        nchunks = Threads.nthreads()
+        pieces = Vector{typeof(_A)}(undef, nchunks)
+        # Slot k+1 records chunk k's entry count / last column; slot 1 stays 0.
+        nnz_offsets = zeros(Int64, nchunks + 1)
+        col_bounds = zeros(Int64, nchunks + 1)
+        _, ncols = size(_A)
+        Threads.@threads for k = 1:nchunks
+            first_col = 1 + div((k - 1) * ncols, nchunks)
+            last_col = div(k * ncols, nchunks)
+            chunk = partial_add(_A, _B, first_col, last_col)
+            pieces[k] = chunk.tensor
+            nnz_offsets[k+1] = chunk.num_nonzero
+            col_bounds[k+1] = last_col
+        end
+        for k = 1:nchunks # running sum turns the counts into offsets
+            nnz_offsets[k+1] += nnz_offsets[k]
+        end
+        # `global` lets the return statement below read the benchmark's result.
+        global _C = concat_vec(pieces, nnz_offsets, col_bounds)
+    end
+    return (; time=time, C=_C)
+end
+
+# Add A and B over the columns start_col:stop_col (inclusive).
+#
+# Within each column the two index lists are merged in ascending order (this assumes
+# every column stores its indices sorted): an index found in only one operand copies
+# that operand's value, an index found in both stores the sum of the two values.
+#
+# Returns (tensor, num_nonzero): a tensor holding only the requested columns,
+# renumbered from 1, and the number of entries stored in it.
+function partial_add(A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, B::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, start_col::Int64, stop_col::Int64)
+    @inbounds @fastmath(begin
+        # Unpack the storage arrays once so the loops below index them directly.
+        A_sparse = A.lvl.lvl # SparseListLevel nested in the DenseLevel
+        A_ptr = A_sparse.ptr
+        A_idx = A_sparse.idx
+        A_val = A_sparse.lvl.val
+
+        B_sparse = B.lvl.lvl
+        B_ptr = B_sparse.ptr
+        B_idx = B_sparse.idx
+        B_val = B_sparse.lvl.val
+
+        # Output buffers; entries of local column c occupy out_ptr[c]:out_ptr[c+1]-1.
+        out_val = Float64[]
+        out_idx = Int64[]
+        out_ptr = Int64[1]
+
+        for col = start_col:stop_col
+            # a and b walk this column's entries of A and B up to a_end-1 / b_end-1.
+            a = A_ptr[col]
+            a_end = A_ptr[col+1]
+            b = B_ptr[col]
+            b_end = B_ptr[col+1]
+            while a < a_end && b < b_end
+                row_a = A_idx[a]
+                row_b = B_idx[b]
+                if row_a == row_b
+                    # Same index in both operands: store the sum, advance both.
+                    push!(out_val, A_val[a] + B_val[b])
+                    push!(out_idx, row_a)
+                    a += 1
+                    b += 1
+                elseif row_a < row_b
+                    # A's index comes first: copy A's entry.
+                    push!(out_val, A_val[a])
+                    push!(out_idx, row_a)
+                    a += 1
+                else
+                    # B's index comes first: copy B's entry.
+                    push!(out_val, B_val[b])
+                    push!(out_idx, row_b)
+                    b += 1
+                end
+            end
+
+            # At most one operand still has entries in this column; copy that tail.
+            append!(out_val, @view A_val[a:a_end-1])
+            append!(out_idx, @view A_idx[a:a_end-1])
+            append!(out_val, @view B_val[b:b_end-1])
+            append!(out_idx, @view B_idx[b:b_end-1])
+
+            # Close the column: the next one starts right after the last entry.
+            push!(out_ptr, length(out_idx) + 1)
+        end
+
+        # Wrap the buffers in a Dense / SparseList / Element level stack.
+        element_lvl = Element{0.0,Float64,Int64}(out_val)
+        sparse_lvl = SparseList{Int64}(element_lvl, A_sparse.shape, out_ptr, out_idx)
+        dense_lvl = Dense{Int64}(sparse_lvl, stop_col - start_col + 1)
+        return (tensor=Tensor(dense_lvl), num_nonzero=length(out_idx))
+    end)
+end
diff --git a/parallel/spadd/serial_default_implementation.jl b/parallel/spadd/serial_default_implementation.jl
new file mode 100644
index 00000000..645081aa
--- /dev/null
+++ b/parallel/spadd/serial_default_implementation.jl
@@ -0,0 +1,19 @@
+using Finch
+using BenchmarkTools
+# Baseline: time C[i, j] = A[i, j] + B[i, j] written as a plain @finch loop nest
+# (no dimension is marked parallel). Returns (; time, C).
+function serial_default_implementation_add(A, B)
+        _A = Tensor(Dense(SparseList(Element(0.0))), A)
+        _B = Tensor(Dense(SparseList(Element(0.0))), B)
+        time = @belapsed begin
+                (_A, _B) = $(_A, _B)
+                global _C = Tensor(Dense(SparseList(Element(0.0)))) # global: read by the return below
+                @finch mode = :fast begin
+                        _C .= 0
+                        for j = _, i = _
+                                _C[i, j] = _A[i, j] + _B[i, j]
+                        end
+                end
+        end
+        return (; time=time, C=_C)
+end
diff --git a/parallel/spmv/.gitignore b/parallel/spmv/.gitignore
new file mode 100644
index 00000000..4902375f
--- /dev/null
+++ b/parallel/spmv/.gitignore
@@ -0,0 +1,2 @@
+Manifest.toml
+slurm*
diff --git a/parallel/spmv/dynamic_rows_grain.jl b/parallel/spmv/dynamic_rows_grain.jl
new file mode 100644
index 00000000..fb4a092a
--- /dev/null
+++ b/parallel/spmv/dynamic_rows_grain.jl
@@ -0,0 +1,52 @@
+using Finch
+using BenchmarkTools
+# y = A * x accumulated row by row; rows are split into chunks of `grain` consecutive rows, one Threads.@threads iteration each.
+function dynamic_rows_grain_helper(grain::Int64, y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+    @inbounds @fastmath(begin
+        y_lvl = y.lvl
+        y_lvl_val = y_lvl.lvl.val
+        tns_lvl = A.body.lvl
+        tns_lvl_2 = tns_lvl.lvl
+        tns_lvl_ptr = tns_lvl_2.ptr
+        tns_lvl_idx = tns_lvl_2.idx
+        tns_lvl_2_val = tns_lvl_2.lvl.val
+        x_lvl = x.lvl
+        x_lvl_val = x_lvl.lvl.val
+        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
+        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
+        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape) # zero the first tns_lvl.shape outputs
+        val = y_lvl_val # alias taken before moveto; the loop below writes through it
+        y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads()))
+        x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads()))
+        tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads()))
+        tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads()))
+        tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads()))
+        # cld(...) chunks in total; the last chunk is clipped to tns_lvl.shape by the min() below.
+        Threads.@threads for group = 1:cld(tns_lvl.shape, grain)
+            Finch.@barrier begin
+                @inbounds @fastmath(begin
+                    for i = (group-1)*grain+1:min(tns_lvl.shape, group * grain)
+                        for ptr = tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1
+                            val[i] += tns_lvl_2_val[ptr] * x_lvl_val[tns_lvl_idx[ptr]]
+                        end
+                    end
+                end)
+            end
+        end
+        # NOTE(review): results go through `val`, not the moved y_lvl_val — confirm moveto does not copy for CPU.
+        resize!(val, tns_lvl.shape) # also the value this function returns
+    end)
+end
+
+# Bind `grain` so the result takes (y, A, x), the same argument list that
+# finch_parallel in this directory accepts.
+dynamic_rows_grain_generator(grain) = (y, A, x) -> dynamic_rows_grain(grain, y, A, x)
+
+# Benchmark dynamic_rows_grain_helper on Finch tensors built from the inputs; returns (; time, y).
+function dynamic_rows_grain(grain, y, A, x)
+    _y = Tensor(Dense(Element(0.0)), y)
+    _x = Tensor(Dense(Element(0.0)), x)
+    _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1)
+    elapsed = @belapsed dynamic_rows_grain_helper($grain, $_y, $_A, $_x)
+    return (; time=elapsed, y=_y)
+end
diff --git a/parallel/spmv/finch_parallel.jl b/parallel/spmv/finch_parallel.jl
new file mode 100644
index 00000000..6c0c1e0e
--- /dev/null
+++ b/parallel/spmv/finch_parallel.jl
@@ -0,0 +1,19 @@
+using Finch
+using BenchmarkTools
+# Time y[i] += A[i, j] * x[j] as an @finch loop nest whose outer i loop is declared
+# with parallel(_). Returns (; time, y).
+function finch_parallel(y, A, x)
+        _y = Tensor(Dense(Element(0.0)), y)
+        _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1) # stores permutedims(A), then swizzles the modes
+        _x = Tensor(Dense(Element(0.0)), x)
+        time = @belapsed begin
+                (_y, _A, _x) = $(_y, _A, _x)
+                @finch mode = :fast begin
+                        _y .= 0 # inside the timed block, so the output is reset on every run of it
+                        for i = parallel(_), j = _
+                                _y[i] += _A[i, j] * _x[j]
+                        end
+                end
+        end
+        return (; time=time, y=_y)
+end
diff --git a/parallel/spmv/graph.py b/parallel/spmv/graph.py
new file mode 100644
index 00000000..f4f8d955
--- /dev/null
+++ b/parallel/spmv/graph.py
@@ -0,0 +1,219 @@
+import json
+import os
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+
+GRAPH_FOLDER = "graph"  # root folder for every figure written by this script
+SPEEDUP_FOLDER = "speedup"  # sub-folder of GRAPH_FOLDER for per-matrix speedup plots
+RUNTIME_FOLDER = "runtime"  # sub-folder of GRAPH_FOLDER for per-matrix runtime plots
+RESULTS_FOLDER = "results"  # where load_json reads spmv_<n>_threads.json from
+MEAN_SPEEDUP_FOLDER = "mean-speedup"  # sub-folder for the per-method mean plots
+
+NTHREADS = [i + 1 for i in range(12)]  # thread counts 1..12, one results file each
+
+DEFAULT_METHOD = "serial_default_implementation"  # baseline: numerator of every speedup
+METHODS = [  # methods that get a line in each plot
+    # DEFAULT_METHOD,
+    # "finch_parallel",
+    "static_rows_equal",
+    "dynamic_rows_grain_1",
+    "dynamic_rows_grain_10",
+    "merge",
+    # "graph_partition_reorder_merge",
+    "graph_partition_weighted_reorder_merge",
+]
+
+DATASETS = {  # dataset -> matrix keys, spelled the way load_json builds them
+    "uniform": ["1024-0.1", "8192-0.1", "1048576-3000000"],
+    "FEMLAB": ["FEMLAB-poisson3Da", "FEMLAB-poisson3Db"],
+    "vanHeukelum": [
+        "vanHeukelum-cage10",
+        # "vanHeukelum-cage11",
+        # "vanHeukelum-cage12"
+    ],
+    "Williams": ["Williams-webbase-1M"],
+}
+NUM_MATRICES = sum([len(matrices) for matrices in DATASETS.values()])  # root degree of the geometric mean
+
+COLORS = [  # paired positionally with METHODS through zip()
+    "gray",
+    "cadetblue",
+    "saddlebrown",
+    "navy",
+    "black",
+    "orange",
+    "green",
+    "red",
+    "purple",
+]
+
+
+def load_json():
+    combine_results = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: {})))
+    for n_thread in NTHREADS:
+        results_json = json.load(
+            open(f"{RESULTS_FOLDER}/spmv_{n_thread}_threads.json", "r")
+        )
+        for result in results_json:
+
+            matrix = (
+                result["matrix"].replace("/", "-")
+                if result["dataset"] != "uniform"
+                else f"{result['matrix']['size']}-{result['matrix']['sparsity']}"
+            )
+            combine_results[result["dataset"]][matrix][result["method"]][
+                result["n_threads"]
+            ] = result["time"]
+
+    return combine_results
+
+
+def plot_speedup_result(results, dataset, matrix, save_location):
+    plt.figure(figsize=(10, 10))
+    for method, color in zip(METHODS, COLORS):
+        plt.plot(
+            NTHREADS,
+            [
+                results[dataset][matrix][DEFAULT_METHOD][n_thread]
+                / results[dataset][matrix][method][n_thread]
+                for n_thread in NTHREADS
+            ],
+            label=method,
+            color=color,
+            marker="o",
+            linestyle="-",
+            linewidth=1,
+        )
+
+    plt.title(f"Speedup for {dataset}: {matrix} (with respect to {DEFAULT_METHOD})")
+    # plt.yscale("log", base=10)
+    plt.xticks(NTHREADS)
+    plt.xlabel("Number of Threads")
+    plt.ylabel(f"Speedup")
+
+    plt.legend()
+    plt.savefig(save_location)
+    plt.close()
+
+
+def plot_runtime_result(results, dataset, matrix, save_location):
+    plt.figure(figsize=(10, 10))
+    for method, color in zip(METHODS, COLORS):
+        plt.plot(
+            NTHREADS,
+            [results[dataset][matrix][method][n_thread] for n_thread in NTHREADS],
+            label=method,
+            color=color,
+            marker="o",
+            linestyle="-",
+            linewidth=1,
+        )
+
+    plt.title(f"Runtime for {dataset}: {matrix}")
+    # plt.yscale("log", base=10)
+    plt.xticks(NTHREADS)
+    plt.xlabel("Number of Threads")
+    plt.ylabel(f"Runtime (in seconds)")
+
+    plt.legend()
+    plt.savefig(save_location)
+    plt.close()
+
+
+def plot_mean_speedup_result(results, save_location):
+    plt.figure(figsize=(10, 10))
+    for method, color in zip(METHODS, COLORS):
+        speedups = [1] * len(NTHREADS)
+        for dataset, matrices in DATASETS.items():
+            for matrix in matrices:
+                for i, n_thread in enumerate(NTHREADS):
+                    speedups[i] *= (
+                        results[dataset][matrix][DEFAULT_METHOD][n_thread]
+                        / results[dataset][matrix][method][n_thread]
+                    )
+
+        mean_speedups = [speedup ** (1 / NUM_MATRICES) for speedup in speedups]
+        plt.plot(
+            NTHREADS,
+            mean_speedups,
+            label=method,
+            color=color,
+            marker="o",
+            linestyle="-",
+            linewidth=1,
+        )
+
+    plt.title(f"Geometric Mean Speedup (with respect to {DEFAULT_METHOD})")
+    # plt.yscale("log", base=10)
+    plt.xticks(NTHREADS)
+    plt.xlabel("Number of Threads")
+    plt.ylabel(f"Speedup")
+
+    plt.legend()
+    plt.savefig(save_location)
+    plt.close()
+
+
+def plot_mean_speedup_separate_result(results, save_folder):
+    for method, color in zip(METHODS, COLORS):
+        plt.figure(figsize=(10, 10))
+        speedups = [1] * len(NTHREADS)
+        for dataset, matrices in DATASETS.items():
+            for matrix in matrices:
+                for i, n_thread in enumerate(NTHREADS):
+                    speedups[i] *= (
+                        results[dataset][matrix][DEFAULT_METHOD][n_thread]
+                        / results[dataset][matrix][method][n_thread]
+                    )
+
+        mean_speedups = [speedup ** (1 / NUM_MATRICES) for speedup in speedups]
+        plt.plot(
+            NTHREADS,
+            mean_speedups,
+            label=method,
+            color=color,
+            marker="o",
+            linestyle="-",
+            linewidth=1,
+        )
+
+        plt.title(
+            f"Geometric Mean Speedup for {method} (with respect to {DEFAULT_METHOD})"
+        )
+        # plt.yscale("log", base=10)
+        plt.xticks(NTHREADS)
+        plt.xlabel("Number of Threads")
+        plt.ylabel(f"Speedup")
+
+        plt.legend()
+        plt.savefig(os.path.join(save_folder, f"{method}-mean-speedup.png"))
+        plt.close()
+
+
+if __name__ == "__main__":
+    os.makedirs(os.path.join(GRAPH_FOLDER, SPEEDUP_FOLDER), exist_ok=True)
+    os.makedirs(os.path.join(GRAPH_FOLDER, RUNTIME_FOLDER), exist_ok=True)
+    os.makedirs(os.path.join(GRAPH_FOLDER, MEAN_SPEEDUP_FOLDER), exist_ok=True)
+
+    results = load_json()
+    for dataset, matrices in DATASETS.items():
+        for matrix in matrices:
+            plot_speedup_result(
+                results,
+                dataset,
+                matrix,
+                os.path.join(GRAPH_FOLDER, SPEEDUP_FOLDER, f"{dataset}-{matrix}.png"),
+            )
+            plot_runtime_result(
+                results,
+                dataset,
+                matrix,
+                os.path.join(GRAPH_FOLDER, RUNTIME_FOLDER, f"{dataset}-{matrix}.png"),
+            )
+
+    plot_mean_speedup_result(results, os.path.join(GRAPH_FOLDER, "mean-speedup.png"))
+
+    plot_mean_speedup_separate_result(
+        results, os.path.join(GRAPH_FOLDER, MEAN_SPEEDUP_FOLDER)
+    )
diff --git a/parallel/spmv/graph/mean-speedup.png b/parallel/spmv/graph/mean-speedup.png
new file mode 100644
index 00000000..dfcc4a57
Binary files /dev/null and b/parallel/spmv/graph/mean-speedup.png differ
diff --git a/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_1-mean-speedup.png b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_1-mean-speedup.png
new file mode 100644
index 00000000..05d55367
Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_1-mean-speedup.png differ
diff --git a/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_10-mean-speedup.png b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_10-mean-speedup.png
new file mode 100644
index 00000000..3820bd4d
Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/dynamic_rows_grain_10-mean-speedup.png differ
diff --git a/parallel/spmv/graph/mean-speedup/graph_partition_weighted_reorder_merge-mean-speedup.png b/parallel/spmv/graph/mean-speedup/graph_partition_weighted_reorder_merge-mean-speedup.png
new file mode 100644
index 00000000..81ab1a4d
Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/graph_partition_weighted_reorder_merge-mean-speedup.png differ
diff --git a/parallel/spmv/graph/mean-speedup/merge-mean-speedup.png b/parallel/spmv/graph/mean-speedup/merge-mean-speedup.png
new file mode 100644
index 00000000..35d7abf6
Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/merge-mean-speedup.png differ
diff --git a/parallel/spmv/graph/mean-speedup/static_rows_equal-mean-speedup.png b/parallel/spmv/graph/mean-speedup/static_rows_equal-mean-speedup.png
new file mode 100644
index 00000000..df1e6969
Binary files /dev/null and b/parallel/spmv/graph/mean-speedup/static_rows_equal-mean-speedup.png differ
diff --git a/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png
new file mode 100644
index 00000000..c3679b28
Binary files /dev/null and b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Da.png differ
diff --git a/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png
new file mode 100644
index 00000000..2b54bf12
Binary files /dev/null and b/parallel/spmv/graph/runtime/FEMLAB-FEMLAB-poisson3Db.png differ
diff --git a/parallel/spmv/graph/runtime/Williams-Williams-webbase-1M.png b/parallel/spmv/graph/runtime/Williams-Williams-webbase-1M.png
new file mode 100644
index 00000000..d143e6d2
Binary files /dev/null and b/parallel/spmv/graph/runtime/Williams-Williams-webbase-1M.png differ
diff --git a/parallel/spmv/graph/runtime/uniform-1024-0.1.png b/parallel/spmv/graph/runtime/uniform-1024-0.1.png
new file mode 100644
index 00000000..8955433b
Binary files /dev/null and b/parallel/spmv/graph/runtime/uniform-1024-0.1.png differ
diff --git a/parallel/spmv/graph/runtime/uniform-1048576-3000000.png b/parallel/spmv/graph/runtime/uniform-1048576-3000000.png
new file mode 100644
index 00000000..5a1c8bfe
Binary files /dev/null and b/parallel/spmv/graph/runtime/uniform-1048576-3000000.png differ
diff --git a/parallel/spmv/graph/runtime/uniform-8192-0.1.png b/parallel/spmv/graph/runtime/uniform-8192-0.1.png
new file mode 100644
index 00000000..3ac940dc
Binary files /dev/null and b/parallel/spmv/graph/runtime/uniform-8192-0.1.png differ
diff --git a/parallel/spmv/graph/runtime/vanHeukelum-vanHeukelum-cage10.png b/parallel/spmv/graph/runtime/vanHeukelum-vanHeukelum-cage10.png
new file mode 100644
index 00000000..eac51f52
Binary files /dev/null and b/parallel/spmv/graph/runtime/vanHeukelum-vanHeukelum-cage10.png differ
diff --git a/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png
new file mode 100644
index 00000000..8a4e8692
Binary files /dev/null and b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Da.png differ
diff --git a/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png
new file mode 100644
index 00000000..30d8405e
Binary files /dev/null and b/parallel/spmv/graph/speedup/FEMLAB-FEMLAB-poisson3Db.png differ
diff --git a/parallel/spmv/graph/speedup/Williams-Williams-webbase-1M.png b/parallel/spmv/graph/speedup/Williams-Williams-webbase-1M.png
new file mode 100644
index 00000000..53191679
Binary files /dev/null and b/parallel/spmv/graph/speedup/Williams-Williams-webbase-1M.png differ
diff --git a/parallel/spmv/graph/speedup/uniform-1024-0.1.png b/parallel/spmv/graph/speedup/uniform-1024-0.1.png
new file mode 100644
index 00000000..567ee6c6
Binary files /dev/null and b/parallel/spmv/graph/speedup/uniform-1024-0.1.png differ
diff --git a/parallel/spmv/graph/speedup/uniform-1048576-3000000.png b/parallel/spmv/graph/speedup/uniform-1048576-3000000.png
new file mode 100644
index 00000000..2bd8ca34
Binary files /dev/null and b/parallel/spmv/graph/speedup/uniform-1048576-3000000.png differ
diff --git a/parallel/spmv/graph/speedup/uniform-8192-0.1.png b/parallel/spmv/graph/speedup/uniform-8192-0.1.png
new file mode 100644
index 00000000..023181c1
Binary files /dev/null and b/parallel/spmv/graph/speedup/uniform-8192-0.1.png differ
diff --git a/parallel/spmv/graph/speedup/vanHeukelum-vanHeukelum-cage10.png b/parallel/spmv/graph/speedup/vanHeukelum-vanHeukelum-cage10.png
new file mode 100644
index 00000000..698a4418
Binary files /dev/null and b/parallel/spmv/graph/speedup/vanHeukelum-vanHeukelum-cage10.png differ
diff --git a/parallel/spmv/graph_partition_reorder_merge.jl b/parallel/spmv/graph_partition_reorder_merge.jl
new file mode 100644
index 00000000..ebd4b745
--- /dev/null
+++ b/parallel/spmv/graph_partition_reorder_merge.jl
@@ -0,0 +1,18 @@
+using Finch
+using BenchmarkTools
+
+include("utils/merge.jl")
+include("utils/permutation.jl")
+
+# Permute A with the ordering from create_permutation, benchmark merge_helper on the
+# permuted matrix, then apply the inverse permutation to y. Returns (; time, y).
+function graph_partition_reorder_merge(y, A, x)
+    _y = Tensor(Dense(Element(0.0)), y)
+    _x = Tensor(Dense(Element(0.0)), x)
+    _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1)
+    perm = create_permutation(_A)
+    reordered = matrix_col_permutation(_A, perm)
+    elapsed = @belapsed merge_helper($_y, $reordered, $_x)
+    return (; time=elapsed, y=vector_permutation(_y, invperm(perm)))
+end
+
diff --git a/parallel/spmv/graph_partition_weighted_reorder_merge.jl b/parallel/spmv/graph_partition_weighted_reorder_merge.jl
new file mode 100644
index 00000000..2155d834
--- /dev/null
+++ b/parallel/spmv/graph_partition_weighted_reorder_merge.jl
@@ -0,0 +1,18 @@
+using Finch
+using BenchmarkTools
+
+include("utils/merge.jl")
+include("utils/permutation.jl")
+
+# Permute A with the ordering from create_weighted_permutation, benchmark merge_helper
+# on the permuted matrix, then apply the inverse permutation to y. Returns (; time, y).
+function graph_partition_weighted_reorder_merge(y, A, x)
+    _y = Tensor(Dense(Element(0.0)), y)
+    _x = Tensor(Dense(Element(0.0)), x)
+    _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1)
+    perm = create_weighted_permutation(_A)
+    reordered = matrix_col_permutation(_A, perm)
+    elapsed = @belapsed merge_helper($_y, $reordered, $_x)
+    return (; time=elapsed, y=vector_permutation(_y, invperm(perm)))
+end
+
diff --git a/parallel/spmv/merge.jl b/parallel/spmv/merge.jl
new file mode 100644
index 00000000..f4dc1daf
--- /dev/null
+++ b/parallel/spmv/merge.jl
@@ -0,0 +1,14 @@
+using Finch
+using BenchmarkTools
+
+include("utils/merge.jl")
+
+# Benchmark merge_swizzle_helper on Finch tensors built from the inputs; returns (; time, y).
+function merge(y, A, x)
+    _y = Tensor(Dense(Element(0.0)), y)
+    _x = Tensor(Dense(Element(0.0)), x)
+    _A = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1)
+    elapsed = @belapsed merge_swizzle_helper($_y, $_A, $_x)
+    return (; time=elapsed, y=_y)
+end
+
diff --git a/parallel/spmv/results/spmv_10_threads.json b/parallel/spmv/results/spmv_10_threads.json
new file mode 100644
index 00000000..50894f41
--- /dev/null
+++ b/parallel/spmv/results/spmv_10_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009643868,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003440099,
+        "n_threads": 10,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002845301,
+        "n_threads": 10,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.06236093,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.009149666,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002606548,
+        "n_threads": 10,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002587723,
+        "n_threads": 10,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002533871,
+        "n_threads": 10,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000167899,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.8454e-5,
+        "n_threads": 10,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 3.873e-5,
+        "n_threads": 10,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000506439,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 8.9795e-5,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.0905e-5,
+        "n_threads": 10,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.2894e-5,
+        "n_threads": 10,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.1114e-5,
+        "n_threads": 10,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000468416,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000120169,
+        "n_threads": 10,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000108414,
+        "n_threads": 10,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000667579,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000170153,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.3415e-5,
+        "n_threads": 10,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.8251e-5,
+        "n_threads": 10,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.6844e-5,
+        "n_threads": 10,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008116175,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002274642,
+        "n_threads": 10,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001714347,
+        "n_threads": 10,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.008215332,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002621321,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001337518,
+        "n_threads": 10,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000735361,
+        "n_threads": 10,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000793144,
+        "n_threads": 10,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.5527e-5,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.4684e-5,
+        "n_threads": 10,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.327e-5,
+        "n_threads": 10,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 6.8314e-5,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5987e-5,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6707e-5,
+        "n_threads": 10,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5733e-5,
+        "n_threads": 10,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.676e-5,
+        "n_threads": 10,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.017556801,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002451618,
+        "n_threads": 10,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001776321,
+        "n_threads": 10,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002033005,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001814307,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001778023,
+        "n_threads": 10,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001815023,
+        "n_threads": 10,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001790017,
+        "n_threads": 10,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.024290413,
+        "n_threads": 10,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005790899,
+        "n_threads": 10,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004069952,
+        "n_threads": 10,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.070014211,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.01169825,
+        "n_threads": 10,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004476927,
+        "n_threads": 10,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004109582,
+        "n_threads": 10,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.00390903,
+        "n_threads": 10,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_11_threads.json b/parallel/spmv/results/spmv_11_threads.json
new file mode 100644
index 00000000..8b4dc88f
--- /dev/null
+++ b/parallel/spmv/results/spmv_11_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009519351,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004151958,
+        "n_threads": 11,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003503004,
+        "n_threads": 11,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.05857516,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.009049442,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002777395,
+        "n_threads": 11,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002850664,
+        "n_threads": 11,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002721331,
+        "n_threads": 11,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000166378,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.0023e-5,
+        "n_threads": 11,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 3.8218e-5,
+        "n_threads": 11,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000466475,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 8.6463e-5,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 3.9445e-5,
+        "n_threads": 11,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.3237e-5,
+        "n_threads": 11,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.2714e-5,
+        "n_threads": 11,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.00047243,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.00011345,
+        "n_threads": 11,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 9.9091e-5,
+        "n_threads": 11,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000615361,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000154398,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.5221e-5,
+        "n_threads": 11,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.2861e-5,
+        "n_threads": 11,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.303e-5,
+        "n_threads": 11,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.007744543,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001739751,
+        "n_threads": 11,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001594418,
+        "n_threads": 11,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.007926876,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00245018,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001075475,
+        "n_threads": 11,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000723042,
+        "n_threads": 11,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000713275,
+        "n_threads": 11,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.5601e-5,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.4959e-5,
+        "n_threads": 11,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.1398e-5,
+        "n_threads": 11,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 6.2644e-5,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5593e-5,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.4851e-5,
+        "n_threads": 11,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5753e-5,
+        "n_threads": 11,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.4734e-5,
+        "n_threads": 11,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.016515352,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002263252,
+        "n_threads": 11,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001776991,
+        "n_threads": 11,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001945073,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001811728,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001767701,
+        "n_threads": 11,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001797292,
+        "n_threads": 11,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001791533,
+        "n_threads": 11,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.023331361,
+        "n_threads": 11,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.00517173,
+        "n_threads": 11,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.00431359,
+        "n_threads": 11,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.064806961,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.010919977,
+        "n_threads": 11,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004261696,
+        "n_threads": 11,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004231305,
+        "n_threads": 11,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004286125,
+        "n_threads": 11,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_12_threads.json b/parallel/spmv/results/spmv_12_threads.json
new file mode 100644
index 00000000..2dca4862
--- /dev/null
+++ b/parallel/spmv/results/spmv_12_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009956742,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003543329,
+        "n_threads": 12,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003018592,
+        "n_threads": 12,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.054223164,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.009281952,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002476554,
+        "n_threads": 12,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002475601,
+        "n_threads": 12,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002396018,
+        "n_threads": 12,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000166476,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.6544e-5,
+        "n_threads": 12,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 3.8386e-5,
+        "n_threads": 12,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000423218,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 8.1339e-5,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.0795e-5,
+        "n_threads": 12,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.1059e-5,
+        "n_threads": 12,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 3.9998e-5,
+        "n_threads": 12,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.00046491,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000101879,
+        "n_threads": 12,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 9.2965e-5,
+        "n_threads": 12,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.001114028,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000172956,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.4726e-5,
+        "n_threads": 12,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.2158e-5,
+        "n_threads": 12,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 6.2546e-5,
+        "n_threads": 12,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.00726372,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001529965,
+        "n_threads": 12,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001531283,
+        "n_threads": 12,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.005779237,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00240589,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000969966,
+        "n_threads": 12,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000661485,
+        "n_threads": 12,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00064121,
+        "n_threads": 12,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.6121e-5,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.3526e-5,
+        "n_threads": 12,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.3031e-5,
+        "n_threads": 12,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 5.9906e-5,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5602e-5,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6395e-5,
+        "n_threads": 12,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6095e-5,
+        "n_threads": 12,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5727e-5,
+        "n_threads": 12,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.016858536,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002119424,
+        "n_threads": 12,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001766678,
+        "n_threads": 12,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001916977,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001798534,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001789788,
+        "n_threads": 12,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001807332,
+        "n_threads": 12,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001802153,
+        "n_threads": 12,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.023026484,
+        "n_threads": 12,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.00483719,
+        "n_threads": 12,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004030619,
+        "n_threads": 12,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.057069952,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.010046876,
+        "n_threads": 12,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.003883476,
+        "n_threads": 12,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.003866337,
+        "n_threads": 12,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004119254,
+        "n_threads": 12,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_1_threads.json b/parallel/spmv/results/spmv_1_threads.json
new file mode 100644
index 00000000..99abe8c4
--- /dev/null
+++ b/parallel/spmv/results/spmv_1_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009758493,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.011934781,
+        "n_threads": 1,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.009648794,
+        "n_threads": 1,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.376620832,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.045343777,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.00948524,
+        "n_threads": 1,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.009382047,
+        "n_threads": 1,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.009472763,
+        "n_threads": 1,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000166965,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000204249,
+        "n_threads": 1,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000162038,
+        "n_threads": 1,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.004114989,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000571666,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.0001708,
+        "n_threads": 1,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000169842,
+        "n_threads": 1,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000170159,
+        "n_threads": 1,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000471069,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000495378,
+        "n_threads": 1,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000421551,
+        "n_threads": 1,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.005181615,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000937408,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.00043271,
+        "n_threads": 1,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000432927,
+        "n_threads": 1,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000433703,
+        "n_threads": 1,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008194762,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.007958217,
+        "n_threads": 1,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.007081718,
+        "n_threads": 1,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.044890333,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.01203481,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.007812929,
+        "n_threads": 1,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.007975824,
+        "n_threads": 1,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.007909392,
+        "n_threads": 1,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.5647e-5,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000114406,
+        "n_threads": 1,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000106025,
+        "n_threads": 1,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000464804,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000144713,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 8.4714e-5,
+        "n_threads": 1,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 8.4217e-5,
+        "n_threads": 1,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 8.5009e-5,
+        "n_threads": 1,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.01736104,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.014943839,
+        "n_threads": 1,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.011276663,
+        "n_threads": 1,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.0145074,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.011918071,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.01107792,
+        "n_threads": 1,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.010968425,
+        "n_threads": 1,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.010979151,
+        "n_threads": 1,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.02377866,
+        "n_threads": 1,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.024882528,
+        "n_threads": 1,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.023154304,
+        "n_threads": 1,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.509482449,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.069820808,
+        "n_threads": 1,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.022243753,
+        "n_threads": 1,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.022472338,
+        "n_threads": 1,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.023075844,
+        "n_threads": 1,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_2_threads.json b/parallel/spmv/results/spmv_2_threads.json
new file mode 100644
index 00000000..3a9e9245
--- /dev/null
+++ b/parallel/spmv/results/spmv_2_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009964391,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.007972971,
+        "n_threads": 2,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.005877332,
+        "n_threads": 2,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.23656665,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.029448929,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.005641295,
+        "n_threads": 2,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.00564319,
+        "n_threads": 2,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.005551018,
+        "n_threads": 2,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000166121,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000124576,
+        "n_threads": 2,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 9.9059e-5,
+        "n_threads": 2,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.002430129,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000331861,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.00010153,
+        "n_threads": 2,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000102108,
+        "n_threads": 2,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000103103,
+        "n_threads": 2,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000467852,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000280337,
+        "n_threads": 2,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000243803,
+        "n_threads": 2,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.003017835,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000506079,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000226237,
+        "n_threads": 2,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000219601,
+        "n_threads": 2,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000221282,
+        "n_threads": 2,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008039944,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.004703203,
+        "n_threads": 2,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00432242,
+        "n_threads": 2,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.025745014,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.006596804,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.004413467,
+        "n_threads": 2,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003589113,
+        "n_threads": 2,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003635945,
+        "n_threads": 2,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.5644e-5,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 7.61e-5,
+        "n_threads": 2,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 5.8658e-5,
+        "n_threads": 2,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000269209,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 8.0223e-5,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.9227e-5,
+        "n_threads": 2,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.9069e-5,
+        "n_threads": 2,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.9087e-5,
+        "n_threads": 2,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.017415185,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.0098262,
+        "n_threads": 2,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.005780872,
+        "n_threads": 2,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.008157137,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.006249572,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00536363,
+        "n_threads": 2,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.005423026,
+        "n_threads": 2,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.005580556,
+        "n_threads": 2,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.024042514,
+        "n_threads": 2,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.019520349,
+        "n_threads": 2,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.012407825,
+        "n_threads": 2,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.283553106,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.045740471,
+        "n_threads": 2,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.013791914,
+        "n_threads": 2,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.013032261,
+        "n_threads": 2,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.013846252,
+        "n_threads": 2,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_3_threads.json b/parallel/spmv/results/spmv_3_threads.json
new file mode 100644
index 00000000..8c726fa0
--- /dev/null
+++ b/parallel/spmv/results/spmv_3_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.010034864,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.005702206,
+        "n_threads": 3,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.00529541,
+        "n_threads": 3,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.16750896,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.019890016,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004619594,
+        "n_threads": 3,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004590387,
+        "n_threads": 3,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004782672,
+        "n_threads": 3,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000168131,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 9.5825e-5,
+        "n_threads": 3,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 7.9691e-5,
+        "n_threads": 3,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.001730798,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000244045,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 7.8443e-5,
+        "n_threads": 3,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 7.8327e-5,
+        "n_threads": 3,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 7.9323e-5,
+        "n_threads": 3,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000483364,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000218196,
+        "n_threads": 3,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.00020345,
+        "n_threads": 3,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.002179718,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000386113,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000163194,
+        "n_threads": 3,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000159581,
+        "n_threads": 3,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000157146,
+        "n_threads": 3,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008002527,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003662739,
+        "n_threads": 3,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003372173,
+        "n_threads": 3,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.017701609,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.005652785,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003126198,
+        "n_threads": 3,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002381582,
+        "n_threads": 3,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002357669,
+        "n_threads": 3,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.5258e-5,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 5.6856e-5,
+        "n_threads": 3,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.6351e-5,
+        "n_threads": 3,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000196504,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 5.8414e-5,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.1517e-5,
+        "n_threads": 3,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.0788e-5,
+        "n_threads": 3,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.1476e-5,
+        "n_threads": 3,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.01737392,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.005220831,
+        "n_threads": 3,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.003997662,
+        "n_threads": 3,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.006365112,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004410126,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00381635,
+        "n_threads": 3,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.003831612,
+        "n_threads": 3,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.003768764,
+        "n_threads": 3,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.024035454,
+        "n_threads": 3,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.013937966,
+        "n_threads": 3,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.011814697,
+        "n_threads": 3,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.209505113,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.030546431,
+        "n_threads": 3,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.012302445,
+        "n_threads": 3,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.008427558,
+        "n_threads": 3,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.009402599,
+        "n_threads": 3,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_4_threads.json b/parallel/spmv/results/spmv_4_threads.json
new file mode 100644
index 00000000..de3250f5
--- /dev/null
+++ b/parallel/spmv/results/spmv_4_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009845164,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.005387483,
+        "n_threads": 4,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004176382,
+        "n_threads": 4,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.127107448,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.017085461,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003669864,
+        "n_threads": 4,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003864655,
+        "n_threads": 4,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003994657,
+        "n_threads": 4,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.00016827,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 8.2798e-5,
+        "n_threads": 4,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 6.1718e-5,
+        "n_threads": 4,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.001161871,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.00018191,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 6.492e-5,
+        "n_threads": 4,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 6.6829e-5,
+        "n_threads": 4,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 6.6348e-5,
+        "n_threads": 4,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000466017,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000187027,
+        "n_threads": 4,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000165901,
+        "n_threads": 4,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.0014863,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000313389,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000138918,
+        "n_threads": 4,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000133718,
+        "n_threads": 4,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000133512,
+        "n_threads": 4,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008020643,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003470342,
+        "n_threads": 4,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002803906,
+        "n_threads": 4,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.014968816,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.004926657,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002386644,
+        "n_threads": 4,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001786607,
+        "n_threads": 4,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00177909,
+        "n_threads": 4,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.6165e-5,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.6473e-5,
+        "n_threads": 4,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.938e-5,
+        "n_threads": 4,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.000136472,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.6004e-5,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.5445e-5,
+        "n_threads": 4,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.5213e-5,
+        "n_threads": 4,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.2156e-5,
+        "n_threads": 4,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.01632266,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.003858071,
+        "n_threads": 4,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002902628,
+        "n_threads": 4,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004289577,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.003202864,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002979373,
+        "n_threads": 4,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002901255,
+        "n_threads": 4,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002925194,
+        "n_threads": 4,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.023934745,
+        "n_threads": 4,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.012834558,
+        "n_threads": 4,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.009554827,
+        "n_threads": 4,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.165732327,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.024912395,
+        "n_threads": 4,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.009573885,
+        "n_threads": 4,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.009831324,
+        "n_threads": 4,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.009350548,
+        "n_threads": 4,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_5_threads.json b/parallel/spmv/results/spmv_5_threads.json
new file mode 100644
index 00000000..c22a215b
--- /dev/null
+++ b/parallel/spmv/results/spmv_5_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009567207,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004687249,
+        "n_threads": 5,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004546803,
+        "n_threads": 5,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.118082368,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.01372927,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002981844,
+        "n_threads": 5,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003414457,
+        "n_threads": 5,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003350071,
+        "n_threads": 5,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.00016666,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 7.0345e-5,
+        "n_threads": 5,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.4058e-5,
+        "n_threads": 5,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000961102,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000150538,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.4963e-5,
+        "n_threads": 5,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.254e-5,
+        "n_threads": 5,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.4561e-5,
+        "n_threads": 5,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000474019,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000177902,
+        "n_threads": 5,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000147282,
+        "n_threads": 5,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.001236862,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000269187,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000112857,
+        "n_threads": 5,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.0001072,
+        "n_threads": 5,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000106968,
+        "n_threads": 5,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008057907,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003022412,
+        "n_threads": 5,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002447972,
+        "n_threads": 5,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.012907031,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003772267,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002026583,
+        "n_threads": 5,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001485475,
+        "n_threads": 5,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001766963,
+        "n_threads": 5,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.6056e-5,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.9651e-5,
+        "n_threads": 5,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.3699e-5,
+        "n_threads": 5,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00011661,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 4.0058e-5,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.0503e-5,
+        "n_threads": 5,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.9704e-5,
+        "n_threads": 5,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.9704e-5,
+        "n_threads": 5,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.017574168,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.004571,
+        "n_threads": 5,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002449066,
+        "n_threads": 5,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.0036248,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002847066,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002436183,
+        "n_threads": 5,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002348688,
+        "n_threads": 5,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002348761,
+        "n_threads": 5,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.023977009,
+        "n_threads": 5,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.006771296,
+        "n_threads": 5,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.008518309,
+        "n_threads": 5,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.125873658,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.020248894,
+        "n_threads": 5,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.008140321,
+        "n_threads": 5,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.007933917,
+        "n_threads": 5,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.008153084,
+        "n_threads": 5,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_6_threads.json b/parallel/spmv/results/spmv_6_threads.json
new file mode 100644
index 00000000..564d1b14
--- /dev/null
+++ b/parallel/spmv/results/spmv_6_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009361234,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004478361,
+        "n_threads": 6,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003534819,
+        "n_threads": 6,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.100616344,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.013263877,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003124986,
+        "n_threads": 6,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003100511,
+        "n_threads": 6,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003088635,
+        "n_threads": 6,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000168818,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 6.1869e-5,
+        "n_threads": 6,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.6988e-5,
+        "n_threads": 6,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000822581,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000134858,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.1506e-5,
+        "n_threads": 6,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.2901e-5,
+        "n_threads": 6,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.2046e-5,
+        "n_threads": 6,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000471814,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000153087,
+        "n_threads": 6,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000137955,
+        "n_threads": 6,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.001063284,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000239853,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 9.811e-5,
+        "n_threads": 6,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 9.2632e-5,
+        "n_threads": 6,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 9.4824e-5,
+        "n_threads": 6,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008051642,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002614253,
+        "n_threads": 6,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00246389,
+        "n_threads": 6,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.010403887,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00393785,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001883809,
+        "n_threads": 6,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001164359,
+        "n_threads": 6,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.0011666,
+        "n_threads": 6,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.5592e-5,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.3877e-5,
+        "n_threads": 6,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6638e-5,
+        "n_threads": 6,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 9.9549e-5,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.6346e-5,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.7678e-5,
+        "n_threads": 6,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6703e-5,
+        "n_threads": 6,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.7389e-5,
+        "n_threads": 6,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.017526908,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00278057,
+        "n_threads": 6,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002054939,
+        "n_threads": 6,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.003030898,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002767055,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002009575,
+        "n_threads": 6,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001942518,
+        "n_threads": 6,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002004999,
+        "n_threads": 6,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.024166515,
+        "n_threads": 6,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.006199904,
+        "n_threads": 6,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.007344162,
+        "n_threads": 6,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.114795672,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.017183134,
+        "n_threads": 6,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005600838,
+        "n_threads": 6,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.007002531,
+        "n_threads": 6,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005885946,
+        "n_threads": 6,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_7_threads.json b/parallel/spmv/results/spmv_7_threads.json
new file mode 100644
index 00000000..26e801f6
--- /dev/null
+++ b/parallel/spmv/results/spmv_7_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009616459,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.005433496,
+        "n_threads": 7,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004611218,
+        "n_threads": 7,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.087252937,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.012298776,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003267433,
+        "n_threads": 7,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002727487,
+        "n_threads": 7,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003282652,
+        "n_threads": 7,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000176648,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.6046e-5,
+        "n_threads": 7,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.8348e-5,
+        "n_threads": 7,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000701823,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000114441,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.1847e-5,
+        "n_threads": 7,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.7e-5,
+        "n_threads": 7,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.404e-5,
+        "n_threads": 7,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.00047561,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000144885,
+        "n_threads": 7,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000134665,
+        "n_threads": 7,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000936077,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000222172,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 8.911e-5,
+        "n_threads": 7,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.9689e-5,
+        "n_threads": 7,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 8.7207e-5,
+        "n_threads": 7,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008107633,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002707667,
+        "n_threads": 7,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002715647,
+        "n_threads": 7,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.010539581,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003588769,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001565399,
+        "n_threads": 7,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001062527,
+        "n_threads": 7,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001009851,
+        "n_threads": 7,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.6731e-5,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.1831e-5,
+        "n_threads": 7,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6577e-5,
+        "n_threads": 7,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 8.7477e-5,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.1671e-5,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6019e-5,
+        "n_threads": 7,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5882e-5,
+        "n_threads": 7,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.6186e-5,
+        "n_threads": 7,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.017520845,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.003367956,
+        "n_threads": 7,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001910576,
+        "n_threads": 7,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002621007,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002502208,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001824189,
+        "n_threads": 7,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001862839,
+        "n_threads": 7,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001873753,
+        "n_threads": 7,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.02395662,
+        "n_threads": 7,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.007852781,
+        "n_threads": 7,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004915653,
+        "n_threads": 7,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.098469177,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.015045842,
+        "n_threads": 7,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005972665,
+        "n_threads": 7,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005895998,
+        "n_threads": 7,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005981885,
+        "n_threads": 7,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_8_threads.json b/parallel/spmv/results/spmv_8_threads.json
new file mode 100644
index 00000000..0a0247fd
--- /dev/null
+++ b/parallel/spmv/results/spmv_8_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009633471,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003509669,
+        "n_threads": 8,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003125268,
+        "n_threads": 8,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.075567052,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.010366105,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002788955,
+        "n_threads": 8,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002749097,
+        "n_threads": 8,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.002698756,
+        "n_threads": 8,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000167885,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.1249e-5,
+        "n_threads": 8,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.2164e-5,
+        "n_threads": 8,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000619043,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000107943,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.4571e-5,
+        "n_threads": 8,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.3739e-5,
+        "n_threads": 8,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.4989e-5,
+        "n_threads": 8,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000480932,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000142752,
+        "n_threads": 8,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000126561,
+        "n_threads": 8,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000830785,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000201268,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.7033e-5,
+        "n_threads": 8,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.7338e-5,
+        "n_threads": 8,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.509e-5,
+        "n_threads": 8,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.007914173,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002474617,
+        "n_threads": 8,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002108031,
+        "n_threads": 8,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.010114297,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.003139314,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.00141751,
+        "n_threads": 8,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000897536,
+        "n_threads": 8,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000919506,
+        "n_threads": 8,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.6804e-5,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.7154e-5,
+        "n_threads": 8,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.2343e-5,
+        "n_threads": 8,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 7.893e-5,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 3.0141e-5,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.3056e-5,
+        "n_threads": 8,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.4862e-5,
+        "n_threads": 8,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.3545e-5,
+        "n_threads": 8,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.016180878,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002931903,
+        "n_threads": 8,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001838998,
+        "n_threads": 8,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002342052,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002232027,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001832037,
+        "n_threads": 8,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001836819,
+        "n_threads": 8,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001831811,
+        "n_threads": 8,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.022766545,
+        "n_threads": 8,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.006992848,
+        "n_threads": 8,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005595463,
+        "n_threads": 8,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.085630893,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.013449564,
+        "n_threads": 8,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005515747,
+        "n_threads": 8,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005354054,
+        "n_threads": 8,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005351433,
+        "n_threads": 8,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/results/spmv_9_threads.json b/parallel/spmv/results/spmv_9_threads.json
new file mode 100644
index 00000000..43c4fd00
--- /dev/null
+++ b/parallel/spmv/results/spmv_9_threads.json
@@ -0,0 +1,466 @@
+[
+    {
+        "time": 0.009908263,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.004814435,
+        "n_threads": 9,
+        "method": "finch_parallel",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003935865,
+        "n_threads": 9,
+        "method": "static_rows_equal",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.070765292,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.010500957,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003227291,
+        "n_threads": 9,
+        "method": "merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003207084,
+        "n_threads": 9,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.003073351,
+        "n_threads": 9,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "Williams",
+        "matrix": "Williams/webbase-1M"
+    },
+    {
+        "time": 0.000167905,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 5.0516e-5,
+        "n_threads": 9,
+        "method": "finch_parallel",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.4298e-5,
+        "n_threads": 9,
+        "method": "static_rows_equal",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000559596,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 9.8149e-5,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.1998e-5,
+        "n_threads": 9,
+        "method": "merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.0085e-5,
+        "n_threads": 9,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 4.385e-5,
+        "n_threads": 9,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "vanHeukelum",
+        "matrix": "vanHeukelum/cage10"
+    },
+    {
+        "time": 0.000473874,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000131775,
+        "n_threads": 9,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000117682,
+        "n_threads": 9,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000741204,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.000186258,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.6094e-5,
+        "n_threads": 9,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.0633e-5,
+        "n_threads": 9,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 7.1818e-5,
+        "n_threads": 9,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Da"
+    },
+    {
+        "time": 0.008060779,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002075341,
+        "n_threads": 9,
+        "method": "finch_parallel",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001837394,
+        "n_threads": 9,
+        "method": "static_rows_equal",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.009405833,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.002912078,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.001223185,
+        "n_threads": 9,
+        "method": "merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000803988,
+        "n_threads": 9,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 0.000800697,
+        "n_threads": 9,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "FEMLAB",
+        "matrix": "FEMLAB/poisson3Db"
+    },
+    {
+        "time": 9.6064e-5,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5906e-5,
+        "n_threads": 9,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.3975e-5,
+        "n_threads": 9,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 7.158e-5,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.8141e-5,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.5829e-5,
+        "n_threads": 9,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.3908e-5,
+        "n_threads": 9,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 2.4674e-5,
+        "n_threads": 9,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1024,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.017438506,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002011319,
+        "n_threads": 9,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001814129,
+        "n_threads": 9,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.002144707,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001850657,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.00178314,
+        "n_threads": 9,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001809725,
+        "n_threads": 9,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.001819882,
+        "n_threads": 9,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 8192,
+            "sparsity": 0.1
+        }
+    },
+    {
+        "time": 0.022221907,
+        "n_threads": 9,
+        "method": "serial_default_implementation",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.003929099,
+        "n_threads": 9,
+        "method": "finch_parallel",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005236155,
+        "n_threads": 9,
+        "method": "static_rows_equal",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.077467082,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_1",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.012714277,
+        "n_threads": 9,
+        "method": "dynamic_rows_grain_10",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.004486373,
+        "n_threads": 9,
+        "method": "merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.005008397,
+        "n_threads": 9,
+        "method": "graph_partition_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    },
+    {
+        "time": 0.003937992,
+        "n_threads": 9,
+        "method": "graph_partition_weighted_reorder_merge",
+        "dataset": "uniform",
+        "matrix": {
+            "size": 1048576,
+            "sparsity": 3000000
+        }
+    }
+]
diff --git a/parallel/spmv/run.sh b/parallel/spmv/run.sh
new file mode 100755
index 00000000..8b5565de
--- /dev/null
+++ b/parallel/spmv/run.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Launch run_spmv.jl once per thread count, from 1 up to the limit passed as $1.
+t=0
+while (( ++t <= $1 )); do
+	echo "Running run_spmv.jl with $t threads"
+	julia "--threads=$t" "run_spmv.jl"
+done
diff --git a/parallel/spmv/run_local.sh b/parallel/spmv/run_local.sh
new file mode 100755
index 00000000..d4525b32
--- /dev/null
+++ b/parallel/spmv/run_local.sh
@@ -0,0 +1 @@
+source run.sh 4
diff --git a/parallel/spmv/run_slurm.sl b/parallel/spmv/run_slurm.sl
new file mode 100644
index 00000000..b1e2eb37
--- /dev/null
+++ b/parallel/spmv/run_slurm.sl
@@ -0,0 +1,12 @@
+#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=24
+#SBATCH --exclusive
+#SBATCH -t 12:00:00
+#SBATCH --partition=lanka-v3
+#SBATCH --qos=commit-main
+#SBATCH --mem 102400
+# Abort if the benchmark directory is missing so run.sh is never sourced from the wrong place.
+cd /data/scratch/paramuth/FinchBenchmarks/parallel/spmv || exit 1
+source run.sh 12
diff --git a/parallel/spmv/run_spmv.jl b/parallel/spmv/run_spmv.jl
new file mode 100644
index 00000000..5531e2ad
--- /dev/null
+++ b/parallel/spmv/run_spmv.jl
@@ -0,0 +1,145 @@
+#!/usr/bin/env julia
+# When run as a script, activate and instantiate the project in the parent directory first.
+if abspath(PROGRAM_FILE) == @__FILE__
+    using Pkg
+    Pkg.activate(dirname(@__DIR__))
+    Pkg.instantiate()
+end
+include("../../deps/diagnostics.jl")
+print_diagnostics()
+
+using MatrixDepot
+using BenchmarkTools
+using ArgParse
+using DataStructures
+using JSON
+using LinearAlgebra
+using Random
+# Seed the global RNG so the `rand` draws made later in this script are reproducible.
+Random.seed!(1234)
+# NOTE(review): presumably pins the Julia threads to the first NUMA domain — confirm against ThreadPinning docs.
+using ThreadPinning
+pinthreads(numa(1))
+
+# Command-line options: output path, dataset filter, method filter and an accuracy-check flag
+s = ArgParseSettings("Run Parallel SpMV Experiments.")
+@add_arg_table! s begin
+    "--output", "-o"
+    arg_type = String
+    help = "output file path"
+    "--dataset", "-d"
+    arg_type = String
+    help = "dataset keyword"
+    "--method", "-m"
+    arg_type = String
+    help = "method keyword"
+    "--accuracy-check", "-a"
+    action = :store_true
+    help = "check method accuracy"
+end
+parsed_args = parse_args(ARGS, s)
+
+# Inputs per dataset keyword: "uniform" entries are arguments for `fsprand`, all others are names passed to `matrixdepot`
+datasets = Dict(
+    "uniform" => [
+        OrderedDict("size" => 2^10, "sparsity" => 0.1),
+        OrderedDict("size" => 2^13, "sparsity" => 0.1),
+        OrderedDict("size" => 2^20, "sparsity" => 3_000_000)  # NOTE(review): integer, presumably a nonzero count rather than a density — confirm against fsprand
+    ],
+    "FEMLAB" => [
+        "FEMLAB/poisson3Da",
+        "FEMLAB/poisson3Db",
+    ],
+    "vanHeukelum" => [
+        "vanHeukelum/cage10",
+        "vanHeukelum/cage11",
+        "vanHeukelum/cage12",
+    ],
+    "Williams" => [
+        "Williams/webbase-1M",
+    ],
+)
+
+# Load the SpMV implementations; the commented-out includes and entries below are currently disabled
+include("serial_default_implementation.jl")
+# include("finch_parallel.jl")
+include("static_rows_equal.jl")
+# include("dynamic_rows_grain.jl")
+include("merge.jl")
+# include("graph_partition_reorder_merge.jl")
+include("graph_partition_weighted_reorder_merge.jl")
+# Method keyword => function called as f(y, A, x). NOTE(review): `methods` and `merge` are also Base names — confirm the shadowing is intended
+methods = OrderedDict(
+    "serial_default_implementation" => serial_default_implementation_mul,
+    # "finch_parallel" => finch_parallel,
+    "static_rows_equal" => static_rows_equal,
+    # "dynamic_rows_grain_1" => dynamic_rows_grain_generator(1),
+    # "dynamic_rows_grain_10" => dynamic_rows_grain_generator(10),
+    "merge" => merge,
+    # "graph_partition_reorder_merge" => graph_partition_reorder_merge,
+    "graph_partition_weighted_reorder_merge" => graph_partition_weighted_reorder_merge,
+)
+
+# Restrict the benchmark to a single method when --method is given.
+if !isnothing(parsed_args["method"])
+    method_name = parsed_args["method"]
+    haskey(methods, method_name) ||
+        throw(ArgumentError("Unrecognized method: $method_name"))
+    methods = OrderedDict(method_name => methods[method_name])
+end
+
+"""
+    calculate_results(dataset, mtxs, results)
+
+Benchmark every entry of the global `methods` on each matrix in `mtxs`, push one record
+per (method, matrix) onto `results`, and rewrite the JSON output after every record.
+"""
+function calculate_results(dataset, mtxs, results)
+    output = something(parsed_args["output"],
+        "results/spmv_$(Threads.nthreads())_threads.json")
+    mkpath(dirname(abspath(output)))  # `write` does not create missing directories
+    for mtx in mtxs
+        # "uniform" entries describe a random matrix; anything else is a MatrixDepot name
+        A = if dataset == "uniform"
+            fsprand(mtx["size"], mtx["size"], mtx["sparsity"])
+        else
+            matrixdepot(mtx)
+        end
+        (num_rows, num_cols) = size(A)
+        x = rand(num_cols)   # dense input vector
+        y = zeros(num_rows)  # result vector
+        # The reference depends only on (A, x): benchmark it once per matrix, not per method
+        reference = parsed_args["accuracy-check"] ?
+            serial_default_implementation_mul(y, A, x).y : nothing
+        for (key, method) in methods
+            result = method(y, A, x)
+            if !isnothing(reference)
+                rel_err = norm(result.y - reference) / norm(reference)
+                rel_err < 0.01 || error("Incorrect result for $key")
+            end
+            time = result.time
+            @info "result for $key on $mtx" time
+            push!(results, OrderedDict(
+                "time" => time,
+                "n_threads" => Threads.nthreads(),
+                "method" => key,
+                "dataset" => dataset,
+                "matrix" => mtx,
+            ))
+            write(output, JSON.json(results, 4))
+        end
+    end
+end
+
+# Run the benchmarks for every dataset, or only for the one selected with --dataset.
+results = []
+if isnothing(parsed_args["dataset"])
+    for (dataset, mtxs) in datasets
+        calculate_results(dataset, mtxs, results)
+    end
+else
+    dataset = parsed_args["dataset"]
+    haskey(datasets, dataset) ||
+        throw(ArgumentError("Unrecognized dataset: $dataset"))
+    calculate_results(dataset, datasets[dataset], results)
+end
diff --git a/parallel/spmv/serial_default_implementation.jl b/parallel/spmv/serial_default_implementation.jl
new file mode 100644
index 00000000..d4070a8a
--- /dev/null
+++ b/parallel/spmv/serial_default_implementation.jl
@@ -0,0 +1,19 @@
+using Finch
+using BenchmarkTools
+# Time a single-threaded Finch SpMV (y[i] += A[i, j] * x[j]) with @belapsed.
+# Returns `(; time, y)`: the measured time and the Finch tensor built from `y`.
+function serial_default_implementation_mul(y, A, x)
+        _y = Tensor(Dense(Element(0.0)), y)
+        _A = Tensor(Dense(SparseList(Element(0.0))), A)
+        _x = Tensor(Dense(Element(0.0)), x)
+        time = @belapsed begin
+                (_y, _A, _x) = $(_y, _A, _x)  # interpolate the prebuilt tensors into the benchmark
+                @finch mode = :fast begin
+                        _y .= 0  # reset the output on every timed evaluation
+                        for j = _, i = _
+                                _y[i] += _A[i, j] * _x[j]
+                        end
+                end
+        end
+        return (; time=time, y=_y)
+end
diff --git a/parallel/spmv/static_rows_equal.jl b/parallel/spmv/static_rows_equal.jl
new file mode 100644
index 00000000..cc91a507
--- /dev/null
+++ b/parallel/spmv/static_rows_equal.jl
@@ -0,0 +1,48 @@
+using Finch
+using BenchmarkTools
+# SpMV kernel: refills y's value buffer with 0.0, then accumulates A*x using one contiguous row range per loop iteration.
+function static_rows_equal_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+    @inbounds @fastmath(begin
+        y_lvl = y.lvl
+        y_lvl_val = y_lvl.lvl.val
+        tns_lvl = A.body.lvl
+        tns_lvl_2 = tns_lvl.lvl
+        tns_lvl_ptr = tns_lvl_2.ptr
+        tns_lvl_idx = tns_lvl_2.idx
+        tns_lvl_2_val = tns_lvl_2.lvl.val
+        x_lvl = x.lvl
+        x_lvl_val = x_lvl.lvl.val
+        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
+        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
+        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape)
+        val = y_lvl_val  # alias taken before moveto; the loop below writes through `val`, not the moved buffer
+        y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads()))
+        x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads()))
+        tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads()))
+        tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads()))
+        tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads()))
+        # Iteration t covers rows 1+div((t-1)*n, T) : div(t*n, T); the ranges are disjoint, so no val[i] is shared.
+        Threads.@threads for t = 1:Threads.nthreads()
+            Finch.@barrier begin
+                @inbounds @fastmath(begin
+                    for i = 1+div((t - 1) * tns_lvl.shape, Threads.nthreads()):div(t * tns_lvl.shape, Threads.nthreads())
+                        for ptr = tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1
+                            val[i] += tns_lvl_2_val[ptr] * x_lvl_val[tns_lvl_idx[ptr]]
+                        end
+                    end
+                end)
+            end
+        end
+        # The function's value is the output buffer, resized to the number of rows.
+        resize!(val, tns_lvl.shape)
+    end)
+end
+# Benchmark static_rows_equal_helper on Finch tensors built from y, A and x; returns (time, y).
+function static_rows_equal(y, A, x)
+    y_tns = Tensor(Dense(Element(0.0)), y)
+    x_tns = Tensor(Dense(Element(0.0)), x)
+    # Store the transpose and swizzle it back, which yields the SwizzleArray type the helper accepts.
+    A_tns = swizzle(Tensor(Dense(SparseList(Element(0.0))), permutedims(A)), 2, 1)
+    elapsed = @belapsed static_rows_equal_helper($y_tns, $A_tns, $x_tns)
+    return (time=elapsed, y=y_tns)
+end
diff --git a/parallel/spmv/unused/finch_kernel_parallel.jl b/parallel/spmv/unused/finch_kernel_parallel.jl
new file mode 100644
index 00000000..67e95e67
--- /dev/null
+++ b/parallel/spmv/unused/finch_kernel_parallel.jl
@@ -0,0 +1,88 @@
+using Finch
+using BenchmarkTools
+
+# Row-parallel SpMV kernel: accumulates A[i, j] * x[j] into y[i] for a swizzled
+# Dense(SparseList) matrix. Each value of the thread index `i_4` handles one contiguous
+# block of the outer (Dense) dimension of `A.body`.
+# NOTE(review): the phase_* / *_q naming looks like compiler-generated Finch code.
+# `Finch.moveto` and `Finch.@barrier` are opaque from here — presumably they prepare the
+# arrays for, and isolate the body on, the multi-threaded CPU device; confirm against Finch.
+# Throws DimensionMismatch when x's length differs from the inner dimension of `A.body`.
+function finch_kernel_parallel_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+    @inbounds @fastmath(begin
+        # Unpack the raw storage arrays of the three tensors.
+        y_lvl = y.lvl
+        y_lvl_val = y_lvl.lvl.val
+        tns_lvl = A.body.lvl
+        tns_lvl_2 = tns_lvl.lvl
+        tns_lvl_ptr = tns_lvl_2.ptr
+        tns_lvl_idx = tns_lvl_2.idx
+        tns_lvl_2_val = tns_lvl_2.lvl.val
+        x_lvl = x.lvl
+        x_lvl_val = x_lvl.lvl.val
+        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
+        # Presumably: grow y's buffer if needed, then zero entries 1:tns_lvl.shape.
+        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
+        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape)
+        # Keep a handle on the original output buffer; y_lvl_val is rebound just below.
+        val = y_lvl_val
+        y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads()))
+        x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads()))
+        tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads()))
+        tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads()))
+        tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads()))
+        # One iteration per thread; i_4 selects the block of outer indices to process.
+        Threads.@threads for i_4 = 1:Threads.nthreads()
+            Finch.@barrier begin
+                @inbounds @fastmath(begin
+                    # Block bounds: 1 + fld(n*(i_4-1), T) through fld(n*i_4, T), clamped to 1:n.
+                    phase_start_2 = max(1, 1 + fld(tns_lvl.shape * (i_4 + -1), Threads.nthreads()))
+                    phase_stop_2 = min(tns_lvl.shape, fld(tns_lvl.shape * i_4, Threads.nthreads()))
+                    if phase_stop_2 >= phase_start_2
+                        for i_7 = phase_start_2:phase_stop_2
+                            y_lvl_q = (1 - 1) * tns_lvl.shape + i_7
+                            tns_lvl_q = (1 - 1) * tns_lvl.shape + i_7
+                            # Stored entries of this slice occupy positions q : q_stop-1 of idx/val.
+                            tns_lvl_2_q = tns_lvl_ptr[tns_lvl_q]
+                            tns_lvl_2_q_stop = tns_lvl_ptr[tns_lvl_q+1]
+                            # Index of the slice's last stored entry, or 0 when the slice is empty.
+                            if tns_lvl_2_q < tns_lvl_2_q_stop
+                                tns_lvl_2_i1 = tns_lvl_idx[tns_lvl_2_q_stop-1]
+                            else
+                                tns_lvl_2_i1 = 0
+                            end
+                            phase_stop_3 = min(x_lvl.shape, tns_lvl_2_i1)
+                            if phase_stop_3 >= 1
+                                # Presumably advances q to the first entry with index >= 1.
+                                if tns_lvl_idx[tns_lvl_2_q] < 1
+                                    tns_lvl_2_q = Finch.scansearch(tns_lvl_idx, 1, tns_lvl_2_q, tns_lvl_2_q_stop - 1)
+                                end
+                                # Walk the stored entries: accumulate those below phase_stop_3,
+                                # then handle the entry at phase_stop_3 (if any) and stop.
+                                while true
+                                    tns_lvl_2_i = tns_lvl_idx[tns_lvl_2_q]
+                                    if tns_lvl_2_i < phase_stop_3
+                                        tns_lvl_3_val = tns_lvl_2_val[tns_lvl_2_q]
+                                        x_lvl_q = (1 - 1) * x_lvl.shape + tns_lvl_2_i
+                                        x_lvl_2_val = x_lvl_val[x_lvl_q]
+                                        y_lvl_val[y_lvl_q] = tns_lvl_3_val * x_lvl_2_val + y_lvl_val[y_lvl_q]
+                                        tns_lvl_2_q += 1
+                                    else
+                                        phase_stop_5 = min(phase_stop_3, tns_lvl_2_i)
+                                        if tns_lvl_2_i == phase_stop_5
+                                            tns_lvl_3_val = tns_lvl_2_val[tns_lvl_2_q]
+                                            x_lvl_q = (1 - 1) * x_lvl.shape + phase_stop_5
+                                            x_lvl_2_val_2 = x_lvl_val[x_lvl_q]
+                                            y_lvl_val[y_lvl_q] += tns_lvl_3_val * x_lvl_2_val_2
+                                            tns_lvl_2_q += 1
+                                        end
+                                        break
+                                    end
+                                end
+                            end
+                        end
+                    end
+                    # Trailing phase: only computes bounds; `phase_stop_7 + 1` has no effect.
+                    phase_start_6 = max(1, 1 + fld(tns_lvl.shape * i_4, Threads.nthreads()))
+                    phase_stop_7 = tns_lvl.shape
+                    if phase_stop_7 >= phase_start_6
+                        phase_stop_7 + 1
+                    end
+                end)
+                nothing
+            end
+        end
+        # Set the original output buffer's length to exactly tns_lvl.shape.
+        resize!(val, tns_lvl.shape)
+    end)
+end
+
+"""
+    finch_kernel_parallel(y, A, x)
+
+Wrap `y`, `A` and `x` in Finch tensors and benchmark `finch_kernel_parallel_helper`.
+
+`A` is stored as `Dense(SparseList(Element))` of `permutedims(A)` and then swizzled with
+`(2, 1)`. Returns a named tuple with the `@belapsed` result in `time` and the output
+tensor in `y`.
+"""
+function finch_kernel_parallel(y, A, x)
+    dense_vec(v) = Tensor(Dense(Element(0.0)), v)
+    out = dense_vec(y)
+    transposed = Tensor(Dense(SparseList(Element(0.0))), permutedims(A))
+    mat = swizzle(transposed, 2, 1)
+    rhs = dense_vec(x)
+
+    elapsed = @belapsed finch_kernel_parallel_helper($out, $mat, $rhs)
+    return (time=elapsed, y=out)
+end
diff --git a/parallel/spmv/unused/graph_partition.jl b/parallel/spmv/unused/graph_partition.jl
new file mode 100644
index 00000000..3464bec5
--- /dev/null
+++ b/parallel/spmv/unused/graph_partition.jl
@@ -0,0 +1,78 @@
+using Finch
+using BenchmarkTools
+using Graphs
+using Metis
+
+"""
+    create_partitions(A)
+
+Partition the outer indices of the square matrix `A` into `Threads.nthreads()` groups.
+
+An undirected `SimpleGraph` is built with one vertex per outer index and one edge per
+stored entry, then handed to `Metis.partition` with `alg=:KWAY`. The result is a vector
+whose `i`-th element lists the vertices assigned to part `i`.
+
+Throws `DimensionMismatch` when the two dimensions of `A.body` differ.
+"""
+function create_partitions(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})
+    outer = A.body.lvl
+    inner = outer.lvl
+    outer.shape == inner.shape || throw(DimensionMismatch("mismatched dimension limits ($(outer.shape) != $(inner.shape))"))
+
+    ptr = inner.ptr
+    idx = inner.idx
+
+    # One vertex per outer index; every stored entry (v, idx[p]) becomes an edge.
+    graph = SimpleGraph(outer.shape)
+    for v in 1:outer.shape, p in ptr[v]:ptr[v+1]-1
+        add_edge!(graph, v, idx[p])
+    end
+
+    # Ask Metis for one part per thread, then group the vertices by assigned part.
+    nparts = Threads.nthreads()
+    positions = Metis.partition(graph, nparts; alg=:KWAY)
+    return map(part -> findall(==(part), positions), 1:nparts)
+end
+
+# SpMV kernel that accumulates A[row, col] * x[col] into y[row], where thread index `i`
+# processes exactly the rows listed in `partitions[i]`.
+# `partitions` must have at least `Threads.nthreads()` entries (it is indexed by 1:nthreads()).
+# NOTE(review): correctness of the unsynchronized `+=` relies on every row appearing in
+# at most one partition — true for the output of `create_partitions`, verify for other callers.
+# NOTE(review): `Finch.moveto` / `Finch.@barrier` are opaque from here; presumably they
+# prepare arrays for, and isolate the body on, the multi-threaded CPU device.
+# Throws DimensionMismatch when x's length differs from the inner dimension of `A.body`.
+function graph_partition_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, partitions::Vector{Vector{Int64}})
+    @inbounds @fastmath(begin
+        # Unpack the raw storage arrays of the three tensors.
+        y_lvl = y.lvl
+        y_lvl_val = y_lvl.lvl.val
+        tns_lvl = A.body.lvl
+        tns_lvl_2 = tns_lvl.lvl
+        tns_lvl_ptr = tns_lvl_2.ptr
+        tns_lvl_idx = tns_lvl_2.idx
+        tns_lvl_2_val = tns_lvl_2.lvl.val
+        x_lvl = x.lvl
+        x_lvl_val = x_lvl.lvl.val
+        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
+        # Presumably: grow y's buffer if needed, then zero entries 1:tns_lvl.shape.
+        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
+        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape)
+        # Keep a handle on the original output buffer; y_lvl_val is rebound just below.
+        val = y_lvl_val
+
+        y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads()))
+        x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads()))
+        tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads()))
+        tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads()))
+        tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads()))
+        partitions = (Finch).moveto(partitions, CPU(Threads.nthreads()))
+
+        # One iteration per thread; iteration i owns the rows in partitions[i].
+        Threads.@threads for i = 1:Threads.nthreads()
+            Finch.@barrier begin
+                @inbounds @fastmath(begin
+                    for row in partitions[i]
+                        # Stored entries of `row` occupy positions ptr[row] : ptr[row+1]-1.
+                        for ptr = tns_lvl_ptr[row]:tns_lvl_ptr[row+1]-1
+                            col = tns_lvl_idx[ptr]
+                            A_val = tns_lvl_2_val[ptr]
+                            y_lvl_val[row] += A_val * x_lvl_val[col]
+                        end
+                    end
+                end)
+            end
+        end
+
+        # Set the original output buffer's length to exactly tns_lvl.shape.
+        resize!(val, tns_lvl.shape)
+    end)
+end
+
+"""
+    graph_partition(y, A, x)
+
+Wrap `y`, `A` and `x` in Finch tensors, compute a row partition with `create_partitions`
+(outside the timed region), and benchmark `graph_partition_helper` on the result.
+
+Returns a named tuple with the `@belapsed` result in `time` and the output tensor in `y`.
+"""
+function graph_partition(y, A, x)
+    dense_vec(v) = Tensor(Dense(Element(0.0)), v)
+    out = dense_vec(y)
+    transposed = Tensor(Dense(SparseList(Element(0.0))), permutedims(A))
+    mat = swizzle(transposed, 2, 1)
+    rhs = dense_vec(x)
+    row_groups = create_partitions(mat)
+
+    elapsed = @belapsed graph_partition_helper($out, $mat, $rhs, $row_groups)
+    return (time=elapsed, y=out)
+end
+
diff --git a/parallel/spmv/unused/split_cols_dynamic_grain_scratchspace.jl b/parallel/spmv/unused/split_cols_dynamic_grain_scratchspace.jl
new file mode 100644
index 00000000..0e9b6efe
--- /dev/null
+++ b/parallel/spmv/unused/split_cols_dynamic_grain_scratchspace.jl
@@ -0,0 +1,74 @@
+using Finch
+using BenchmarkTools
+using Base.Threads
+
+
+"""
+    split_cols_dynamic_grain_scratchspace_mul(grain_size)
+
+Return a function of `(y, A, x)` that forwards to
+`split_cols_dynamic_grain_scratchspace_helper` with `grain_size` fixed.
+"""
+function split_cols_dynamic_grain_scratchspace_mul(grain_size)
+        function mul(y, A, x)
+                return split_cols_dynamic_grain_scratchspace_helper(grain_size, y, A, x)
+        end
+        return mul
+end
+"""
+    split_cols_dynamic_grain_scratchspace_helper(grain_size, y, A, x)
+
+Wrap `y`, `A` and `x` in Finch tensors (`A` as `Dense(SparseList(Element))`) and benchmark
+`split_cols_dynamic_grain_scratchspace` with the given `grain_size`.
+
+Returns a named tuple with the `@belapsed` result in `time` and the output tensor in `y`.
+"""
+function split_cols_dynamic_grain_scratchspace_helper(grain_size, y, A, x)
+        dense_vec(v) = Tensor(Dense(Element(0.0)), v)
+        out = dense_vec(y)
+        mat = Tensor(Dense(SparseList(Element(0.0))), A)
+        rhs = dense_vec(x)
+        elapsed = @belapsed begin
+                (g, yt, At, xt) = $(grain_size, out, mat, rhs)
+                split_cols_dynamic_grain_scratchspace(g, yt, At, xt)
+        end
+        return (time=elapsed, y=out)
+end
+
+"""
+    split_cols_dynamic_grain_scratchspace(grain_size, y, A, x)
+
+Compute `y = A * x` in place for a `Dense(SparseList(Element))` matrix, splitting the
+columns into chunks of `grain_size` that are handed out dynamically to worker tasks.
+
+Each worker task owns one private scratch vector; the scratch vectors are summed into
+`y` in a second parallel pass.
+
+# Arguments
+- `grain_size::Int64`: number of consecutive columns per work chunk (must be positive)
+- `y`: output vector tensor; its storage is grown if needed and overwritten
+- `A`: the sparse matrix
+- `x`: input vector tensor; its length must equal the outer dimension of `A`
+
+# Throws
+- `ArgumentError` if `grain_size <= 0`
+- `DimensionMismatch` if `x` does not match the outer dimension of `A`
+"""
+function split_cols_dynamic_grain_scratchspace(grain_size::Int64, y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+        grain_size > 0 || throw(ArgumentError("grain_size must be positive, got $(grain_size)"))
+        @inbounds @fastmath(begin
+                y_lvl_val = y.lvl.lvl.val # Vector{Float64}
+
+                A_lvl = A.lvl # DenseLevel
+                A_lvl_2 = A_lvl.lvl # SparseListLevel
+                A_lvl_ptr = A_lvl_2.ptr # Vector{Int64}
+                A_lvl_idx = A_lvl_2.idx # Vector{Int64}
+                A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64}
+
+                x_lvl = x.lvl # DenseLevel
+                x_lvl_val = x_lvl.lvl.val # Vector{Float64}
+
+                x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))"))
+
+                num_cols = A_lvl.shape
+                # Size everything from the matrix, so scratch space and reduction always
+                # agree with the range of `y` that is zeroed here.
+                num_rows = A_lvl_2.shape
+                Finch.resize_if_smaller!(y_lvl_val, num_rows)
+                Finch.fill_range!(y_lvl_val, 0.0, 1, num_rows)
+
+                num_threads = Threads.nthreads()
+                y_temps = [zeros(Float64, num_rows) for _ in 1:num_threads]
+
+                # Dynamic scheduling without `threadid()`: each worker task owns the
+                # scratch vector selected by its own loop index and pulls chunk numbers
+                # from a shared atomic counter. The last chunk may be shorter.
+                num_chunks = cld(num_cols, grain_size)
+                next_chunk = Threads.Atomic{Int}(1)
+
+                @sync for worker = 1:num_threads
+                        Threads.@spawn begin
+                                y_temp = y_temps[worker]
+                                while true
+                                        # atomic_add! returns the value before the increment
+                                        chunk = Threads.atomic_add!(next_chunk, 1)
+                                        chunk > num_chunks && break
+                                        first_col = (chunk - 1) * grain_size + 1
+                                        last_col = min(chunk * grain_size, num_cols)
+                                        for j = first_col:last_col
+                                                for q in A_lvl_ptr[j]:A_lvl_ptr[j+1]-1
+                                                        i = A_lvl_idx[q]
+                                                        y_temp[i] += A_lvl_2_val[q] * x_lvl_val[j]
+                                                end
+                                        end
+                                end
+                        end
+                end
+
+                # Reduction: task k sums its slice of rows across all scratch vectors.
+                Threads.@threads for k = 1:num_threads
+                        for j = 1:num_threads
+                                for i = 1+div((k - 1) * num_rows, num_threads):div(k * num_rows, num_threads)
+                                        y_lvl_val[i] += y_temps[j][i]
+                                end
+                        end
+                end
+        end)
+end
diff --git a/parallel/spmv/unused/split_cols_finch_parallel_atomics.jl b/parallel/spmv/unused/split_cols_finch_parallel_atomics.jl
new file mode 100644
index 00000000..d0c34c20
--- /dev/null
+++ b/parallel/spmv/unused/split_cols_finch_parallel_atomics.jl
@@ -0,0 +1,19 @@
+using Finch
+using BenchmarkTools
+
+
+# Benchmark a Finch-compiled SpMV whose outer loop over `j` is marked `parallel` and whose
+# output `_y` is stored as `Dense(AtomicElement(0.0))`.
+# `y`, `A`, `x` are converted to Finch tensors before timing; `A` uses
+# `Dense(SparseList(Element(0.0)))`.
+# Returns a named tuple: `time` is the `@belapsed` result, `y` is the output tensor.
+function split_cols_finch_parallel_atomics_mul(y, A, x)
+        _y = Tensor(Dense(AtomicElement(0.0)), y)
+        _A = Tensor(Dense(SparseList(Element(0.0))), A)
+        _x = Tensor(Dense(Element(0.0)), x)
+        time = @belapsed begin
+                # Interpolate the tensors once so the timed code works on local bindings.
+                (_y, _A, _x) = $(_y, _A, _x)
+                @finch mode = :fast begin
+                        # Reset the output, then accumulate y[i] += A[i, j] * x[j].
+                        _y .= 0
+                        for j = parallel(_), i = _
+                                _y[i] += _A[i, j] * _x[j]
+                        end
+                end
+        end
+        return (; time=time, y=_y)
+end
diff --git a/parallel/spmv/unused/split_cols_finch_parallel_mutex.jl b/parallel/spmv/unused/split_cols_finch_parallel_mutex.jl
new file mode 100644
index 00000000..3d020338
--- /dev/null
+++ b/parallel/spmv/unused/split_cols_finch_parallel_mutex.jl
@@ -0,0 +1,19 @@
+using Finch
+using BenchmarkTools
+
+
+# Benchmark a Finch-compiled SpMV whose outer loop over `j` is marked `parallel` and whose
+# output `_y` is stored as `Dense(Mutex(Element(0.0)))`.
+# `y`, `A`, `x` are converted to Finch tensors before timing; `A` uses
+# `Dense(SparseList(Element(0.0)))`.
+# Returns a named tuple: `time` is the `@belapsed` result, `y` is the output tensor.
+function split_cols_finch_parallel_mutex_mul(y, A, x)
+        _y = Tensor(Dense(Mutex(Element(0.0))), y)
+        _A = Tensor(Dense(SparseList(Element(0.0))), A)
+        _x = Tensor(Dense(Element(0.0)), x)
+        time = @belapsed begin
+                # Interpolate the tensors once so the timed code works on local bindings.
+                (_y, _A, _x) = $(_y, _A, _x)
+                @finch mode = :fast begin
+                        # Reset the output, then accumulate y[i] += A[i, j] * x[j].
+                        _y .= 0
+                        for j = parallel(_), i = _
+                                _y[i] += _A[i, j] * _x[j]
+                        end
+                end
+        end
+        return (; time=time, y=_y)
+end
diff --git a/parallel/spmv/unused/split_cols_static_scratchspace.jl b/parallel/spmv/unused/split_cols_static_scratchspace.jl
new file mode 100644
index 00000000..2195031f
--- /dev/null
+++ b/parallel/spmv/unused/split_cols_static_scratchspace.jl
@@ -0,0 +1,59 @@
+using Finch
+using BenchmarkTools
+using Base.Threads
+
+
+"""
+    split_cols_static_scratchspace_mul(y, A, x)
+
+Wrap `y`, `A` and `x` in Finch tensors (`A` as `Dense(SparseList(Element))`) and benchmark
+`split_cols_static_scratchspace` on them.
+
+Returns a named tuple with the `@belapsed` result in `time` and the output tensor in `y`.
+"""
+function split_cols_static_scratchspace_mul(y, A, x)
+        dense_vec(v) = Tensor(Dense(Element(0.0)), v)
+        out = dense_vec(y)
+        mat = Tensor(Dense(SparseList(Element(0.0))), A)
+        rhs = dense_vec(x)
+        elapsed = @belapsed begin
+                (yt, At, xt) = $(out, mat, rhs)
+                split_cols_static_scratchspace(yt, At, xt)
+        end
+        return (time=elapsed, y=out)
+end
+
+"""
+    split_cols_static_scratchspace(y, A, x)
+
+Compute `y = A * x` in place for a `Dense(SparseList(Element))` matrix, giving each of the
+`Threads.nthreads()` loop indices one contiguous, equally sized block of columns.
+
+Loop index `k` accumulates into its own scratch vector `y_temps[k]`; the scratch vectors
+are summed into `y` in a second parallel pass.
+
+# Arguments
+- `y`: output vector tensor; its storage is grown if needed and overwritten
+- `A`: the sparse matrix
+- `x`: input vector tensor; its length must equal the outer dimension of `A`
+
+# Throws
+- `DimensionMismatch` if `x` does not match the outer dimension of `A`
+"""
+function split_cols_static_scratchspace(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+        @inbounds @fastmath(begin
+                y_lvl_val = y.lvl.lvl.val # Vector{Float64}
+
+                A_lvl = A.lvl # DenseLevel
+                A_lvl_2 = A_lvl.lvl # SparseListLevel
+                A_lvl_ptr = A_lvl_2.ptr # Vector{Int64}
+                A_lvl_idx = A_lvl_2.idx # Vector{Int64}
+                A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64}
+
+                x_lvl = x.lvl # DenseLevel
+                x_lvl_val = x_lvl.lvl.val # Vector{Float64}
+
+                x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))"))
+
+                num_cols = A_lvl.shape
+                # Size everything from the matrix, so scratch space and reduction always
+                # agree with the range of `y` that is zeroed here.
+                num_rows = A_lvl_2.shape
+                Finch.resize_if_smaller!(y_lvl_val, num_rows)
+                Finch.fill_range!(y_lvl_val, 0.0, 1, num_rows)
+
+                num_threads = Threads.nthreads()
+                y_temps = [zeros(Float64, num_rows) for _ in 1:num_threads]
+
+                # Accumulation: index k handles columns 1+div((k-1)n, T) : div(kn, T).
+                Threads.@threads for k = 1:num_threads
+                        y_temp = y_temps[k]
+                        for j = 1+div((k - 1) * num_cols, num_threads):div(k * num_cols, num_threads)
+                                for q in A_lvl_ptr[j]:A_lvl_ptr[j+1]-1
+                                        i = A_lvl_idx[q]
+                                        y_temp[i] += A_lvl_2_val[q] * x_lvl_val[j]
+                                end
+                        end
+                end
+
+                # Reduction: index k sums its slice of rows across all scratch vectors.
+                Threads.@threads for k = 1:num_threads
+                        for j = 1:num_threads
+                                for i = 1+div((k - 1) * num_rows, num_threads):div(k * num_rows, num_threads)
+                                        y_lvl_val[i] += y_temps[j][i]
+                                end
+                        end
+                end
+        end)
+end
diff --git a/parallel/spmv/unused/split_nonzeros_dynamic_grain_scratchspace.jl b/parallel/spmv/unused/split_nonzeros_dynamic_grain_scratchspace.jl
new file mode 100644
index 00000000..11c844e9
--- /dev/null
+++ b/parallel/spmv/unused/split_nonzeros_dynamic_grain_scratchspace.jl
@@ -0,0 +1,87 @@
+using Finch
+using BenchmarkTools
+using Base.Threads
+
+"""
+    split_nonzeros_dynamic_grain_scratchspace_mul(grain_size)
+
+Return a function of `(y, A, x)` that forwards to
+`split_nonzeros_dynamic_grain_scratchspace_helper` with `grain_size` fixed.
+"""
+function split_nonzeros_dynamic_grain_scratchspace_mul(grain_size)
+        function mul(y, A, x)
+                return split_nonzeros_dynamic_grain_scratchspace_helper(grain_size, y, A, x)
+        end
+        return mul
+end
+
+
+"""
+    split_nonzeros_dynamic_grain_scratchspace_helper(grain_size, y, A, x)
+
+Wrap `y`, `A` and `x` in Finch tensors (`A` as `Dense(SparseList(Element))`) and benchmark
+`split_nonzeros_dynamic_grain_scratchspace` with the given `grain_size`.
+
+Returns a named tuple with the `@belapsed` result in `time` and the output tensor in `y`.
+"""
+function split_nonzeros_dynamic_grain_scratchspace_helper(grain_size, y, A, x)
+        dense_vec(v) = Tensor(Dense(Element(0.0)), v)
+        out = dense_vec(y)
+        mat = Tensor(Dense(SparseList(Element(0.0))), A)
+        rhs = dense_vec(x)
+        elapsed = @belapsed begin
+                (g, yt, At, xt) = $(grain_size, out, mat, rhs)
+                split_nonzeros_dynamic_grain_scratchspace(g, yt, At, xt)
+        end
+        return (time=elapsed, y=out)
+end
+
+"""
+    split_nonzeros_dynamic_grain_scratchspace(grain_size, y, A, x)
+
+Compute `y = A * x` in place for a `Dense(SparseList(Element))` matrix, splitting the
+stored entries into chunks of `grain_size` positions that are handed out dynamically to
+worker tasks.
+
+Each worker task owns one private scratch vector; the scratch vectors are summed into
+`y` in a second parallel pass.
+
+# Arguments
+- `grain_size::Int64`: number of stored entries per work chunk (must be positive)
+- `y`: output vector tensor; its storage is grown if needed and overwritten
+- `A`: the sparse matrix
+- `x`: input vector tensor; its length must equal the outer dimension of `A`
+
+# Throws
+- `ArgumentError` if `grain_size <= 0`
+- `DimensionMismatch` if `x` does not match the outer dimension of `A`
+"""
+function split_nonzeros_dynamic_grain_scratchspace(grain_size::Int64, y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+        grain_size > 0 || throw(ArgumentError("grain_size must be positive, got $(grain_size)"))
+        @inbounds @fastmath(begin
+                y_lvl_val = y.lvl.lvl.val # Vector{Float64}
+
+                A_lvl = A.lvl # DenseLevel
+                A_lvl_2 = A_lvl.lvl # SparseListLevel
+                A_lvl_ptr = A_lvl_2.ptr # Vector{Int64}
+                A_lvl_idx = A_lvl_2.idx # Vector{Int64}
+                A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64}
+
+                x_lvl = x.lvl # DenseLevel
+                x_lvl_val = x_lvl.lvl.val # Vector{Float64}
+
+                x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))"))
+
+                num_cols = A_lvl.shape
+                # Size everything from the matrix, so scratch space and reduction always
+                # agree with the range of `y` that is zeroed here.
+                num_rows = A_lvl_2.shape
+                Finch.resize_if_smaller!(y_lvl_val, num_rows)
+                Finch.fill_range!(y_lvl_val, 0.0, 1, num_rows)
+
+                num_threads = Threads.nthreads()
+                y_temps = [zeros(Float64, num_rows) for _ in 1:num_threads]
+
+                # Load Balancing: chunk k covers positions start_indices[k] : start_indices[k+1]-1
+                num_nz = A_lvl_ptr[num_cols+1] - 1
+                num_iter = cld(num_nz, grain_size)
+
+                start_indices = [1 + grain_size * k for k in 0:num_iter]
+                start_cols = zeros(Int64, num_iter + 1)
+
+                # For every chunk, find the column that contains its first position:
+                # the first `col` with ptr[col] > target belongs one past that column.
+                col = 2
+                target_pos = 1
+                target_index = start_indices[target_pos]
+                while (col <= num_cols + 1 && target_pos <= num_iter)
+                        if (A_lvl_ptr[col] > target_index)
+                                start_cols[target_pos] = col - 1
+                                target_pos += 1
+                                target_index = start_indices[target_pos]
+                        else
+                                col += 1
+                        end
+                end
+                start_cols[num_iter+1] = num_cols
+
+                # Dynamic scheduling without `threadid()`: each worker task owns the
+                # scratch vector selected by its own loop index and pulls chunk numbers
+                # from a shared atomic counter.
+                next_chunk = Threads.Atomic{Int}(1)
+
+                @sync for worker = 1:num_threads
+                        Threads.@spawn begin
+                                y_temp = y_temps[worker]
+                                while true
+                                        # atomic_add! returns the value before the increment
+                                        chunk = Threads.atomic_add!(next_chunk, 1)
+                                        chunk > num_iter && break
+                                        start_index = start_indices[chunk]
+                                        end_index = start_indices[chunk+1] - 1
+                                        for j = start_cols[chunk]:start_cols[chunk+1]
+                                                # Clip the column's positions to this chunk's window.
+                                                for q in max(A_lvl_ptr[j], start_index):min(A_lvl_ptr[j+1] - 1, end_index)
+                                                        i = A_lvl_idx[q]
+                                                        y_temp[i] += A_lvl_2_val[q] * x_lvl_val[j]
+                                                end
+                                        end
+                                end
+                        end
+                end
+
+                # Reduction: task k sums its slice of rows across all scratch vectors.
+                Threads.@threads for k = 1:num_threads
+                        for j = 1:num_threads
+                                for i = 1+div((k - 1) * num_rows, num_threads):div(k * num_rows, num_threads)
+                                        y_lvl_val[i] += y_temps[j][i]
+                                end
+                        end
+                end
+        end)
+end
diff --git a/parallel/spmv/unused/split_nonzeros_static_scratchspace.jl b/parallel/spmv/unused/split_nonzeros_static_scratchspace.jl
new file mode 100644
index 00000000..349c0ead
--- /dev/null
+++ b/parallel/spmv/unused/split_nonzeros_static_scratchspace.jl
@@ -0,0 +1,79 @@
+using Finch
+using BenchmarkTools
+using Base.Threads
+
+"""
+    split_nonzeros_static_scratchspace_mul(y, A, x)
+
+Wrap `y`, `A` and `x` in Finch tensors (`A` as `Dense(SparseList(Element))`) and benchmark
+`split_nonzeros_static_scratchspace` on them.
+
+Returns a named tuple with the `@belapsed` result in `time` and the output tensor in `y`.
+"""
+function split_nonzeros_static_scratchspace_mul(y, A, x)
+        dense_vec(v) = Tensor(Dense(Element(0.0)), v)
+        out = dense_vec(y)
+        mat = Tensor(Dense(SparseList(Element(0.0))), A)
+        rhs = dense_vec(x)
+        elapsed = @belapsed begin
+                (yt, At, xt) = $(out, mat, rhs)
+                split_nonzeros_static_scratchspace(yt, At, xt)
+        end
+        return (time=elapsed, y=out)
+end
+
+"""
+    split_nonzeros_static_scratchspace(y, A, x)
+
+Compute `y = A * x` in place for a `Dense(SparseList(Element))` matrix, giving each of the
+`Threads.nthreads()` loop indices an (almost) equal share of the stored entries.
+
+Loop index `k` accumulates into its own scratch vector `y_temps[k]`; the scratch vectors
+are summed into `y` in a second parallel pass. A matrix without stored entries leaves
+`y` zeroed and skips both passes.
+
+# Arguments
+- `y`: output vector tensor; its storage is grown if needed and overwritten
+- `A`: the sparse matrix
+- `x`: input vector tensor; its length must equal the outer dimension of `A`
+
+# Throws
+- `DimensionMismatch` if `x` does not match the outer dimension of `A`
+"""
+function split_nonzeros_static_scratchspace(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+        @inbounds @fastmath(begin
+                y_lvl_val = y.lvl.lvl.val # Vector{Float64}
+
+                A_lvl = A.lvl # DenseLevel
+                A_lvl_2 = A_lvl.lvl # SparseListLevel
+                A_lvl_ptr = A_lvl_2.ptr # Vector{Int64}
+                A_lvl_idx = A_lvl_2.idx # Vector{Int64}
+                A_lvl_2_val = A_lvl_2.lvl.val # Vector{Float64}
+
+                x_lvl = x.lvl # DenseLevel
+                x_lvl_val = x_lvl.lvl.val # Vector{Float64}
+
+                x_lvl.shape == A_lvl.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl.shape))"))
+
+                num_cols = A_lvl.shape
+                # Size everything from the matrix, so scratch space and reduction always
+                # agree with the range of `y` that is zeroed here.
+                num_rows = A_lvl_2.shape
+                Finch.resize_if_smaller!(y_lvl_val, num_rows)
+                Finch.fill_range!(y_lvl_val, 0.0, 1, num_rows)
+
+                num_nz = A_lvl_ptr[num_cols+1] - 1
+
+                # With no stored entries the column search below never assigns
+                # `start_cols`, and its zero entries would be used as column indices.
+                if num_nz > 0
+                        num_threads = Threads.nthreads()
+                        y_temps = [zeros(Float64, num_rows) for _ in 1:num_threads]
+
+                        # Load Balancing: index k covers positions start_indices[k] : start_indices[k+1]-1
+                        start_indices = [1 + div(k * num_nz, num_threads) for k in 0:num_threads]
+                        start_cols = zeros(Int64, num_threads + 1)
+
+                        # For every share, find the column that contains its first position.
+                        col = 2
+                        target_pos = 1
+                        target_index = start_indices[target_pos]
+                        while (col <= num_cols + 1 && target_pos <= num_threads)
+                                if (A_lvl_ptr[col] > target_index)
+                                        start_cols[target_pos] = col - 1
+                                        target_pos += 1
+                                        target_index = start_indices[target_pos]
+                                else
+                                        col += 1
+                                end
+                        end
+                        start_cols[num_threads+1] = num_cols
+
+                        Threads.@threads for k = 1:num_threads
+                                start_index = start_indices[k]
+                                end_index = start_indices[k+1] - 1
+                                y_temp = y_temps[k]
+                                for j = start_cols[k]:start_cols[k+1]
+                                        # Clip the column's positions to this share's window.
+                                        for q in max(A_lvl_ptr[j], start_index):min(A_lvl_ptr[j+1] - 1, end_index)
+                                                i = A_lvl_idx[q]
+                                                y_temp[i] += A_lvl_2_val[q] * x_lvl_val[j]
+                                        end
+                                end
+                        end
+
+                        # Reduction: index k sums its slice of rows across all scratch vectors.
+                        Threads.@threads for k = 1:num_threads
+                                for j = 1:num_threads
+                                        for i = 1+div((k - 1) * num_rows, num_threads):div(k * num_rows, num_threads)
+                                                y_lvl_val[i] += y_temps[j][i]
+                                        end
+                                end
+                        end
+                end
+        end)
+end
diff --git a/parallel/spmv/unused/transpose_split_rows_dynamic_grain.jl b/parallel/spmv/unused/transpose_split_rows_dynamic_grain.jl
new file mode 100644
index 00000000..4f9a6287
--- /dev/null
+++ b/parallel/spmv/unused/transpose_split_rows_dynamic_grain.jl
@@ -0,0 +1,59 @@
+using Finch
+using BenchmarkTools
+using Base.Threads
+
+
+"""
+    transpose_split_rows_dynamic_grain_mul(grain_size)
+
+Return a function of `(y, A, x)` that forwards to
+`transpose_split_rows_dynamic_grain_helper` with `grain_size` fixed.
+"""
+function transpose_split_rows_dynamic_grain_mul(grain_size)
+    function mul(y, A, x)
+        return transpose_split_rows_dynamic_grain_helper(grain_size, y, A, x)
+    end
+    return mul
+end
+
+"""
+    transpose_split_rows_dynamic_grain_helper(grain_size, y, A, x)
+
+Wrap `y`, `A` and `x` in Finch tensors and benchmark `transpose_split_rows_dynamic_grain`
+with the given `grain_size`.
+
+`A` is stored as `Dense(SparseList(Element))` of `permutedims(A)` and then swizzled with
+`(2, 1)`. Returns a named tuple with the `@belapsed` result in `time` and the output
+tensor in `y`.
+"""
+function transpose_split_rows_dynamic_grain_helper(grain_size, y, A, x)
+    dense_vec(v) = Tensor(Dense(Element(0.0)), v)
+    out = dense_vec(y)
+    transposed = Tensor(Dense(SparseList(Element(0.0))), permutedims(A))
+    mat = swizzle(transposed, 2, 1)
+    rhs = dense_vec(x)
+    elapsed = @belapsed begin
+        (g, yt, At, xt) = $(grain_size, out, mat, rhs)
+        transpose_split_rows_dynamic_grain(g, yt, At, xt)
+    end
+    return (time=elapsed, y=out)
+end
+
+# Row-parallel SpMV kernel: accumulates A[i, j] * x[j] into y[i] for a swizzled
+# Dense(SparseList) matrix. Rows are processed in groups of `grain_size` by the first
+# threaded loop; the rows left over after the last full group (cap_size+1 : shape) are
+# processed one per iteration by the second loop. Every y[i] is written by exactly one
+# loop iteration.
+# NOTE(review): `Finch.@barrier` is opaque from here — presumably it isolates the body
+# with the listed variables captured; confirm against Finch.
+# Throws DimensionMismatch when x's length differs from the inner dimension of `A.body`;
+# `div` raises DivideError for grain_size == 0.
+function transpose_split_rows_dynamic_grain(grain_size::Int64, y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+    @inbounds @fastmath(begin
+        # Unpack the raw storage arrays of the three tensors.
+        y_lvl = y.lvl
+        y_lvl_val = y_lvl.lvl.val
+        tns_lvl = A.body.lvl
+        tns_lvl_2 = tns_lvl.lvl
+        tns_lvl_ptr = tns_lvl_2.ptr
+        tns_lvl_idx = tns_lvl_2.idx
+        tns_lvl_2_val = tns_lvl_2.lvl.val
+        x_lvl = x.lvl
+        x_lvl_val = x_lvl.lvl.val
+        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
+        # Presumably: grow y's buffer if needed, then zero entries 1:tns_lvl.shape.
+        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
+        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape)
+
+        # Largest multiple of grain_size not exceeding the number of rows.
+        cap_size = div(tns_lvl.shape, grain_size) * grain_size
+
+        # Full groups: iteration `group` handles rows group : group+grain_size-1.
+        Threads.@threads for group = 1:grain_size:cap_size
+            Finch.@barrier group grain_size tns_lvl_ptr tns_lvl_idx y_lvl_val tns_lvl_2_val x_lvl_val begin
+                for i = group:group+grain_size-1
+                    # Stored entries of row i occupy positions ptr[i] : ptr[i+1]-1.
+                    for q in tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1
+                        j = tns_lvl_idx[q]
+                        y_lvl_val[i] += tns_lvl_2_val[q] * x_lvl_val[j]
+                    end
+                end
+            end
+        end
+
+        # Remainder rows that do not fill a whole group, one row per iteration.
+        Threads.@threads for i = cap_size+1:tns_lvl.shape
+            Finch.@barrier tns_lvl_ptr tns_lvl_idx y_lvl_val tns_lvl_2_val x_lvl_val i begin
+                for q in tns_lvl_ptr[i]:tns_lvl_ptr[i+1]-1
+                    j = tns_lvl_idx[q]
+                    y_lvl_val[i] += tns_lvl_2_val[q] * x_lvl_val[j]
+                end
+            end
+        end
+    end)
+end
+
diff --git a/parallel/spmv/utils/merge.jl b/parallel/spmv/utils/merge.jl
new file mode 100644
index 00000000..58f7b216
--- /dev/null
+++ b/parallel/spmv/utils/merge.jl
@@ -0,0 +1,193 @@
+using Finch
+
+"""
+    merge_path_search(diagonal::Int64, num_rows::Int64, num_nzs::Int64, row_ptr::Vector{Int64})
+
+Binary-search the coordinate `(row_idx, nz_idx)` at which the merge path crosses the given diagonal
+
+# Arguments
+- `diagonal::Int64`: the diagonal to intersect, i.e. the line s.t. row_idx + nz_idx = diagonal (diagonal >= 2)
+- `num_rows::Int64`: the number of rows
+- `num_nzs::Int64`: the number of nonzeros
+- `row_ptr::Vector{Int64}`: the row ptr representing a cumulative number of nonzero elements; read at indices up to `num_rows + 1`
+"""
+function merge_path_search(diagonal::Int64, num_rows::Int64, num_nzs::Int64, row_ptr::Vector{Int64})
+    last_row = num_rows + 1
+    lo, hi = max(diagonal - num_nzs - 1, 1), min(diagonal - 1, last_row)
+
+    # The crossing row always lies in lo:hi; halve that range until a single candidate is left.
+    while lo < hi
+        mid = (lo + hi) >> 1
+        if diagonal - mid - 1 < row_ptr[mid+1]
+            hi = mid
+        else
+            lo = mid + 1
+        end
+    end
+    return (min(lo, last_row), diagonal - lo)
+end
+
+"""
+    merge_swizzle_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+
+MergeSpMV on swizzle array: overwrite `y` with the sparse matrix-vector product of `A` and `x`, giving each of the `Threads.nthreads()` parallel iterations an equal share of the combined rows and nonzeros
+
+# Arguments
+- `y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}`: output vector; its value buffer is grown if needed, zero-filled, written in place and finally resized to the outer dimension of `A.body`
+- `A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}`: sparse matrix; the `ptr`, `idx` and value buffers of `A.body` are read
+- `x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}`: input vector; its dimension must equal the sparse-level dimension of `A.body`, otherwise a `DimensionMismatch` is thrown
+"""
+function merge_swizzle_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+    @inbounds @fastmath(begin
+        y_lvl = y.lvl
+        y_lvl_val = y_lvl.lvl.val
+        tns_lvl = A.body.lvl
+        tns_lvl_2 = tns_lvl.lvl
+        tns_lvl_ptr = tns_lvl_2.ptr
+        tns_lvl_idx = tns_lvl_2.idx
+        tns_lvl_2_val = tns_lvl_2.lvl.val
+        x_lvl = x.lvl
+        x_lvl_val = x_lvl.lvl.val
+        x_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(tns_lvl_2.shape))"))
+        Finch.resize_if_smaller!(y_lvl_val, tns_lvl.shape)
+        Finch.fill_range!(y_lvl_val, 0.0, 1, tns_lvl.shape)
+        val = y_lvl_val # alias taken before the moveto calls below; every write to the result goes through `val`
+        # NOTE(review): presumably moveto prepares each buffer for use on the multi-threaded CPU device - confirm against Finch
+        y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads()))
+        x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads()))
+        tns_lvl_ptr = (Finch).moveto(tns_lvl_ptr, CPU(Threads.nthreads()))
+        tns_lvl_idx = (Finch).moveto(tns_lvl_idx, CPU(Threads.nthreads()))
+        tns_lvl_2_val = (Finch).moveto(tns_lvl_2_val, CPU(Threads.nthreads()))
+        # Work is measured in "merge items": one item per row plus one item per stored nonzero.
+        # Custom Variables
+        num_rows = tns_lvl.shape
+        num_nzs = last(tns_lvl_ptr) - 1 # the final ptr entry is one past the last stored position
+        num_merge_items = num_rows + num_nzs # number of rows + number of nonzeros
+        items_per_thread = fld(num_merge_items + Threads.nthreads() - 1, Threads.nthreads()) # ceiling division
+        row_carry_out = Vector{Int64}(undef, Threads.nthreads()) # per iteration: row left unfinished at the end of its slice
+        value_carry_out = Vector{Float64}(undef, Threads.nthreads()) # per iteration: partial sum accumulated for that row
+        # Both carry-out arrays are fully assigned inside the parallel loop before being read.
+        row_carry_out = (Finch).moveto(row_carry_out, CPU(Threads.nthreads()))
+        value_carry_out = (Finch).moveto(value_carry_out, CPU(Threads.nthreads()))
+        # Each iteration processes one contiguous slice of items_per_thread merge items.
+        Threads.@threads for i_4 = 1:Threads.nthreads()
+            Finch.@barrier begin
+                @inbounds @fastmath(begin
+                    diagonal = min(items_per_thread * (i_4 - 1) + 2, num_merge_items + 2) # slice start; the first slice starts at diagonal 2
+                    diagonal_end = min(diagonal + items_per_thread, num_merge_items + 2) # slice end, clamped to the last diagonal
+                    x_coord, y_coord = merge_path_search(diagonal, num_rows, num_nzs, tns_lvl_ptr)
+                    x_coord_end, y_coord_end = merge_path_search(diagonal_end, num_rows, num_nzs, tns_lvl_ptr)
+                    # Rows that end inside this slice: sum their remaining nonzeros and store the row total.
+                    running_total = 0.0
+                    while x_coord < x_coord_end
+                        while y_coord < tns_lvl_ptr[x_coord + 1]
+                            running_total += tns_lvl_2_val[y_coord] * x_lvl_val[tns_lvl_idx[y_coord]]
+                            y_coord += 1
+                        end
+                        val[x_coord] = running_total
+                        running_total = 0.0
+                        x_coord += 1
+                    end
+                    # Nonzeros of the row still open when the slice ends are summed but not stored here.
+                    while y_coord < y_coord_end
+                        running_total += tns_lvl_2_val[y_coord] * x_lvl_val[tns_lvl_idx[y_coord]]
+                        y_coord += 1
+                    end
+                    # Hand the open row and its partial sum to the sequential fix-up after the loop.
+                    row_carry_out[i_4] = x_coord_end
+                    value_carry_out[i_4] = running_total
+                end)
+            end
+        end
+        # Fix-up: add every carried partial sum to its row; a carry row of num_rows + 1 is past the last row and is skipped.
+        for i = 1:Threads.nthreads()
+            if row_carry_out[i] < num_rows + 1
+                val[row_carry_out[i]] += value_carry_out[i]
+            end
+        end
+        resize!(val, tns_lvl.shape) # trim the buffer to exactly the output length
+    end)
+end
+
+"""
+    merge_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+
+MergeSpMV on array: overwrite `y` with sums of stored values of `A` times entries of `x`, one sum per outer slice of `A`, giving each of the `Threads.nthreads()` parallel iterations an equal share of the combined slices and nonzeros
+
+# Arguments
+- `y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}`: output vector; its value buffer is grown if needed, zero-filled, written in place and finally resized to the outer dimension of `A`
+- `A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}`: sparse matrix; its `ptr`, `idx` and value buffers are read
+- `x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}`: input vector; its dimension must equal the sparse-level dimension of `A`, otherwise a `DimensionMismatch` is thrown
+"""
+function merge_helper(y::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, A::Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}, x::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}})
+    @inbounds @fastmath(begin
+        y_lvl = y.lvl
+        y_lvl_val = y_lvl.lvl.val
+        A_lvl = A.lvl
+        A_lvl_2 = A_lvl.lvl
+        A_lvl_ptr = A_lvl_2.ptr
+        A_lvl_idx = A_lvl_2.idx
+        A_lvl_2_val = A_lvl_2.lvl.val
+        x_lvl = x.lvl
+        x_lvl_val = x_lvl.lvl.val
+        x_lvl.shape == A_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(x_lvl.shape) != $(A_lvl_2.shape))"))
+        Finch.resize_if_smaller!(y_lvl_val, A_lvl.shape)
+        Finch.fill_range!(y_lvl_val, 0.0, 1, A_lvl.shape)
+        val = y_lvl_val # alias taken before the moveto calls below; every write to the result goes through `val`
+        # NOTE(review): presumably moveto prepares each buffer for use on the multi-threaded CPU device - confirm against Finch
+        y_lvl_val = (Finch).moveto(y_lvl_val, CPU(Threads.nthreads()))
+        x_lvl_val = (Finch).moveto(x_lvl_val, CPU(Threads.nthreads()))
+        A_lvl_ptr = (Finch).moveto(A_lvl_ptr, CPU(Threads.nthreads()))
+        A_lvl_idx = (Finch).moveto(A_lvl_idx, CPU(Threads.nthreads()))
+        A_lvl_2_val = (Finch).moveto(A_lvl_2_val, CPU(Threads.nthreads()))
+        # Work is measured in "merge items": one item per outer slice plus one item per stored nonzero.
+        # Custom Variables
+        num_rows = A_lvl.shape
+        num_nzs = last(A_lvl_ptr) - 1 # the final ptr entry is one past the last stored position
+        num_merge_items = num_rows + num_nzs # number of rows + number of nonzeros
+        items_per_thread = fld(num_merge_items + Threads.nthreads() - 1, Threads.nthreads()) # ceiling division
+        row_carry_out = Vector{Int64}(undef, Threads.nthreads()) # per iteration: row left unfinished at the end of its slice
+        value_carry_out = Vector{Float64}(undef, Threads.nthreads()) # per iteration: partial sum accumulated for that row
+        # Both carry-out arrays are fully assigned inside the parallel loop before being read.
+        row_carry_out = (Finch).moveto(row_carry_out, CPU(Threads.nthreads()))
+        value_carry_out = (Finch).moveto(value_carry_out, CPU(Threads.nthreads()))
+        # Each iteration processes one contiguous slice of items_per_thread merge items.
+        Threads.@threads for i_4 = 1:Threads.nthreads()
+            Finch.@barrier begin
+                @inbounds @fastmath(begin
+                    diagonal = min(items_per_thread * (i_4 - 1) + 2, num_merge_items + 2) # slice start; the first slice starts at diagonal 2
+                    diagonal_end = min(diagonal + items_per_thread, num_merge_items + 2) # slice end, clamped to the last diagonal
+                    x_coord, y_coord = merge_path_search(diagonal, num_rows, num_nzs, A_lvl_ptr)
+                    x_coord_end, y_coord_end = merge_path_search(diagonal_end, num_rows, num_nzs, A_lvl_ptr)
+                    # Rows that end inside this slice: sum their remaining nonzeros and store the row total.
+                    running_total = 0.0
+                    while x_coord < x_coord_end
+                        while y_coord < A_lvl_ptr[x_coord+1]
+                            running_total += A_lvl_2_val[y_coord] * x_lvl_val[A_lvl_idx[y_coord]]
+                            y_coord += 1
+                        end
+                        val[x_coord] = running_total
+                        running_total = 0.0
+                        x_coord += 1
+                    end
+                    # Nonzeros of the row still open when the slice ends are summed but not stored here.
+                    while y_coord < y_coord_end
+                        running_total += A_lvl_2_val[y_coord] * x_lvl_val[A_lvl_idx[y_coord]]
+                        y_coord += 1
+                    end
+                    # Hand the open row and its partial sum to the sequential fix-up after the loop.
+                    row_carry_out[i_4] = x_coord_end
+                    value_carry_out[i_4] = running_total
+                end)
+            end
+        end
+        # Fix-up: add every carried partial sum to its row; a carry row of num_rows + 1 is past the last row and is skipped.
+        for i = 1:Threads.nthreads()
+            if row_carry_out[i] < num_rows + 1
+                val[row_carry_out[i]] += value_carry_out[i]
+            end
+        end
+        resize!(val, A_lvl.shape) # trim the buffer to exactly the output length
+    end)
+end
+
diff --git a/parallel/spmv/utils/permutation.jl b/parallel/spmv/utils/permutation.jl
new file mode 100644
index 00000000..1fbfbf4b
--- /dev/null
+++ b/parallel/spmv/utils/permutation.jl
@@ -0,0 +1,131 @@
+using Metis: idx_t
+using Metis
+using SparseArrays
+using Finch
+
+"""
+    create_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})
+
+Return a permutation that orders the vertices of `A`'s symmetrized sparsity graph by the METIS k-way partition they fall in (`Threads.nthreads()` partitions), intended to minimize communication of x values in Ax when each group is handled by one core
+
+# Arguments
+- `A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}`: a matrix to create permutation on, the matrix must be NxN (rows == columns), otherwise a `DimensionMismatch` is thrown
+"""
+function create_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})
+    tns_lvl = A.body.lvl
+    tns_lvl_2 = tns_lvl.lvl
+    tns_lvl_ptr = tns_lvl_2.ptr
+    tns_lvl_idx = tns_lvl_2.idx
+
+    tns_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(tns_lvl.shape) != $(tns_lvl_2.shape))"))
+
+    nvtxs = convert(idx_t, tns_lvl.shape)
+    adjncy_temp = [idx_t[] for _ in 1:tns_lvl.shape]
+
+    # Symmetrize the pattern: a stored entry (v, u) adds v to u's neighbor list and u to v's neighbor list.
+    # NOTE(review): diagonal entries become self-loops and symmetric patterns yield duplicate edges - confirm METIS accepts such input.
+    for v in 1:tns_lvl.shape
+        entries = tns_lvl_ptr[v]:tns_lvl_ptr[v+1]-1
+        for ptr in entries
+            push!(adjncy_temp[tns_lvl_idx[ptr]], v)
+        end
+        append!(adjncy_temp[v], view(tns_lvl_idx, entries)) # view: no temporary copy of the slice
+    end
+
+    xadj = idx_t[]
+    push!(xadj, convert(idx_t, 1))
+    for v in 1:tns_lvl.shape
+        push!(xadj, xadj[v] + length(adjncy_temp[v]))
+    end
+
+    # Concatenate list by list: splatting one argument per vertex into vcat does not scale and yields Any[] when there are no vertices.
+    adjncy = sizehint!(idx_t[], xadj[end] - 1)
+    foreach(neighbors -> append!(adjncy, neighbors), adjncy_temp)
+
+    graph = Metis.Graph(nvtxs, xadj, adjncy)
+    positions = Metis.partition(graph, Threads.nthreads(); alg=:KWAY)
+    return sortperm(positions) # vertex indices ordered by partition id
+end
+
+"""
+    create_weighted_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})
+
+Return a permutation that orders the vertices of `A`'s symmetrized sparsity graph by the METIS k-way partition they fall in (`Threads.nthreads()` partitions), intended to minimize communication of x value in Ax, where the weight of each row equals the number of nnz in that row + 1
+
+# Arguments
+- `A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}`: a matrix to create permutation on, the matrix must be NxN (rows == columns), otherwise a `DimensionMismatch` is thrown
+"""
+function create_weighted_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}})
+    tns_lvl = A.body.lvl
+    tns_lvl_2 = tns_lvl.lvl
+    tns_lvl_ptr = tns_lvl_2.ptr
+    tns_lvl_idx = tns_lvl_2.idx
+
+    tns_lvl.shape == tns_lvl_2.shape || throw(DimensionMismatch("mismatched dimension limits ($(tns_lvl.shape) != $(tns_lvl_2.shape))"))
+
+    nvtxs = convert(idx_t, tns_lvl.shape)
+    adjncy_temp = [idx_t[] for _ in 1:tns_lvl.shape]
+    vwgt = zeros(idx_t, tns_lvl.shape)
+
+    # Symmetrize the pattern: a stored entry (v, u) adds v to u's neighbor list and u to v's neighbor list.
+    # NOTE(review): diagonal entries become self-loops and symmetric patterns yield duplicate edges - confirm METIS accepts such input.
+    for v in 1:tns_lvl.shape
+        entries = tns_lvl_ptr[v]:tns_lvl_ptr[v+1]-1
+        for ptr in entries
+            push!(adjncy_temp[tns_lvl_idx[ptr]], v)
+        end
+        vwgt[v] = tns_lvl_ptr[v+1] - tns_lvl_ptr[v] + 1 # the last 1 is for the row
+        append!(adjncy_temp[v], view(tns_lvl_idx, entries)) # view: no temporary copy of the slice
+    end
+
+    xadj = idx_t[]
+    push!(xadj, convert(idx_t, 1))
+    for v in 1:tns_lvl.shape
+        push!(xadj, xadj[v] + length(adjncy_temp[v]))
+    end
+
+    # Concatenate list by list: splatting one argument per vertex into vcat does not scale and yields Any[] when there are no vertices.
+    adjncy = sizehint!(idx_t[], xadj[end] - 1)
+    foreach(neighbors -> append!(adjncy, neighbors), adjncy_temp)
+
+    graph = Metis.Graph(nvtxs, xadj, adjncy, vwgt)
+    positions = Metis.partition(graph, Threads.nthreads(); alg=:KWAY)
+    return sortperm(positions) # vertex indices ordered by partition id
+end
+
+"""
+    vector_permutation(v::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, perm::Vector{Int64})
+
+Build a dense tensor whose entries are the stored values of `v` taken in the order given by `perm`
+
+# Arguments
+- `v::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}`: vector to be permuted
+- `perm::Vector{Int64}`: permutation vector, must have size at most length(v)
+"""
+function vector_permutation(v::Tensor{DenseLevel{Int64,ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}, perm::Vector{Int64})
+    # Gather the stored values in permuted order; indexing with a vector allocates a fresh array.
+    permuted_values = getindex(v.lvl.lvl.val, perm)
+    wrapped = Tensor(Dense(Element(0.0)), permuted_values)
+    return wrapped
+end
+
+"""
+    matrix_col_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, perm::Vector{Int64})
+
+Create a column permutation of a matrix: the buffers of `A.body` are wrapped in a `SparseMatrixCSC`, its columns are selected by `perm`, and the result is returned as a plain (unswizzled) `Dense(SparseList(Element(0.0)))` tensor
+
+# Arguments
+- `A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}`: matrix to be permuted
+- `perm::Vector{Int64}`: permutation vector; its entries index the outer (Dense) dimension of `A.body` (NOTE(review): for a non-square `A` confirm with callers that this is the intended axis)
+"""
+function matrix_col_permutation(A::Finch.SwizzleArray{(2, 1),Tensor{DenseLevel{Int64,SparseListLevel{Int64,Vector{Int64},Vector{Int64},ElementLevel{0.0,Float64,Int64,Vector{Float64}}}}}}, perm::Vector{Int64})
+    tns_lvl = A.body.lvl
+    tns_lvl_2 = tns_lvl.lvl
+    tns_lvl_ptr = tns_lvl_2.ptr
+    tns_lvl_idx = tns_lvl_2.idx
+    tns_lvl_2_val = tns_lvl_2.lvl.val
+    # ptr has one entry per outer (Dense) index plus one, so that level is the CSC column count n and the SparseList level is the row count m.
+    _A = SparseMatrixCSC(tns_lvl_2.shape, tns_lvl.shape, tns_lvl_ptr, tns_lvl_idx, tns_lvl_2_val)
+    A_perm = _A[:, perm]
+    return Tensor(Dense(SparseList(Element(0.0))), A_perm)
+end