Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@ target_link_libraries(blast
)

target_compile_options(blast
INTERFACE "-Wno-ignored-attributes" "-fno-math-errno" "-ftemplate-backtrace-limit=0"
# Enable SIMD instruction sets, otherwise it does not compile.
# This will change when we support multiple architectures.
INTERFACE "-march=native" "-mfma" "-mavx" "-mavx2" "-msse4"
INTERFACE "-Wno-ignored-attributes" "-fno-math-errno"
)

target_compile_options(blast
INTERFACE
$<$<COMPILE_LANGUAGE:CXX>:-ftemplate-backtrace-limit=0>
)

# BLAST_WITH_BLASFEO
Expand Down
47 changes: 25 additions & 22 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ BENCH_BLASFEO = build/bin/bench-blasfeo
BENCH_BLAZE = build/bin/bench-blaze
BENCH_EIGEN = build/bin/bench-eigen
BENCH_BLAST = build/bin/bench-blast
BENCH_BLAST_OUTPUT_DIR = $(shell git rev-parse --short HEAD)
BENCH_BLAST_CPU_CORE = 11
BENCH_LIBXSMM = build/bin/bench-libxsmm
BENCHMARK_OPTIONS = --benchmark_repetitions=5 --benchmark_counters_tabular=true --benchmark_out_format=json --benchmark_report_aggregates_only=true
BENCHMARK_OPTIONS = --benchmark_repetitions=30 --benchmark_counters_tabular=true --benchmark_out_format=json --benchmark_enable_random_interleaving=true --benchmark_min_warmup_time=10 --benchmark_min_time=1000000x
RUN_MATLAB = matlab -nodisplay -nosplash -nodesktop -r
BENCH_DATA = bench_result/data
BENCH_IMAGE = bench_result/image
Expand Down Expand Up @@ -61,29 +63,29 @@ ${BENCH_DATA}/sgemm-blaze-static.json: $(BENCH_BLAZE)
$(BENCH_BLAZE) --benchmark_filter="BM_gemm_static<float>*" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/sgemm-blaze-static.json

${BENCH_DATA}/dgemm-blast-static-panel.json: $(BENCH_BLAST)
$(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel<double, .+>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/dgemm-blast-static-panel.json
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-panel.json: $(BENCH_BLAST)
taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel<double, .+>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-panel.json

${BENCH_DATA}/dgemm-blast-dynamic-panel.json: $(BENCH_BLAST)
$(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel<double>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/dgemm-blast-dynamic-panel.json
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-panel.json: $(BENCH_BLAST)
taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel<double>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-panel.json

${BENCH_DATA}/dgemm-blast-static-plain.json: $(BENCH_BLAST)
$(BENCH_BLAST) --benchmark_filter="BM_gemm_static_plain<double, .+>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/dgemm-blast-static-plain.json
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-plain.json: $(BENCH_BLAST)
taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_plain<double, .+>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-plain.json

${BENCH_DATA}/dgemm-blast-dynamic-plain.json: $(BENCH_BLAST)
$(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_plain<double>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/dgemm-blast-dynamic-plain.json
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-plain.json: $(BENCH_BLAST)
taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_plain<double>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-plain.json

${BENCH_DATA}/sgemm-blast-static-panel.json: $(BENCH_BLAST)
$(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel<float, .+>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/sgemm-blast-static-panel.json
taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel<float, .+>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/sgemm-blast-static-panel.json

${BENCH_DATA}/sgemm-blast-dynamic-panel.json: $(BENCH_BLAST)
$(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel<float>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/sgemm-blast-dynamic-panel.json
taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel<float>" $(BENCHMARK_OPTIONS) \
--benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/sgemm-blast-dynamic-panel.json

${BENCH_DATA}/dgemm-libxsmm.json: $(BENCH_LIBXSMM)
$(BENCH_LIBXSMM) --benchmark_filter="BM_gemm_nt<double>" $(BENCHMARK_OPTIONS) \
Expand All @@ -94,16 +96,17 @@ ${BENCH_DATA}/sgemm-libxsmm.json: $(BENCH_LIBXSMM)
--benchmark_out=${BENCH_DATA}/sgemm-libxsmm.json

dgemm-benchmarks: \
$(shell mkdir -p ${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}) \
${BENCH_DATA}/dgemm-openblas.json \
${BENCH_DATA}/dgemm-mkl.json \
${BENCH_DATA}/dgemm-libxsmm.json \
${BENCH_DATA}/dgemm-blasfeo.json \
${BENCH_DATA}/dgemm-blaze-static.json \
${BENCH_DATA}/dgemm-eigen-static.json \
${BENCH_DATA}/dgemm-blast-static-panel.json \
${BENCH_DATA}/dgemm-blast-dynamic-panel.json \
${BENCH_DATA}/dgemm-blast-static-plain.json \
${BENCH_DATA}/dgemm-blast-dynamic-plain.json
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-panel.json \
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-panel.json \
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-plain.json \
${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-plain.json


#
Expand Down Expand Up @@ -238,4 +241,4 @@ ${BENCH_IMAGE}/mpc_software.pdf_tex: ${BENCH_IMAGE}/mpc_software.svg
/usr/bin/inkscape --without-gui --file=${BENCH_IMAGE}/mpc_software.svg --export-pdf=${BENCH_IMAGE}/mpc_software.pdf --export-latex --export-area-drawing

${BENCH_IMAGE}/mpc_software.pdf: ${BENCH_IMAGE}/mpc_software.svg
/usr/bin/inkscape --without-gui --file=${BENCH_IMAGE}/mpc_software.svg --export-pdf=${BENCH_IMAGE}/mpc_software.pdf --export-area-drawing
/usr/bin/inkscape --without-gui --file=${BENCH_IMAGE}/mpc_software.svg --export-pdf=${BENCH_IMAGE}/mpc_software.pdf --export-area-drawing
27 changes: 18 additions & 9 deletions bench/analysis/dgemm_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,18 @@
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import json

import glob
import pathlib

def filter_aggregate(benchmarks, name):
    """Select the benchmark records that belong to one aggregate.

    Args:
        benchmarks: iterable of benchmark records (mappings).
        name: value that a record's 'aggregate_name' entry must equal.

    Returns:
        A list, in input order, of the records whose 'aggregate_name' equals
        ``name``. Records that have no 'aggregate_name' entry are skipped
        instead of raising KeyError.
    """
    # The membership test comes first so that records without the key are
    # skipped rather than aborting the whole filter.
    return [
        b for b in benchmarks
        if 'aggregate_name' in b and b['aggregate_name'] == name
    ]


factor = 1e+9 # Giga
Expand All @@ -17,20 +25,21 @@ def filter_aggregate(benchmarks, name):

plots = [
# {'data_file': 'dgemm-openblas.json', 'label': 'OpenBLAS'},
{'data_file': 'dgemm-mkl.json', 'label': 'MKL'},
# {'data_file': 'dgemm-mkl.json', 'label': 'MKL'},
{'data_file': 'dgemm-blasfeo.json', 'label': 'BLASFEO'},
# {'data_file': 'dgemm-blasfeo-blas.json', 'label': 'BLASFEO*'},
{'data_file': 'dgemm-libxsmm.json', 'label': 'LIBXSMM'},
# {'data_file': 'dgemm-libxsmm.json', 'label': 'LIBXSMM'},
# {'data_file': 'dgemm-eigen-dynamic.json', 'label': 'Eigen (D)'},
# {'data_file': 'dgemm-eigen-static.json', 'label': 'Eigen (S)'},
# {'data_file': 'dgemm-blaze-dynamic.json', 'label': 'Blaze (D)'},
{'data_file': 'dgemm-blaze-static.json', 'label': 'Blaze (S)'},
{'data_file': 'dgemm-blast-static-panel.json', 'label': 'BLAST (SP)'},
{'data_file': 'dgemm-blast-static-plain.json', 'label': 'BLAST (SD)'},
{'data_file': 'dgemm-blast-dynamic-panel.json', 'label': 'BLAST (DP)'},
{'data_file': 'dgemm-blast-dynamic-plain.json', 'label': 'BLAST (DD)'},
# {'data_file': 'dgemm-blaze-static.json', 'label': 'Blaze (S)'},
]

for benchmark_file, benchmark_label in [('dgemm-blast-static-panel.json', 'SP'), ('dgemm-blast-static-plain.json', 'SD'), ('dgemm-blast-dynamic-panel.json', 'DP'), ('dgemm-blast-dynamic-plain.json', 'DD')]:
files = glob.glob('./**/' + benchmark_file, recursive=True, root_dir='bench_result/data')
for file in files:
plots.append({'data_file': file, 'label': f'BLAST ({benchmark_label}) {pathlib.Path(file).parent.stem}'})

fig = plt.figure(figsize=[10, 6])
ax = fig.subplots()

Expand Down
10 changes: 8 additions & 2 deletions bench/analysis/dgemm_performance_ratio.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@


def filter_aggregate(benchmarks, name):
    """Select the benchmark records that belong to one aggregate.

    Args:
        benchmarks: iterable of benchmark records (mappings).
        name: value that a record's 'aggregate_name' entry must equal.

    Returns:
        A list, in input order, of the records whose 'aggregate_name' equals
        ``name``. Records that have no 'aggregate_name' entry are skipped
        instead of raising KeyError.
    """
    # The membership test comes first so that records without the key are
    # skipped rather than aborting the whole filter.
    return [
        b for b in benchmarks
        if 'aggregate_name' in b and b['aggregate_name'] == name
    ]


def load_benchmark(file_name):
with open(file_name) as f:
Expand Down
6 changes: 3 additions & 3 deletions bench/blast/math/dense/DynamicGemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <blast/math/dense/Gemm.hpp>
#include <blast/math/algorithm/Gemm.hpp>
#include <blast/math/Matrix.hpp>
#include <blast/blaze/Math.hpp>

#include <bench/Gemm.hpp>

#include <blaze/math/DynamicMatrix.h>

#include <test/Randomize.hpp>


Expand Down
6 changes: 3 additions & 3 deletions bench/blast/math/dense/StaticGemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <blast/math/dense/Gemm.hpp>
#include <blast/math/algorithm/Gemm.hpp>
#include <blast/math/Matrix.hpp>
#include <blast/blaze/Math.hpp>

#include <bench/Gemm.hpp>

#include <blaze/math/StaticMatrix.h>

#include <test/Randomize.hpp>


Expand Down
2 changes: 1 addition & 1 deletion bench/blast/math/dense/StaticIamax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#include <blast/math/dense/Iamax.hpp>

#include <blaze/math/DynamicVector.h>
#include <blast/blaze/Math.hpp>

#include <bench/Iamax.hpp>
#include <bench/Complexity.hpp>
Expand Down
7 changes: 2 additions & 5 deletions bench/blast/math/dense/StaticTrmm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@
// license that can be found in the LICENSE file.

#include <blast/math/dense/Trmm.hpp>

#include <blaze/math/StaticMatrix.h>
#include <blast/math/Matrix.hpp>
#include <blast/blaze/Math.hpp>

#include <bench/Gemm.hpp>
#include <test/Randomize.hpp>

#include <random>
#include <memory>


namespace blast :: benchmark
{
Expand Down
2 changes: 1 addition & 1 deletion bench/blast/math/panel/DynamicGemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.

#include <blast/math/DynamicPanelMatrix.hpp>
#include <blast/math/panel/Gemm.hpp>
#include <blast/math/algorithm/Gemm.hpp>

#include <bench/Gemm.hpp>

Expand Down
4 changes: 3 additions & 1 deletion bench/blast/math/panel/StaticGemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
// license that can be found in the LICENSE file.

#include <blast/math/StaticPanelMatrix.hpp>
#include <blast/math/panel/Gemm.hpp>
#include <blast/math/Matrix.hpp>
#include <blast/math/algorithm/Gemm.hpp>
#include <blast/blaze/Math.hpp>

#include <bench/Gemm.hpp>

Expand Down
6 changes: 3 additions & 3 deletions bench/blast/math/simd/Trmm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <blast/math/dense/StaticMatrixPointer.hpp>
#include <blast/math/Matrix.hpp>
#include <blast/math/RegisterMatrix.hpp>

#include <bench/Benchmark.hpp>

#include <test/Randomize.hpp>

#include <blaze/math/StaticMatrix.h>
#include <blast/blaze/Math.hpp>


namespace blast :: benchmark
Expand Down Expand Up @@ -81,4 +81,4 @@ namespace blast :: benchmark
BENCHMARK_TEMPLATE(BM_RegisterMatrix_trmmRightLower, float, 24, 4, columnMajor);
BENCHMARK_TEMPLATE(BM_RegisterMatrix_trmmRightLower, float, 16, 5, columnMajor);
BENCHMARK_TEMPLATE(BM_RegisterMatrix_trmmRightLower, float, 16, 6, columnMajor);
}
}
10 changes: 10 additions & 0 deletions include/blast/blaze/Math.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/blaze/math/Vector.hpp>
#include <blast/blaze/math/TypeTraits.hpp>

#include <blaze/Math.h>
12 changes: 12 additions & 0 deletions include/blast/blaze/math/TypeTraits.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/blaze/math/typetraits/IsContiguous.hpp>
#include <blast/blaze/math/typetraits/IsStatic.hpp>
#include <blast/blaze/math/typetraits/IsStaticallySpaced.hpp>
#include <blast/blaze/math/typetraits/IsDenseVector.hpp>
#include <blast/blaze/math/typetraits/IsDenseMatrix.hpp>
#include <blast/blaze/math/typetraits/Spacing.hpp>
38 changes: 38 additions & 0 deletions include/blast/blaze/math/Vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/blaze/math/typetraits/IsContiguous.hpp>

#include <blaze/math/typetraits/IsDenseVector.h>
#include <blaze/math/typetraits/IsView.h>


namespace blaze
{
    /**
     * @brief Memory distance between consecutive elements of a dense Blaze vector
     *
     * NOTE: The function is declared in blaze namespace s.t. it can be found by ADL.
     *
     * A contiguous vector has spacing 1. A vector that is not contiguous but is a view
     * forwards to the spacing of its operand. Any other vector type is rejected
     * at compile time.
     *
     * @tparam VT vector type
     *
     * @param v vector
     *
     * @return memory distance between consecutive elements of @a v
     */
    template <typename VT>
    requires blaze::IsDenseVector_v<VT>
    inline size_t spacing(VT const& v) noexcept
    {
        if constexpr (IsContiguous_v<VT>)
        {
            return 1;
        }
        else if constexpr (blaze::IsView_v<VT>)
        {
            // Resolved through ADL on the operand type.
            return spacing(v.operand());
        }
        else
        {
            // The condition is deliberately type-dependent: a literal
            // static_assert(false) inside a template is rejected at definition
            // time by compilers that predate CWG2518. On this branch both traits
            // are known to be false, so the assertion fires only when the branch
            // is actually instantiated.
            static_assert(
                IsContiguous_v<VT> || blaze::IsView_v<VT>,
                "Spacing is not defined for a type which is not a view and is not contiguous");
        }
    }
}
41 changes: 41 additions & 0 deletions include/blast/blaze/math/typetraits/IsContiguous.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/math/typetraits/IsContiguous.hpp>

#include <blaze/math/Aliases.h>
#include <blaze/math/typetraits/IsContiguous.h>
#include <blaze/math/typetraits/IsVector.h>
#include <blaze/math/typetraits/IsTransExpr.h>

#include <type_traits>


namespace blast
{
/**
* @brief Specialization for Blaze vectors which are not transpose expressions
*
* Forwards to Blaze's own trait: the result is whatever @a blaze::IsContiguous<T> reports.
*
* @tparam T type
*/
template <typename T>
requires blaze::IsVector_v<T> && (!blaze::IsTransExpr_v<T>)
struct IsContiguous<T> : blaze::IsContiguous<T> {};


/**
* @brief Specialization for Blaze vector transpose expressions
*
* The transposed vector expression is contiguous iff its operand is contiguous.
* The operand type is obtained with @a blaze::Operand_t and stripped of its reference
* before this trait is applied to it again.
*
* This specialization is required to fix this Blaze bug: https://bitbucket.org/blaze-lib/blaze/issues/474
*
* @tparam T type
*/
template <typename T>
requires blaze::IsVector_v<T> && blaze::IsTransExpr_v<T>
struct IsContiguous<T> : IsContiguous<std::remove_reference_t<blaze::Operand_t<T>>> {};
}
Loading