diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b0ad928..dddf56c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,10 +40,12 @@ target_link_libraries(blast ) target_compile_options(blast - INTERFACE "-Wno-ignored-attributes" "-fno-math-errno" "-ftemplate-backtrace-limit=0" - # Enable SIMD instruction sets, otherwise it does not compile. - # This will change when we support multiple architectures. - INTERFACE "-march=native" "-mfma" "-mavx" "-mavx2" "-msse4" + INTERFACE "-Wno-ignored-attributes" "-fno-math-errno" +) + +target_compile_options(blast + INTERFACE + $<$:-ftemplate-backtrace-limit=0> ) # BLAST_WITH_BLASFEO diff --git a/Makefile b/Makefile index 9fb445e7..4f8dc708 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,10 @@ BENCH_BLASFEO = build/bin/bench-blasfeo BENCH_BLAZE = build/bin/bench-blaze BENCH_EIGEN = build/bin/bench-eigen BENCH_BLAST = build/bin/bench-blast +BENCH_BLAST_OUTPUT_DIR = $(shell git rev-parse --short HEAD) +BENCH_BLAST_CPU_CORE = 11 BENCH_LIBXSMM = build/bin/bench-libxsmm -BENCHMARK_OPTIONS = --benchmark_repetitions=5 --benchmark_counters_tabular=true --benchmark_out_format=json --benchmark_report_aggregates_only=true +BENCHMARK_OPTIONS = --benchmark_repetitions=30 --benchmark_counters_tabular=true --benchmark_out_format=json --benchmark_enable_random_interleaving=true --benchmark_min_warmup_time=10 --benchmark_min_time=1000000x RUN_MATLAB = matlab -nodisplay -nosplash -nodesktop -r BENCH_DATA = bench_result/data BENCH_IMAGE = bench_result/image @@ -61,29 +63,29 @@ ${BENCH_DATA}/sgemm-blaze-static.json: $(BENCH_BLAZE) $(BENCH_BLAZE) --benchmark_filter="BM_gemm_static*" $(BENCHMARK_OPTIONS) \ --benchmark_out=${BENCH_DATA}/sgemm-blaze-static.json -${BENCH_DATA}/dgemm-blast-static-panel.json: $(BENCH_BLAST) - $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel" $(BENCHMARK_OPTIONS) \ - --benchmark_out=${BENCH_DATA}/dgemm-blast-static-panel.json +${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-panel.json: $(BENCH_BLAST) + taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel" $(BENCHMARK_OPTIONS) \ + --benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-panel.json -${BENCH_DATA}/dgemm-blast-dynamic-panel.json: $(BENCH_BLAST) - $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel" $(BENCHMARK_OPTIONS) \ - --benchmark_out=${BENCH_DATA}/dgemm-blast-dynamic-panel.json +${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-panel.json: $(BENCH_BLAST) + taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel" $(BENCHMARK_OPTIONS) \ + --benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-panel.json -${BENCH_DATA}/dgemm-blast-static-plain.json: $(BENCH_BLAST) - $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_plain" $(BENCHMARK_OPTIONS) \ - --benchmark_out=${BENCH_DATA}/dgemm-blast-static-plain.json +${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-plain.json: $(BENCH_BLAST) + taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_plain" $(BENCHMARK_OPTIONS) \ + --benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-plain.json -${BENCH_DATA}/dgemm-blast-dynamic-plain.json: $(BENCH_BLAST) - $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_plain" $(BENCHMARK_OPTIONS) \ - --benchmark_out=${BENCH_DATA}/dgemm-blast-dynamic-plain.json +${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-plain.json: $(BENCH_BLAST) + taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_plain" $(BENCHMARK_OPTIONS) \ + --benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-plain.json ${BENCH_DATA}/sgemm-blast-static-panel.json: $(BENCH_BLAST) - $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel" $(BENCHMARK_OPTIONS) \ - --benchmark_out=${BENCH_DATA}/sgemm-blast-static-panel.json + taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_static_panel" $(BENCHMARK_OPTIONS) \ + --benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/sgemm-blast-static-panel.json ${BENCH_DATA}/sgemm-blast-dynamic-panel.json: $(BENCH_BLAST) - $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel" $(BENCHMARK_OPTIONS) \ - --benchmark_out=${BENCH_DATA}/sgemm-blast-dynamic-panel.json + taskset -c ${BENCH_BLAST_CPU_CORE} $(BENCH_BLAST) --benchmark_filter="BM_gemm_dynamic_panel" $(BENCHMARK_OPTIONS) \ + --benchmark_out=${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/sgemm-blast-dynamic-panel.json ${BENCH_DATA}/dgemm-libxsmm.json: $(BENCH_LIBXSMM) $(BENCH_LIBXSMM) --benchmark_filter="BM_gemm_nt" $(BENCHMARK_OPTIONS) \ @@ -94,16 +96,17 @@ ${BENCH_DATA}/sgemm-libxsmm.json: $(BENCH_LIBXSMM) --benchmark_out=${BENCH_DATA}/sgemm-libxsmm.json dgemm-benchmarks: \ + $(shell mkdir -p ${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}) \ ${BENCH_DATA}/dgemm-openblas.json \ ${BENCH_DATA}/dgemm-mkl.json \ ${BENCH_DATA}/dgemm-libxsmm.json \ ${BENCH_DATA}/dgemm-blasfeo.json \ ${BENCH_DATA}/dgemm-blaze-static.json \ ${BENCH_DATA}/dgemm-eigen-static.json \ - ${BENCH_DATA}/dgemm-blast-static-panel.json \ - ${BENCH_DATA}/dgemm-blast-dynamic-panel.json \ - ${BENCH_DATA}/dgemm-blast-static-plain.json \ - ${BENCH_DATA}/dgemm-blast-dynamic-plain.json + ${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-panel.json \ + ${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-panel.json \ + ${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-static-plain.json \ + ${BENCH_DATA}/${BENCH_BLAST_OUTPUT_DIR}/dgemm-blast-dynamic-plain.json # @@ -238,4 +241,4 @@ ${BENCH_IMAGE}/mpc_software.pdf_tex: ${BENCH_IMAGE}/mpc_software.svg /usr/bin/inkscape --without-gui --file=${BENCH_IMAGE}/mpc_software.svg --export-pdf=${BENCH_IMAGE}/mpc_software.pdf --export-latex --export-area-drawing ${BENCH_IMAGE}/mpc_software.pdf: ${BENCH_IMAGE}/mpc_software.svg - /usr/bin/inkscape --without-gui --file=${BENCH_IMAGE}/mpc_software.svg --export-pdf=${BENCH_IMAGE}/mpc_software.pdf --export-area-drawing \ No newline at end of file + /usr/bin/inkscape --without-gui --file=${BENCH_IMAGE}/mpc_software.svg --export-pdf=${BENCH_IMAGE}/mpc_software.pdf --export-area-drawing diff --git a/bench/analysis/dgemm_performance.py b/bench/analysis/dgemm_performance.py index 61c51277..e64e914b 100644 --- a/bench/analysis/dgemm_performance.py +++ b/bench/analysis/dgemm_performance.py @@ -2,10 +2,18 @@ matplotlib.use("Agg") import matplotlib.pyplot as plt import json - +import glob +import pathlib def filter_aggregate(benchmarks, name): - return [b for b in benchmarks if b['aggregate_name'] == name] + result = [] + for b in benchmarks: + try: + if b['aggregate_name'] == name: + result.append(b) + except KeyError: + continue + return result factor = 1e+9 # Giga @@ -17,20 +25,21 @@ def filter_aggregate(benchmarks, name): plots = [ # {'data_file': 'dgemm-openblas.json', 'label': 'OpenBLAS'}, - {'data_file': 'dgemm-mkl.json', 'label': 'MKL'}, + # {'data_file': 'dgemm-mkl.json', 'label': 'MKL'}, {'data_file': 'dgemm-blasfeo.json', 'label': 'BLASFEO'}, # {'data_file': 'dgemm-blasfeo-blas.json', 'label': 'BLASFEO*'}, - {'data_file': 'dgemm-libxsmm.json', 'label': 'LIBXSMM'}, + # {'data_file': 'dgemm-libxsmm.json', 'label': 'LIBXSMM'}, # {'data_file': 'dgemm-eigen-dynamic.json', 'label': 'Eigen (D)'}, # {'data_file': 'dgemm-eigen-static.json', 'label': 'Eigen (S)'}, # {'data_file': 'dgemm-blaze-dynamic.json', 'label': 'Blaze (D)'}, - {'data_file': 'dgemm-blaze-static.json', 'label': 'Blaze (S)'}, - {'data_file': 'dgemm-blast-static-panel.json', 'label': 'BLAST (SP)'}, - {'data_file': 'dgemm-blast-static-plain.json', 'label': 'BLAST (SD)'}, - {'data_file': 'dgemm-blast-dynamic-panel.json', 'label': 'BLAST (DP)'}, - {'data_file': 'dgemm-blast-dynamic-plain.json', 'label': 'BLAST (DD)'}, + # {'data_file': 'dgemm-blaze-static.json', 'label': 'Blaze (S)'}, ] +for benchmark_file, benchmark_label in [('dgemm-blast-static-panel.json', 'SP'), ('dgemm-blast-static-plain.json', 'SD'), ('dgemm-blast-dynamic-panel.json', 'DP'), ('dgemm-blast-dynamic-plain.json', 'DD')]: + files = glob.glob('./**/' + benchmark_file, recursive=True, root_dir='bench_result/data') + for file in files: + plots.append({'data_file': file, 'label': f'BLAST ({benchmark_label}) {pathlib.Path(file).parent.stem}'}) + fig = plt.figure(figsize=[10, 6]) ax = fig.subplots() diff --git a/bench/analysis/dgemm_performance_ratio.py b/bench/analysis/dgemm_performance_ratio.py index d8addef9..46569527 100644 --- a/bench/analysis/dgemm_performance_ratio.py +++ b/bench/analysis/dgemm_performance_ratio.py @@ -5,8 +5,14 @@ def filter_aggregate(benchmarks, name): - return [b for b in benchmarks if b['aggregate_name'] == name] - + result = [] + for b in benchmarks: + try: + if b['aggregate_name'] == name: + result.append(b) + except KeyError: + continue + return result def load_benchmark(file_name): with open(file_name) as f: diff --git a/bench/blast/math/dense/DynamicGemm.cpp b/bench/blast/math/dense/DynamicGemm.cpp index e0f78260..10d8fb32 100644 --- a/bench/blast/math/dense/DynamicGemm.cpp +++ b/bench/blast/math/dense/DynamicGemm.cpp @@ -2,12 +2,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include +#include +#include #include -#include - #include diff --git a/bench/blast/math/dense/StaticGemm.cpp b/bench/blast/math/dense/StaticGemm.cpp index 22d6f68e..2565d36a 100644 --- a/bench/blast/math/dense/StaticGemm.cpp +++ b/bench/blast/math/dense/StaticGemm.cpp @@ -2,12 +2,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include +#include +#include #include -#include - #include diff --git a/bench/blast/math/dense/StaticIamax.cpp b/bench/blast/math/dense/StaticIamax.cpp index a3a53101..6fab6a87 100644 --- a/bench/blast/math/dense/StaticIamax.cpp +++ b/bench/blast/math/dense/StaticIamax.cpp @@ -14,7 +14,7 @@ #include -#include +#include #include #include diff --git a/bench/blast/math/dense/StaticTrmm.cpp b/bench/blast/math/dense/StaticTrmm.cpp index 1365197b..1cc263ad 100644 --- a/bench/blast/math/dense/StaticTrmm.cpp +++ b/bench/blast/math/dense/StaticTrmm.cpp @@ -3,15 +3,12 @@ // license that can be found in the LICENSE file. #include - -#include +#include +#include #include #include -#include -#include - namespace blast :: benchmark { diff --git a/bench/blast/math/panel/DynamicGemm.cpp b/bench/blast/math/panel/DynamicGemm.cpp index 96ac4d62..3b8866d0 100644 --- a/bench/blast/math/panel/DynamicGemm.cpp +++ b/bench/blast/math/panel/DynamicGemm.cpp @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. #include -#include +#include #include diff --git a/bench/blast/math/panel/StaticGemm.cpp b/bench/blast/math/panel/StaticGemm.cpp index 0d4f4ee2..0c201346 100644 --- a/bench/blast/math/panel/StaticGemm.cpp +++ b/bench/blast/math/panel/StaticGemm.cpp @@ -3,7 +3,9 @@ // license that can be found in the LICENSE file. #include -#include +#include +#include +#include #include diff --git a/bench/blast/math/simd/Trmm.cpp b/bench/blast/math/simd/Trmm.cpp index 9ab4a7c3..5cc29978 100644 --- a/bench/blast/math/simd/Trmm.cpp +++ b/bench/blast/math/simd/Trmm.cpp @@ -2,14 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include #include #include #include -#include +#include namespace blast :: benchmark @@ -81,4 +81,4 @@ namespace blast :: benchmark BENCHMARK_TEMPLATE(BM_RegisterMatrix_trmmRightLower, float, 24, 4, columnMajor); BENCHMARK_TEMPLATE(BM_RegisterMatrix_trmmRightLower, float, 16, 5, columnMajor); BENCHMARK_TEMPLATE(BM_RegisterMatrix_trmmRightLower, float, 16, 6, columnMajor); -} \ No newline at end of file +} diff --git a/include/blast/blaze/Math.hpp b/include/blast/blaze/Math.hpp new file mode 100644 index 00000000..48c1c3cc --- /dev/null +++ b/include/blast/blaze/Math.hpp @@ -0,0 +1,10 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include + +#include diff --git a/include/blast/blaze/math/TypeTraits.hpp b/include/blast/blaze/math/TypeTraits.hpp new file mode 100644 index 00000000..40737815 --- /dev/null +++ b/include/blast/blaze/math/TypeTraits.hpp @@ -0,0 +1,12 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include +#include +#include diff --git a/include/blast/blaze/math/Vector.hpp b/include/blast/blaze/math/Vector.hpp new file mode 100644 index 00000000..452cc7d4 --- /dev/null +++ b/include/blast/blaze/math/Vector.hpp @@ -0,0 +1,38 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +#include +#include + + +namespace blaze +{ + /** + * @brief Memory distance between consecutive elements of a dense Blaze vector + * + * NOTE: The function is declared in blaze namespace s.t. it can be found by ADL. + * + * @tparam VT vector type + * + * @param v vector + * + * @return memory distance between consecutive elements of @a v + */ + template + requires blaze::IsDenseVector_v + inline size_t spacing(VT const& v) noexcept + { + if constexpr (IsContiguous_v) + return 1; + else + if constexpr (blaze::IsView_v) + return spacing(v.operand()); + else + static_assert(false, "Spacing is not defined for a type which is not a view and is not contiguous"); + } +} diff --git a/include/blast/blaze/math/typetraits/IsContiguous.hpp b/include/blast/blaze/math/typetraits/IsContiguous.hpp new file mode 100644 index 00000000..544b4486 --- /dev/null +++ b/include/blast/blaze/math/typetraits/IsContiguous.hpp @@ -0,0 +1,41 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +#include +#include +#include +#include + +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze vectors which are not transpose expressions + * + * @tparam T type + */ + template + requires blaze::IsVector_v && (!blaze::IsTransExpr_v) + struct IsContiguous : blaze::IsContiguous {}; + + + /** + * @brief Specialization for Blaze vector transpose expressions + * + * The trasnposed vector expression is contiguous iff its operand is contiguous. + * + * This specialization is required to fix this Blaze bug: https://bitbucket.org/blaze-lib/blaze/issues/474 + * + * @tparam T type + */ + template + requires blaze::IsVector_v && blaze::IsTransExpr_v + struct IsContiguous : IsContiguous>> {}; +} diff --git a/include/blast/blaze/math/typetraits/IsDenseMatrix.hpp b/include/blast/blaze/math/typetraits/IsDenseMatrix.hpp new file mode 100644 index 00000000..e03e6c5a --- /dev/null +++ b/include/blast/blaze/math/typetraits/IsDenseMatrix.hpp @@ -0,0 +1,23 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +#include +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze matrices + * + * @tparam T matrix type + */ + template + requires blaze::IsMatrix_v + struct IsDenseMatrix : blaze::IsDenseMatrix {}; +} diff --git a/include/blast/blaze/math/typetraits/IsDenseVector.hpp b/include/blast/blaze/math/typetraits/IsDenseVector.hpp new file mode 100644 index 00000000..eaeae86e --- /dev/null +++ b/include/blast/blaze/math/typetraits/IsDenseVector.hpp @@ -0,0 +1,23 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +#include +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze vectors + * + * @tparam T vector type + */ + template + requires blaze::IsVector_v + struct IsDenseVector : blaze::IsDenseVector {}; +} diff --git a/include/blast/blaze/math/typetraits/IsStatic.hpp b/include/blast/blaze/math/typetraits/IsStatic.hpp new file mode 100644 index 00000000..3b0031ba --- /dev/null +++ b/include/blast/blaze/math/typetraits/IsStatic.hpp @@ -0,0 +1,24 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +#include +#include +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze matrix and vector types + * + * @tparam T matrix or vector type + */ + template + requires blaze::IsVector_v || blaze::IsMatrix_v + struct IsStatic : blaze::IsStatic {}; +} diff --git a/include/blast/blaze/math/typetraits/IsStaticallySpaced.hpp b/include/blast/blaze/math/typetraits/IsStaticallySpaced.hpp new file mode 100644 index 00000000..4184ab83 --- /dev/null +++ b/include/blast/blaze/math/typetraits/IsStaticallySpaced.hpp @@ -0,0 +1,44 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include + +#include +#include +#include +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze vectors which are not views + * + * Blaze vectors which are not views are statically spaced iff they are contiguous + * + * @tparam VT vector type + */ + template + requires blaze::IsVector_v && (!blaze::IsView_v) + struct IsStaticallySpaced : IsContiguous {}; + + + /** + * @brief Specialization for Blaze vectors which are views + * + * Blaze vectors which are views are statically spaced iff they are contiguous or their viewed type is static + * + * @tparam VT vector type + */ + template + requires blaze::IsVector_v && blaze::IsView_v + struct IsStaticallySpaced + : std::integral_constant || blaze::IsStatic_v> + > + {}; +} diff --git a/include/blast/blaze/math/typetraits/Spacing.hpp b/include/blast/blaze/math/typetraits/Spacing.hpp new file mode 100644 index 00000000..6ead89fc --- /dev/null +++ b/include/blast/blaze/math/typetraits/Spacing.hpp @@ -0,0 +1,59 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze dense matrices which are not transpose expressions + * + * @tparam MT matrix type + */ + template + requires blaze::IsDenseMatrix_v && (!blaze::IsTransExpr_v) + struct Spacing : std::integral_constant {}; + + + /** + * @brief Specialization for Blaze dense matrix transpose expressions + * + * @tparam MT matrix type + */ + template + requires blaze::IsDenseMatrix_v && blaze::IsTransExpr_v + struct Spacing : Spacing>> {}; + + + /** + * @brief Specialization for contiguous dense Blaze vectors + * + * @tparam VT vector type + */ + template + requires blaze::IsDenseVector_v && IsContiguous_v + struct Spacing : std::integral_constant {}; + + + /** + * @brief Specialization for non-contiguous dense Blaze vector views + * + * @tparam VT vector type + */ + template + requires blaze::IsDenseVector_v && (!IsContiguous_v) && blaze::IsView_v + struct Spacing : Spacing> {}; +} diff --git a/include/blast/math/Matrix.hpp b/include/blast/math/Matrix.hpp new file mode 100644 index 00000000..7002226d --- /dev/null +++ b/include/blast/math/Matrix.hpp @@ -0,0 +1,47 @@ +// Copyright (c) 2019-2024 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace blast +{ + /** + * @brief Pointer to the first element of a matrix + * + * @tparam MT matrix type + * + * @param m matrix + * + * @return pointer to @a m(0, 0) + */ + template + BLAST_ALWAYS_INLINE auto ptr(MT& m) + { + return ptr>(m, 0, 0); + } + + + /** + * @brief Pointer to the first element of a const matrix + * + * @tparam MT matrix type + * + * @param m matrix + * + * @return pointer to @a m(0, 0) + */ + template + BLAST_ALWAYS_INLINE auto ptr(MT const& m) + { + return ptr>(m, 0, 0); + } +} diff --git a/include/blast/math/StaticPanelMatrix.hpp b/include/blast/math/StaticPanelMatrix.hpp index b191d789..cfbcde3f 100644 --- a/include/blast/math/StaticPanelMatrix.hpp +++ b/include/blast/math/StaticPanelMatrix.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include diff --git a/include/blast/math/TypeTraits.hpp b/include/blast/math/TypeTraits.hpp index a65b0ac3..bff67adc 100644 --- a/include/blast/math/TypeTraits.hpp +++ b/include/blast/math/TypeTraits.hpp @@ -1,25 +1,22 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once #include #include #include +#include +#include #include #include +#include +#include +#include #include #include #include #include +#include +#include diff --git a/include/blast/math/Vector.hpp b/include/blast/math/Vector.hpp new file mode 100644 index 00000000..f1026b29 --- /dev/null +++ b/include/blast/math/Vector.hpp @@ -0,0 +1,45 @@ +// Copyright 2023-2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include + + +namespace blast +{ + /** + * @brief Pointer to the first element of a vector + * + * @tparam VT vector type + * @param v vector + * + * @return pointer to the first element of @a v + */ + template + requires IsDenseVector_v + BLAST_ALWAYS_INLINE auto ptr(VT& v) + { + return ptr>(v, 0); + } + + + /** + * @brief Pointer to the first element of a const vector + * + * @tparam VT vector type + * @param v vector + * + * @return pointer to the first element of @a v + */ + template + requires IsDenseVector_v + BLAST_ALWAYS_INLINE auto ptr(VT const& v) + { + return ptr>(v, 0); + } +} diff --git a/include/blast/math/algorithm/Gemm.hpp b/include/blast/math/algorithm/Gemm.hpp index 211c3acb..ae80d0d1 100644 --- a/include/blast/math/algorithm/Gemm.hpp +++ b/include/blast/math/algorithm/Gemm.hpp @@ -4,15 +4,11 @@ #pragma once -#include -#include +#include #include #include - -#include - -#include -#include +#include +#include namespace blast @@ -26,12 +22,12 @@ namespace blast * alpha and beta are scalars, and A, B and C are matrices, with A * an m by k matrix, B a k by n matrix and C an m by n matrix. * - * @tparam ST1 - * @tparam MPA - * @tparam MPB - * @tparam ST2 - * @tparam MPC - * @tparam MPD + * @tparam ST1 scalar type for @a alpha + * @tparam MPA matrix pointer type for @a A + * @tparam MPB matrix pointer type for @a B + * @tparam ST2 scalar type for @a beta + * @tparam MPC matrix pointer type for @a C + * @tparam MPD matrix pointer type for @a D * * @param M the number of rows of the matrices A, C, and D. * @param N the number of columns of the matrices B and C. @@ -44,24 +40,15 @@ namespace blast * @param D the output matrix D */ template < - typename ST1, typename MPA, typename MPB, - typename ST2, typename MPC, typename MPD + typename ST1, MatrixPointer MPA, MatrixPointer MPB, + typename ST2, MatrixPointer MPC, MatrixPointer MPD > - requires ( - MatrixPointer && StorageOrder_v == columnMajor && - MatrixPointer && - MatrixPointer && StorageOrder_v == columnMajor && - MatrixPointer && StorageOrder_v == columnMajor - ) - BLAST_ALWAYS_INLINE void gemm(size_t M, size_t N, size_t K, ST1 alpha, MPA A, MPB B, ST2 beta, MPC C, MPD D) + inline void gemm(size_t M, size_t N, size_t K, ST1 alpha, MPA A, MPB B, ST2 beta, MPC C, MPD D) { using ET = std::remove_cv_t>; - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(std::remove_cv_t>, ET); - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(std::remove_cv_t>, ET); - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(std::remove_cv_t>, ET); - tile)>( + xsimd::default_arch {}, D.cachePreferredTraversal, M, N, [&] (auto& ker, size_t i, size_t j) @@ -74,4 +61,60 @@ namespace blast } ); } -} \ No newline at end of file + + + /** + * @brief Matrix-matrix multiplication for @a DenseMatrix arguments + * + * D := alpha*A*B + beta*C + * + * alpha and beta are scalars, and A, B and C are matrices, with A + * an m by k matrix, B a k by n matrix and C an m by n matrix. + * + * @param alpha the scalar alpha + * @param A the matrix A + * @param B the matrix B + * @param beta the scalar beta + * @param C the matrix C + * @param D the output matrix D + */ + template + inline void gemm(ST1 alpha, MT1 const& A, MT2 const& B, ST2 beta, MT3 const& C, MT4& D) + { + size_t const M = rows(A); + size_t const N = columns(B); + size_t const K = columns(A); + + if (rows(B) != K) + BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); + + if (rows(C) != M || columns(C) != N) + BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); + + if (rows(D) != M || columns(D) != N) + BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); + + gemm(M, N, K, alpha, ptr(*A), ptr(*B), beta, ptr(*C), ptr(*D)); + } + + + /** + * @brief Matrix-matrix multiplication for @a DenseMatrix arguments + * + * D := A*B + C + * + * A, B and C are matrices, with A + * an m by k matrix, B a k by n matrix and C an m by n matrix. + * + * @param A the matrix A + * @param B the matrix B + * @param C the matrix C + * @param D the output matrix D + */ + template + inline void gemm(MT1 const& A, MT2 const& B, MT3 const& C, MT4& D) + { + using ET = ElementType_t; + gemm(ET(1.), A, B, ET(1.), C, D); + } +} diff --git a/include/blast/math/algorithm/Tile.hpp b/include/blast/math/algorithm/Tile.hpp index 41d7aadb..7f6ecc63 100644 --- a/include/blast/math/algorithm/Tile.hpp +++ b/include/blast/math/algorithm/Tile.hpp @@ -1,56 +1,20 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#if XSIMD_WITH_AVX2 +# include +#endif -#include -#include -#include #include -#include #include namespace blast { - template - BLAZE_ALWAYS_INLINE void tile_backend(size_t m, size_t n, size_t i, FF&& f_full, FP&& f_partial) - { - RegisterMatrix ker; - - if (i + KM <= m) - { - size_t j = 0; - - for (; j + KN <= n; j += KN) - f_full(ker, i, j); - - if (j < n) - f_partial(ker, i, j, KM, n - j); - } - else - { - size_t j = 0; - - for (; j + KN <= n; j += KN) - f_partial(ker, i, j, m - i, KN); - - if (j < n) - f_partial(ker, i, j, m - i, n - j); - } - } - - /** * @brief Cover a matrix with tiles of different sizes in a performance-efficient way. * @@ -69,106 +33,18 @@ namespace blast * * @tparam ET type of matrix elements * @tparam SO matrix storage order - * @tparam F functor type + * @tparam FF functor type for full tiles + * @tparam FP functor type for partial tiles + * @tparam Arch instruction set architecture * * @param m number of matrix rows * @param n number of matrix columns * @param f_full functor to call on full tiles * @param f_partial functor to call on partial tiles */ - template - BLAST_ALWAYS_INLINE void tile(StorageOrder traversal_order, std::size_t m, std::size_t n, FF&& f_full, FP&& f_partial) + template + inline void tile(Arch arch, StorageOrder traversal_order, std::size_t m, std::size_t n, FF&& f_full, FP&& f_partial) { - size_t constexpr SS = SimdSize_v; - size_t constexpr TILE_STEP = 4; // TODO: this is almost arbitrary and needs to be ppoperly determined - - static_assert(SO == columnMajor, "tile() for row-major matrices not implemented"); - - if (traversal_order == columnMajor) - { - size_t j = 0; - - // Main part - for (; j + TILE_STEP <= n; j += TILE_STEP) - { - size_t i = 0; - - // i + 4 * TILE_SIZE != M is to improve performance in case when the remaining number of rows is 4 * TILE_SIZE: - // it is more efficient to apply 2 * TILE_SIZE kernel 2 times than 3 * TILE_SIZE + 1 * TILE_SIZE kernel. - for (; i + 3 * SS <= m && i + 4 * SS != m; i += 3 * SS) - { - RegisterMatrix ker; - f_full(ker, i, j); - } - - for (; i + 2 * SS <= m; i += 2 * SS) - { - RegisterMatrix ker; - f_full(ker, i, j); - } - - for (; i + 1 * SS <= m; i += 1 * SS) - { - RegisterMatrix ker; - f_full(ker, i, j); - } - - // Bottom side - if (i < m) - { - RegisterMatrix ker; - f_partial(ker, i, j, m - i, ker.columns()); - } - } - - - // Right side - if (j < n) - { - size_t i = 0; - - // i + 4 * TILE_STEP != M is to improve performance in case when the remaining number of rows is 4 * TILE_STEP: - // it is more efficient to apply 2 * TILE_STEP kernel 2 times than 3 * TILE_STEP + 1 * TILE_STEP kernel. - for (; i + 3 * SS <= m && i + 4 * SS != m; i += 3 * SS) - { - RegisterMatrix ker; - f_partial(ker, i, j, ker.rows(), n - j); - } - - for (; i + 2 * SS <= m; i += 2 * SS) - { - RegisterMatrix ker; - f_partial(ker, i, j, ker.rows(), n - j); - } - - for (; i + 1 * SS <= m; i += 1 * SS) - { - RegisterMatrix ker; - f_partial(ker, i, j, ker.rows(), n - j); - } - - // Bottom-right corner - if (i < m) - { - RegisterMatrix ker; - f_partial(ker, i, j, m - i, n - j); - } - } - } - else - { - size_t i = 0; - - // i + 4 * SS != M is to improve performance in case when the remaining number of rows is 4 * SS: - // it is more efficient to apply 2 * SS kernel 2 times than 3 * SS + 1 * SS kernel. - for (; i + 2 * SS < m && i + 4 * SS != m; i += 3 * SS) - tile_backend(m, n, i, f_full, f_partial); - - for (; i + 1 * SS < m; i += 2 * SS) - tile_backend(m, n, i, f_full, f_partial); - - for (; i + 0 * SS < m; i += 1 * SS) - tile_backend(m, n, i, f_full, f_partial); - } + detail::tile(arch, traversal_order, m, n, f_full, f_partial); } -} \ No newline at end of file +} diff --git a/include/blast/math/algorithm/arch/avx2/Tile.hpp b/include/blast/math/algorithm/arch/avx2/Tile.hpp new file mode 100644 index 00000000..aac90b8a --- /dev/null +++ b/include/blast/math/algorithm/arch/avx2/Tile.hpp @@ -0,0 +1,140 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include +#include + +#include + +#include + + +namespace blast :: detail +{ + template + BLAST_ALWAYS_INLINE void tile_backend(xsimd::avx2, size_t m, size_t n, size_t i, FF&& f_full, FP&& f_partial) + { + RegisterMatrix ker; + + if (i + KM <= m) + { + size_t j = 0; + + for (; j + KN <= n; j += KN) + f_full(ker, i, j); + + if (j < n) + f_partial(ker, i, j, KM, n - j); + } + else + { + size_t j = 0; + + for (; j + KN <= n; j += KN) + f_partial(ker, i, j, m - i, KN); + + if (j < n) + f_partial(ker, i, j, m - i, n - j); + } + } + + + template + BLAST_ALWAYS_INLINE void tile(xsimd::avx2 const& arch, StorageOrder traversal_order, std::size_t m, std::size_t n, FF&& f_full, FP&& f_partial) + { + size_t constexpr SS = SimdSize_v; + size_t constexpr TILE_STEP = 4; // TODO: this is almost arbitrary and needs to be ppoperly determined + + static_assert(SO == columnMajor, "tile() for row-major matrices not implemented"); + + if (traversal_order == columnMajor) + { + size_t j = 0; + + // Main part + for (; j + TILE_STEP <= n; j += TILE_STEP) + { + size_t i = 0; + + // i + 4 * TILE_SIZE != M is to improve performance in case when the remaining number of rows is 4 * TILE_SIZE: + // it is more efficient to apply 2 * TILE_SIZE kernel 2 times than 3 * TILE_SIZE + 1 * TILE_SIZE kernel. + for (; i + 3 * SS <= m && i + 4 * SS != m; i += 3 * SS) + { + RegisterMatrix ker; + f_full(ker, i, j); + } + + for (; i + 2 * SS <= m; i += 2 * SS) + { + RegisterMatrix ker; + f_full(ker, i, j); + } + + for (; i + 1 * SS <= m; i += 1 * SS) + { + RegisterMatrix ker; + f_full(ker, i, j); + } + + // Bottom side + if (i < m) + { + RegisterMatrix ker; + f_partial(ker, i, j, m - i, ker.columns()); + } + } + + + // Right side + if (j < n) + { + size_t i = 0; + + // i + 4 * TILE_STEP != M is to improve performance in case when the remaining number of rows is 4 * TILE_STEP: + // it is more efficient to apply 2 * TILE_STEP kernel 2 times than 3 * TILE_STEP + 1 * TILE_STEP kernel. + for (; i + 3 * SS <= m && i + 4 * SS != m; i += 3 * SS) + { + RegisterMatrix ker; + f_partial(ker, i, j, ker.rows(), n - j); + } + + for (; i + 2 * SS <= m; i += 2 * SS) + { + RegisterMatrix ker; + f_partial(ker, i, j, ker.rows(), n - j); + } + + for (; i + 1 * SS <= m; i += 1 * SS) + { + RegisterMatrix ker; + f_partial(ker, i, j, ker.rows(), n - j); + } + + // Bottom-right corner + if (i < m) + { + RegisterMatrix ker; + f_partial(ker, i, j, m - i, n - j); + } + } + } + else + { + size_t i = 0; + + // i + 4 * SS != M is to improve performance in case when the remaining number of rows is 4 * SS: + // it is more efficient to apply 2 * SS kernel 2 times than 3 * SS + 1 * SS kernel. + for (; i + 2 * SS < m && i + 4 * SS != m; i += 3 * SS) + tile_backend(arch, m, n, i, f_full, f_partial); + + for (; i + 1 * SS < m; i += 2 * SS) + tile_backend(arch, m, n, i, f_full, f_partial); + + for (; i + 0 * SS < m; i += 1 * SS) + tile_backend(arch, m, n, i, f_full, f_partial); + } + } +} diff --git a/include/blast/math/dense/DynamicMatrixPointer.hpp b/include/blast/math/dense/DynamicMatrixPointer.hpp index 5610b1c9..486441b9 100644 --- a/include/blast/math/dense/DynamicMatrixPointer.hpp +++ b/include/blast/math/dense/DynamicMatrixPointer.hpp @@ -4,14 +4,12 @@ #pragma once - #include #include #include #include #include -#include - +#include namespace blast { @@ -203,51 +201,32 @@ namespace blast template - BLAZE_ALWAYS_INLINE auto trans(DynamicMatrixPointer const& p) noexcept + BLAST_ALWAYS_INLINE auto trans(DynamicMatrixPointer const& p) noexcept { return p.trans(); } - template - requires (!IsStatic_v) - BLAZE_ALWAYS_INLINE DynamicMatrixPointer, SO, AF, IsPadded_v> - ptr(DenseMatrix& m, size_t i, size_t j) + template + requires (!IsStatic_v) && IsDenseMatrix_v + BLAST_ALWAYS_INLINE DynamicMatrixPointer, StorageOrder_v, AF, IsPadded_v> + ptr(MT& m, size_t i, size_t j) { - if constexpr (SO == columnMajor) - return {(*m).data() + i + spacing(m) * j, spacing(m)}; + if constexpr (StorageOrder_v == columnMajor) + return {data(m) + i + spacing(m) * j, spacing(m)}; else - return {(*m).data() + spacing(m) * i + j, spacing(m)}; + return {data(m) + spacing(m) * i + j, spacing(m)}; } - template - requires (!IsStatic_v) - BLAZE_ALWAYS_INLINE DynamicMatrixPointer const, SO, AF, IsPadded_v> - ptr(DenseMatrix const& m, size_t i, size_t j) + template + requires (!IsStatic_v) && IsDenseMatrix_v + BLAST_ALWAYS_INLINE DynamicMatrixPointer const, StorageOrder_v, AF, IsPadded_v> + ptr(MT const& m, size_t i, size_t j) { - if constexpr (SO == columnMajor) - return {(*m).data() + i + spacing(m) * j, spacing(m)}; + if constexpr (StorageOrder_v == columnMajor) + return {data(m) + i + spacing(m) * j, spacing(m)}; else - return {(*m).data() + spacing(m) * i + j, spacing(m)}; - } - - - template - requires (!IsStatic_v) - BLAZE_ALWAYS_INLINE DynamicMatrixPointer const, SO, AF, IsPadded_v> - ptr(DMatTransExpr const& m, size_t i, size_t j) - { - if constexpr (SO == columnMajor) - return {(*m).data() + i + spacing(m) * j, spacing(m)}; - else - return {(*m).data() + spacing(m) * i + j, spacing(m)}; - } - - - template - BLAZE_ALWAYS_INLINE DynamicMatrixPointer ptr(T * p, size_t spacing) - { - return {p, spacing}; + return {data(m) + spacing(m) * i + j, spacing(m)}; } } diff --git a/include/blast/math/dense/DynamicVectorPointer.hpp b/include/blast/math/dense/DynamicVectorPointer.hpp index 7638119a..6843925d 100644 --- a/include/blast/math/dense/DynamicVectorPointer.hpp +++ b/include/blast/math/dense/DynamicVectorPointer.hpp @@ -5,7 +5,9 @@ #pragma once #include +#include #include +#include #include @@ -187,8 +189,50 @@ namespace blast template - BLAZE_ALWAYS_INLINE auto trans(DynamicVectorPointer const& p) noexcept + BLAST_ALWAYS_INLINE auto trans(DynamicVectorPointer const& p) noexcept { return p.trans(); } + + + /** + * @brief Pointer to a dynamically spaced vector + * + * @tparam AF true if the pointer is SIMD-aligned + * @tparam VT vector type + * + * @param v vector + * @param i index within @a v + * + * @return vector pointer to @a i -th element of @a v + */ + template + requires (!IsStaticallySpaced_v) + BLAST_ALWAYS_INLINE auto ptr(VT& v, size_t i) noexcept + { + // NOTE: we don't use data(v) here because of this bug: + // https://bitbucket.org/blaze-lib/blaze/issues/457 + return DynamicVectorPointer, VT::transposeFlag, AF, IsPadded_v> {&v[i], spacing(v)}; + } + + + /** + * @brief Pointer to a dynamically spaced const vector + * + * @tparam AF true if the pointer is SIMD-aligned + * @tparam VT vector type + * + * @param v vector + * @param i index within @a v + * + * @return vector pointer to @a i -th element of @a v + */ + template + requires (!IsStaticallySpaced_v) + BLAST_ALWAYS_INLINE auto ptr(VT const& v, size_t i) noexcept + { + // NOTE: we don't use data(v) here because of this bug: + // https://bitbucket.org/blaze-lib/blaze/issues/457 + return DynamicVectorPointer const, VT::transposeFlag, AF, IsPadded_v> {&v[i], spacing(v)}; + } } diff --git a/include/blast/math/dense/Gemm.hpp b/include/blast/math/dense/Gemm.hpp deleted file mode 100644 index a386baee..00000000 --- a/include/blast/math/dense/Gemm.hpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#pragma once - -#include -#include -#include - -#include -#include -#include - - -namespace blast -{ - /** - * @brief Matrix-matrix multiplication for @a DenseMatrix arguments - * - * D := alpha*A*B + beta*C - * - * alpha and beta are scalars, and A, B and C are matrices, with A - * an m by k matrix, B a k by n matrix and C an m by n matrix. - * - * @param alpha the scalar alpha - * @param A the matrix A - * @param B the matrix B - * @param beta the scalar beta - * @param C the matrix C - * @param D the output matrix D - */ - template < - typename ST1, typename MT1, typename MT2, bool SO2, - typename ST2, typename MT3, typename MT4 - > - inline void gemm( - ST1 alpha, - DenseMatrix const& A, DenseMatrix const& B, - ST2 beta, DenseMatrix const& C, DenseMatrix& D) - { - size_t const M = rows(A); - size_t const N = columns(B); - size_t const K = columns(A); - - if (rows(B) != K) - BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); - - if (rows(C) != M || columns(C) != N) - BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); - - if (rows(D) != M || columns(D) != N) - BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); - - gemm(M, N, K, alpha, ptr(*A), ptr(*B), beta, ptr(*C), ptr(*D)); - } - - - /** - * @brief Matrix-matrix multiplication for @a DenseMatrix arguments - * - * D := A*B + C - * - * A, B and C are matrices, with A - * an m by k matrix, B a k by n matrix and C an m by n matrix. - * - * @param A the matrix A - * @param B the matrix B - * @param C the matrix C - * @param D the output matrix D - */ - template < - typename MT1, typename MT2, bool SO2, - typename MT3, typename MT4 - > - inline void gemm( - DenseMatrix const& A, DenseMatrix const& B, - DenseMatrix const& C, DenseMatrix& D) - { - using ET = ElementType_t; - gemm(ET(1.), A, B, ET(1.), C, D); - } -} \ No newline at end of file diff --git a/include/blast/math/dense/Ger.hpp b/include/blast/math/dense/Ger.hpp index 807412c3..c19557bf 100644 --- a/include/blast/math/dense/Ger.hpp +++ b/include/blast/math/dense/Ger.hpp @@ -1,26 +1,14 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2023-2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once #include - #include #include -#include -#include -#include +#include +#include namespace blast @@ -57,13 +45,9 @@ namespace blast MatrixPointer && (StorageOrder_v == columnMajor) inline void ger(size_t M, size_t N, Scalar alpha, VPX x, VPY y, MPA A, MPB B) { - using ET = ElementType_t; + using ET = Scalar; size_t constexpr TILE_SIZE = TileSize_v; - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(ElementType_t, ET); - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(ElementType_t, ET); - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(ElementType_t, ET); - size_t j = 0; // Main part @@ -182,13 +166,6 @@ namespace blast DenseMatrix& B ) { - using ET = ElementType_t; - size_t constexpr TILE_SIZE = TileSize_v; - - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(ElementType_t, ET); - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(ElementType_t, ET); - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(ElementType_t, ET); - size_t const M = size(x); size_t const N = size(y); @@ -319,4 +296,4 @@ namespace blast { ger(alpha, x, y, A, B); } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/Getf2.hpp b/include/blast/math/dense/Getf2.hpp index 73614b01..bd359835 100644 --- a/include/blast/math/dense/Getf2.hpp +++ b/include/blast/math/dense/Getf2.hpp @@ -19,10 +19,7 @@ #include #include #include -#include -#include - -#include +#include namespace blast @@ -106,7 +103,7 @@ namespace blast template inline void getf2(DenseMatrix& A, size_t * ipiv) { - getf2(rows(*A), columns(*A), ptr(A), ipiv); + getf2(rows(*A), columns(*A), ptr(*A), ipiv); } @@ -165,4 +162,4 @@ namespace blast ); } } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/Getrf.hpp b/include/blast/math/dense/Getrf.hpp index cc94faa0..7b14d59a 100644 --- a/include/blast/math/dense/Getrf.hpp +++ b/include/blast/math/dense/Getrf.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -57,7 +57,7 @@ namespace blast size_t const M = rows(A); size_t const N = columns(A); - auto pA = ptr(A); + auto pA = ptr(*A); size_t k = 0; @@ -160,4 +160,4 @@ namespace blast ipiv[k] = k; } } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/Iamax.hpp b/include/blast/math/dense/Iamax.hpp index 530df07b..16e2e464 100644 --- a/include/blast/math/dense/Iamax.hpp +++ b/include/blast/math/dense/Iamax.hpp @@ -16,7 +16,8 @@ #include -#include +#include +#include #include #include diff --git a/include/blast/math/dense/Laswp.hpp b/include/blast/math/dense/Laswp.hpp index f363e756..844ec02a 100644 --- a/include/blast/math/dense/Laswp.hpp +++ b/include/blast/math/dense/Laswp.hpp @@ -14,9 +14,10 @@ #pragma once - #include +#include + namespace blast { @@ -34,10 +35,10 @@ namespace blast @a k0 ... @a k1 - 1 of @a ipiv are accessed. ipiv[k] = l implies rows k and l are to be interchanged. */ template - inline void laswp(DenseMatrix& A, size_t k0, size_t k1, size_t * ipiv) + inline void laswp(blaze::DenseMatrix& A, size_t k0, size_t k1, size_t * ipiv) { for (size_t k = k0; k < k1; ++k) if (k != ipiv[k]) - swap(row(*A, k, unchecked), row(*A, ipiv[k], unchecked)); + swap(row(*A, k, blaze::unchecked), row(*A, ipiv[k], blaze::unchecked)); } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/MatrixPointer.hpp b/include/blast/math/dense/MatrixPointer.hpp deleted file mode 100644 index 06372ed9..00000000 --- a/include/blast/math/dense/MatrixPointer.hpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include -#include - - -namespace blast -{ - template - BLAZE_ALWAYS_INLINE auto ptr(blaze::DenseMatrix& m) - { - return ptr>(m, 0, 0); - } - - - template - BLAZE_ALWAYS_INLINE auto ptr(blaze::DenseMatrix const& m) - { - return ptr>(m, 0, 0); - } - - - template - BLAZE_ALWAYS_INLINE auto ptr(blaze::DMatTransExpr const& m) - { - return ptr>(m, 0, 0); - } -} \ No newline at end of file diff --git a/include/blast/math/dense/Potrf.hpp b/include/blast/math/dense/Potrf.hpp index 50cfd56d..3b1e1e02 100644 --- a/include/blast/math/dense/Potrf.hpp +++ b/include/blast/math/dense/Potrf.hpp @@ -5,27 +5,23 @@ #pragma once -#include +#include #include #include #include -#include -#include +#include #include namespace blast { - using namespace blaze; - - template BLAZE_ALWAYS_INLINE void potrf_backend(size_t k, size_t i, - DenseMatrix const& A, DenseMatrix& L) + blaze::DenseMatrix const& A, blaze::DenseMatrix& L) { - using ET = ElementType_t; + using ET = blaze::ElementType_t; size_t constexpr TILE_SIZE = TileSize_v; size_t const M = rows(A); @@ -36,10 +32,10 @@ namespace blast RegisterMatrix ker; - ker.load(1., ptr(A, i, k)); + ker.load(1., ptr(*A, i, k)); - auto a = ptr(L, i, 0); - auto b = ptr(L, k, 0); + auto a = ptr(*L, i, 0); + auto b = ptr(*L, k, 0); for (size_t l = 0; l < k; ++l) ker.ger(ET(-1.), column(a(0, l)), row(trans(b)(l, 0))); @@ -50,31 +46,31 @@ namespace blast ker.potrf(); if (k + KN <= N) - ker.storeLower(ptr(L, i, k)); + ker.storeLower(ptr(*L, i, k)); else - ker.storeLower(ptr(L, i, k), std::min(M - i, KM), N - k); + ker.storeLower(ptr(*L, i, k), std::min(M - i, KM), N - k); } else { // Off-diagonal blocks - ker.trsm(Side::Right, UpLo::Upper, ptr(L, k, k).trans()); + ker.trsm(Side::Right, UpLo::Upper, ptr(*L, k, k).trans()); if (k + KN <= N) - ker.store(ptr(L, i, k)); + ker.store(ptr(*L, i, k)); else - ker.store(ptr(L, i, k), std::min(M - i, KM), N - k); + ker.store(ptr(*L, i, k), std::min(M - i, KM), N - k); } } template inline void potrf( - DenseMatrix const& A, DenseMatrix& L) + blaze::DenseMatrix const& A, blaze::DenseMatrix& L) { - using ET = ElementType_t; + using ET = blaze::ElementType_t; size_t constexpr TILE_SIZE = TileSize_v; - BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(ElementType_t, ET); + BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(blaze::ElementType_t, ET); size_t const M = rows(A); size_t const N = columns(A); @@ -108,4 +104,4 @@ namespace blast potrf_backend<1 * TILE_SIZE, KN>(k, i, *A, *L); } } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/StaticMatrixPointer.hpp b/include/blast/math/dense/StaticMatrixPointer.hpp index 961b5e01..bb70c091 100644 --- a/include/blast/math/dense/StaticMatrixPointer.hpp +++ b/include/blast/math/dense/StaticMatrixPointer.hpp @@ -12,10 +12,6 @@ #include #include -#include -#include -#include - namespace blast { @@ -203,26 +199,18 @@ namespace blast } - template - requires IsStatic_v - BLAZE_ALWAYS_INLINE auto ptr(blaze::DenseMatrix& m, size_t i, size_t j) - { - return StaticMatrixPointer, MT::spacing(), SO, AF, IsPadded_v>((*m).data(), i, j); - } - - - template - requires IsStatic_v - BLAZE_ALWAYS_INLINE auto ptr(blaze::DenseMatrix const& m, size_t i, size_t j) + template + requires IsStatic_v && IsDenseMatrix_v + BLAZE_ALWAYS_INLINE auto ptr(MT& m, size_t i, size_t j) { - return StaticMatrixPointer const, MT::spacing(), SO, AF, IsPadded_v>((*m).data(), i, j); + return StaticMatrixPointer, Spacing_v, StorageOrder_v, AF, IsPadded_v>(data(m), i, j); } - template - requires IsStatic_v - BLAZE_ALWAYS_INLINE auto ptr(blaze::DMatTransExpr const& m, size_t i, size_t j) + template + requires IsStatic_v && IsDenseMatrix_v + BLAZE_ALWAYS_INLINE auto ptr(MT const& m, size_t i, size_t j) { - return trans(ptr(m.operand(), j, i)); + return StaticMatrixPointer const, Spacing_v, StorageOrder_v, AF, IsPadded_v>(data(m), i, j); } } diff --git a/include/blast/math/dense/StaticVectorPointer.hpp b/include/blast/math/dense/StaticVectorPointer.hpp index 6017dafa..6f7a0e2e 100644 --- a/include/blast/math/dense/StaticVectorPointer.hpp +++ b/include/blast/math/dense/StaticVectorPointer.hpp @@ -6,6 +6,7 @@ #include +#include #include @@ -213,4 +214,46 @@ namespace blast { return p.trans(); } + + + /** + * @brief Pointer to a statically spaced vector + * + * @tparam AF true if the pointer is SIMD-aligned + * @tparam VT vector type + * + * @param v vector + * @param i index within @a v + * + * @return vector pointer to @a i -th element of @a v + */ + template + requires IsStaticallySpaced_v + BLAST_ALWAYS_INLINE auto ptr(VT& v, size_t i) + { + // NOTE: we don't use data(v) here because of this bug: + // https://bitbucket.org/blaze-lib/blaze/issues/457 + return StaticVectorPointer, Spacing_v, VT::transposeFlag, AF, IsPadded_v> {&v[i]}; + } + + + /** + * @brief Pointer to a statically spaced const vector + * + * @tparam AF true if the pointer is SIMD-aligned + * @tparam VT vector type + * + * @param v vector + * @param i index within @a v + * + * @return vector pointer to @a i -th element of @a v + */ + template + requires IsStaticallySpaced_v + BLAST_ALWAYS_INLINE auto ptr(VT const& v, size_t i) + { + // NOTE: we don't use data(v) here because of this bug: + // https://bitbucket.org/blaze-lib/blaze/issues/457 + return StaticVectorPointer const, Spacing_v, VT::transposeFlag, AF, IsPadded_v> {&v[i]}; + } } diff --git a/include/blast/math/dense/Swap.hpp b/include/blast/math/dense/Swap.hpp index d192e9fe..cf574c59 100644 --- a/include/blast/math/dense/Swap.hpp +++ b/include/blast/math/dense/Swap.hpp @@ -15,8 +15,7 @@ #pragma once #include - -#include +#include #include @@ -69,4 +68,4 @@ namespace blast if (N > 0) swap(N, ptr(*x), ptr(*y)); } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/Syrk.hpp b/include/blast/math/dense/Syrk.hpp index 182789eb..a39d47f3 100644 --- a/include/blast/math/dense/Syrk.hpp +++ b/include/blast/math/dense/Syrk.hpp @@ -7,10 +7,7 @@ #include #include #include - -#include -#include -#include +#include namespace blast @@ -48,46 +45,46 @@ namespace blast for (; i + 3 * TILE_SIZE <= M && i + 4 * TILE_SIZE != M; i += 3 * TILE_SIZE) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j)); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0))); + ker.load(beta, ptr(*C, i, j)); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0))); if (i == j) - ker.storeLower(ptr(D, i, j)); + ker.storeLower(ptr(*D, i, j)); else - ker.store(ptr(D, i, j)); + ker.store(ptr(*D, i, j)); } for (; i + 2 * TILE_SIZE <= M; i += 2 * TILE_SIZE) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j)); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0))); + ker.load(beta, ptr(*C, i, j)); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0))); if (i == j) - ker.storeLower(ptr(D, i, j)); + ker.storeLower(ptr(*D, i, j)); else - ker.store(ptr(D, i, j)); + ker.store(ptr(*D, i, j)); } for (; i + 1 * TILE_SIZE <= M; i += 1 * TILE_SIZE) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j)); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0))); + ker.load(beta, ptr(*C, i, j)); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0))); if (i == j) - ker.storeLower(ptr(D, i, j)); + ker.storeLower(ptr(*D, i, j)); else - ker.store(ptr(D, i, j)); + ker.store(ptr(*D, i, j)); } // Bottom side if (i < M) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j), M - i, ker.columns()); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0)), M - i, ker.columns()); + ker.load(beta, ptr(*C, i, j), M - i, ker.columns()); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0)), M - i, ker.columns()); if (i == j) - ker.storeLower(ptr(D, i, j), M - i, ker.columns()); + ker.storeLower(ptr(*D, i, j), M - i, ker.columns()); else - ker.store(ptr(D, i, j), M - i, ker.columns()); + ker.store(ptr(*D, i, j), M - i, ker.columns()); } } @@ -102,48 +99,48 @@ namespace blast for (; i + 3 * TILE_SIZE <= M && i + 4 * TILE_SIZE != M; i += 3 * TILE_SIZE) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j), ker.rows(), M - j); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0)), ker.rows(), M - j); + ker.load(beta, ptr(*C, i, j), ker.rows(), M - j); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0)), ker.rows(), M - j); if (i == j) - ker.storeLower(ptr(D, i, j), ker.rows(), M - j); + ker.storeLower(ptr(*D, i, j), ker.rows(), M - j); else - ker.store(ptr(D, i, j), ker.rows(), M - j); + ker.store(ptr(*D, i, j), ker.rows(), M - j); } for (; i + 2 * TILE_SIZE <= M; i += 2 * TILE_SIZE) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j), ker.rows(), M - j); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0)), ker.rows(), M - j); + ker.load(beta, ptr(*C, i, j), ker.rows(), M - j); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0)), ker.rows(), M - j); if (i == j) - ker.storeLower(ptr(D, i, j), ker.rows(), M - j); + ker.storeLower(ptr(*D, i, j), ker.rows(), M - j); else - ker.store(ptr(D, i, j), ker.rows(), M - j); + ker.store(ptr(*D, i, j), ker.rows(), M - j); } for (; i + 1 * TILE_SIZE <= M; i += 1 * TILE_SIZE) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j), ker.rows(), M - j); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0)), ker.rows(), M - j); + ker.load(beta, ptr(*C, i, j), ker.rows(), M - j); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0)), ker.rows(), M - j); if (i == j) - ker.storeLower(ptr(D, i, j), ker.rows(), M - j); + ker.storeLower(ptr(*D, i, j), ker.rows(), M - j); else - ker.store(ptr(D, i, j), ker.rows(), M - j); + ker.store(ptr(*D, i, j), ker.rows(), M - j); } // Bottom-right corner if (i < M) { RegisterMatrix ker; - ker.load(beta, ptr(C, i, j), ker.rows(), M - j); - gemm(ker, K, alpha, ptr(A, i, 0), trans(ptr(A, j, 0)), M - i, M - j); + ker.load(beta, ptr(*C, i, j), ker.rows(), M - j); + gemm(ker, K, alpha, ptr(*A, i, 0), trans(ptr(*A, j, 0)), M - i, M - j); if (i == j) - ker.storeLower(ptr(D, i, j), M - i, M - j); + ker.storeLower(ptr(*D, i, j), M - i, M - j); else - ker.store(ptr(D, i, j), M - i, M - j); + ker.store(ptr(*D, i, j), M - i, M - j); } } } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/Trmm.hpp b/include/blast/math/dense/Trmm.hpp index 041159e0..34dc7c4f 100644 --- a/include/blast/math/dense/Trmm.hpp +++ b/include/blast/math/dense/Trmm.hpp @@ -44,15 +44,15 @@ namespace blast // it is more efficient to apply 2 * TILE_SIZE kernel 2 times than 3 * TILE_SIZE + 1 * TILE_SIZE kernel. for (; i + 2 * TILE_SIZE < M && i + 4 * TILE_SIZE != M; i += 3 * TILE_SIZE) trmmLeftUpper_backend<3 * TILE_SIZE, TILE_SIZE>( - M - i, N, alpha, ptr(A, i, i), ptr(B, i, 0), ptr(C, i, 0)); + M - i, N, alpha, ptr(*A, i, i), ptr(*B, i, 0), ptr(*C, i, 0)); for (; i + 1 * TILE_SIZE < M; i += 2 * TILE_SIZE) trmmLeftUpper_backend<2 * TILE_SIZE, TILE_SIZE>( - M - i, N, alpha, ptr(A, i, i), ptr(B, i, 0), ptr(C, i, 0)); + M - i, N, alpha, ptr(*A, i, i), ptr(*B, i, 0), ptr(*C, i, 0)); for (; i + 0 * TILE_SIZE < M; i += 1 * TILE_SIZE) trmmLeftUpper_backend<1 * TILE_SIZE, TILE_SIZE>( - M - i, N, alpha, ptr(A, i, i), ptr(B, i, 0), ptr(C, i, 0)); + M - i, N, alpha, ptr(*A, i, i), ptr(*B, i, 0), ptr(*C, i, 0)); } @@ -92,46 +92,46 @@ namespace blast for (; i + 3 * TILE_SIZE <= M && i + 4 * TILE_SIZE != M; i += 3 * TILE_SIZE) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j)); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j)); /* ker.trmmRightLower(alpha, ptr(B, i, j), ptr(A, j, j)); ker.gemm(K, alpha, ptr(B, i, j + TILE_SIZE), ptr(A, j + TILE_SIZE, j)); */ - ker.store(ptr(C, i, j)); + ker.store(ptr(*C, i, j)); } for (; i + 2 * TILE_SIZE <= M; i += 2 * TILE_SIZE) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j)); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j)); /* ker.trmmRightLower(alpha, ptr(B, i, j), ptr(A, j, j)); ker.gemm(K, alpha, ptr(B, i, j + TILE_SIZE), ptr(A, j + TILE_SIZE, j)); */ - ker.store(ptr(C, i, j)); + ker.store(ptr(*C, i, j)); } for (; i + 1 * TILE_SIZE <= M; i += 1 * TILE_SIZE) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j)); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j)); /* ker.trmmRightLower(alpha, ptr(B, i, j), ptr(A, j, j)); ker.gemm(K, alpha, ptr(B, i, j + TILE_SIZE), ptr(A, j + TILE_SIZE, j)); */ - ker.store(ptr(C, i, j)); + ker.store(ptr(*C, i, j)); } // Bottom side if (i < M) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j), M - i, ker.columns()); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j), M - i, ker.columns()); /* ker.trmmRightLower(alpha, ptr(B, i, j), ptr(A, j, j)); ker.gemm(K, alpha, ptr(B, i, j + TILE_SIZE), ptr(A, j + TILE_SIZE, j), M - i, ker.columns()); */ - ker.store(ptr(C, i, j), M - i, ker.columns()); + ker.store(ptr(*C, i, j), M - i, ker.columns()); } } @@ -146,31 +146,31 @@ namespace blast for (; i + 3 * TILE_SIZE <= M && i + 4 * TILE_SIZE != M; i += 3 * TILE_SIZE) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j), ker.rows(), N - j); - ker.store(ptr(C, i, j), ker.rows(), N - j); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j), ker.rows(), N - j); + ker.store(ptr(*C, i, j), ker.rows(), N - j); } for (; i + 2 * TILE_SIZE <= M; i += 2 * TILE_SIZE) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j), ker.rows(), N - j); - ker.store(ptr(C, i, j), ker.rows(), N - j); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j), ker.rows(), N - j); + ker.store(ptr(*C, i, j), ker.rows(), N - j); } for (; i + 1 * TILE_SIZE <= M; i += 1 * TILE_SIZE) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j), ker.rows(), N - j); - ker.store(ptr(C, i, j), ker.rows(), N - j); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j), ker.rows(), N - j); + ker.store(ptr(*C, i, j), ker.rows(), N - j); } // Bottom-right corner if (i < M) { RegisterMatrix ker; - gemm(ker, N - j, alpha, ptr(B, i, j), ptr(A, j, j), M - i, N - j); - ker.store(ptr(C, i, j), M - i, N - j); + gemm(ker, N - j, alpha, ptr(*B, i, j), ptr(*A, j, j), M - i, N - j); + ker.store(ptr(*C, i, j), M - i, N - j); } } } -} \ No newline at end of file +} diff --git a/include/blast/math/dense/VectorPointer.hpp b/include/blast/math/dense/VectorPointer.hpp deleted file mode 100644 index 39a4538e..00000000 --- a/include/blast/math/dense/VectorPointer.hpp +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -namespace blast -{ - /* - ======================================================================================= - - Pointer to static dense vector. - - ======================================================================================= - */ - - template - BLAZE_ALWAYS_INLINE auto ptr(StaticVector& v, size_t i) - { - return StaticVectorPointer {v.data() + i}; - } - - - template - BLAZE_ALWAYS_INLINE auto ptr(StaticVector const& v, size_t i) - { - return StaticVectorPointer {v.data() + i}; - } - - - /* - ======================================================================================= - - Pointer to dynamic dense vector. - - ======================================================================================= - */ - - template - BLAZE_ALWAYS_INLINE auto ptr(DynamicVector& v, size_t i) - { - using VT = DynamicVector; - return StaticVectorPointer> {v.data() + i}; - } - - - template - BLAZE_ALWAYS_INLINE auto ptr(DynamicVector const& v, size_t i) - { - using VT = DynamicVector; - return StaticVectorPointer> {v.data() + i}; - } - - - /* - ======================================================================================= - - Pointer to vector transpose expression. - - ======================================================================================= - */ - - template - requires IsDenseVector_v && IsTransExpr_v - BLAZE_ALWAYS_INLINE auto ptr(VT& v, size_t i) noexcept - { - return trans(ptr(v.operand(), i)); - } - - - template - requires IsDenseVector_v && IsTransExpr_v - BLAZE_ALWAYS_INLINE auto ptr(VT const& v, size_t i) noexcept - { - return trans(ptr(v.operand(), i)); - } - - - /* - ======================================================================================= - - Pointer to row of a matrix. - - ======================================================================================= - */ - - template - requires IsDenseVector_v && IsRow_v - BLAZE_ALWAYS_INLINE auto ptr(VT& v, size_t i) noexcept - { - using MT = ViewedType_t; - using ET = ElementType_t; - - if constexpr (IsRowMajorMatrix_v) - { - return StaticVectorPointer> {&(*v)[i]}; - } - else - { - if constexpr (IsStatic_v) - return StaticVectorPointer> {&(*v)[i]}; - else - return DynamicVectorPointer> {&(*v)[i], v.operand().spacing()}; - } - } - - - template - requires IsDenseVector_v && IsRow_v - BLAZE_ALWAYS_INLINE auto ptr(VT const& v, size_t i) noexcept - { - using MT = ViewedType_t; - using ET = ElementType_t; - - if constexpr (IsRowMajorMatrix_v) - { - return StaticVectorPointer> {&(*v)[i]}; - } - else - { - if constexpr (IsStatic_v) - return StaticVectorPointer> {&(*v)[i]}; - else - return DynamicVectorPointer> {&(*v)[i], v.operand().spacing()}; - } - } - - - /* - ======================================================================================= - - Pointer to column of a matrix. - - ======================================================================================= - */ - - template - requires IsDenseVector_v && IsColumn_v - BLAZE_ALWAYS_INLINE auto ptr(VT& v, size_t i) noexcept - { - using MT = ViewedType_t; - using ET = ElementType_t; - - if constexpr (IsColumnMajorMatrix_v) - { - return StaticVectorPointer> {&(*v)[i]}; - } - else - { - if constexpr (IsStatic_v) - return StaticVectorPointer> {&(*v)[i]}; - else - return DynamicVectorPointer> {&(*v)[i], v.operand().spacing()}; - } - } - - - template - requires IsDenseVector_v && IsColumn_v - BLAZE_ALWAYS_INLINE auto ptr(VT const& v, size_t i) noexcept - { - using MT = ViewedType_t; - using ET = ElementType_t; - - if constexpr (IsColumnMajorMatrix_v) - { - return StaticVectorPointer> {&(*v)[i]}; - } - else - { - if constexpr (IsStatic_v) - return StaticVectorPointer> {&(*v)[i]}; - else - return DynamicVectorPointer> {&(*v)[i], v.operand().spacing()}; - } - } - - - /* - ======================================================================================= - - Pointer to a subvector. - - ======================================================================================= - */ - - template - requires IsDenseVector_v && IsSubvector_v - BLAZE_ALWAYS_INLINE auto ptr(VT& v, size_t i) noexcept - { - return ptr(v.operand(), v.offset() + i); - } - - - template - requires IsDenseVector_v && IsSubvector_v - BLAZE_ALWAYS_INLINE auto ptr(VT const& v, size_t i) noexcept - { - return ptr(v.operand(), v.offset() + i); - } - - - /* - ======================================================================================= - - Pointer to general dense vector. - - ======================================================================================= - */ - template - requires IsDenseVector_v - BLAZE_ALWAYS_INLINE auto ptr(VT& v) - { - return ptr>(v, 0); - } - - - template - requires IsDenseVector_v - BLAZE_ALWAYS_INLINE auto ptr(VT const& v) - { - return ptr>(v, 0); - } -} \ No newline at end of file diff --git a/include/blast/math/expressions/PMatTransExpr.hpp b/include/blast/math/expressions/PMatTransExpr.hpp index 3098741a..ad5b262c 100644 --- a/include/blast/math/expressions/PMatTransExpr.hpp +++ b/include/blast/math/expressions/PMatTransExpr.hpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -32,6 +33,8 @@ #include #include +#include + namespace blast { @@ -234,4 +237,22 @@ namespace blast using ReturnType = const PMatTransExpr; return ReturnType(*pm); } + + + template + struct IsPanelMatrix> : std::true_type {}; + + + template + struct IsStatic> : IsStatic {}; + + + /** + * @brief Specialization for @a PMatTransExpr + * + * @tparam expression operand type + * @tparam SO storage order + */ + template + struct Spacing> : Spacing {}; } diff --git a/include/blast/math/expressions/PanelMatrix.hpp b/include/blast/math/expressions/PanelMatrix.hpp index d0494887..50d12268 100644 --- a/include/blast/math/expressions/PanelMatrix.hpp +++ b/include/blast/math/expressions/PanelMatrix.hpp @@ -5,28 +5,21 @@ #pragma once #include +#include #include #include #include #include -#include -#include +#include -#include -#include -#include -#include -#include +#include namespace blast { - using namespace blaze; - - template struct PanelMatrix - : public Matrix + : public blaze::Matrix { public: using TagType = Group0; @@ -80,14 +73,14 @@ namespace blast template inline auto assign(PanelMatrix& lhs, blaze::DMatTDMatMultExpr const& rhs) - -> blaze::EnableIf_t && IsRowMajorMatrix_v && IsPanelMatrix_v && IsRowMajorMatrix_v> + -> blaze::EnableIf_t && blaze::IsRowMajorMatrix_v && IsPanelMatrix_v && blaze::IsRowMajorMatrix_v> { BLAZE_THROW_LOGIC_ERROR("Not implemented 2"); } template - inline void assign(DenseMatrix& lhs, PanelMatrix const& rhs) + inline void assign(blaze::DenseMatrix& lhs, PanelMatrix const& rhs) { BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" ); BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" ); @@ -176,7 +169,7 @@ namespace blast template - inline void assign(PanelMatrix& lhs, DenseMatrix const& rhs) + inline void assign(PanelMatrix& lhs, blaze::DenseMatrix const& rhs) { size_t const m = (*rhs).rows(); size_t const n = (*rhs).columns(); diff --git a/include/blast/math/panel/DynamicPanelMatrix.hpp b/include/blast/math/panel/DynamicPanelMatrix.hpp index f82e51e6..4dd69d1f 100644 --- a/include/blast/math/panel/DynamicPanelMatrix.hpp +++ b/include/blast/math/panel/DynamicPanelMatrix.hpp @@ -109,7 +109,7 @@ namespace blast template< typename MT // Type of the right-hand side matrix , bool SO2 > // Storage order of the right-hand side matrix - DynamicPanelMatrix& operator=(Matrix const& rhs) + DynamicPanelMatrix& operator=(blaze::Matrix const& rhs) { assign(*this, *rhs); return *this; @@ -238,4 +238,4 @@ namespace blaze { using Type = blast::DynamicPanelMatrix; }; -} \ No newline at end of file +} diff --git a/include/blast/math/panel/DynamicPanelMatrixPointer.hpp b/include/blast/math/panel/DynamicPanelMatrixPointer.hpp index 13d23cec..f58dcabf 100644 --- a/include/blast/math/panel/DynamicPanelMatrixPointer.hpp +++ b/include/blast/math/panel/DynamicPanelMatrixPointer.hpp @@ -8,12 +8,8 @@ #include #include #include -#include -#include #include -#include - namespace blast { @@ -252,26 +248,18 @@ namespace blast } - template - requires (!IsStatic_v) - BLAZE_ALWAYS_INLINE auto ptr(PanelMatrix& m, size_t i, size_t j) - { - return DynamicPanelMatrixPointer, SO, AF, IsPadded_v>((*m).data(), spacing(m), i, j); - } - - - template - requires (!IsStatic_v) - BLAZE_ALWAYS_INLINE auto ptr(PanelMatrix const& m, size_t i, size_t j) + template + requires (!IsStatic_v) && IsPanelMatrix_v + BLAZE_ALWAYS_INLINE auto ptr(MT& m, size_t i, size_t j) { - return DynamicPanelMatrixPointer const, SO, AF, IsPadded_v>((*m).data(), spacing(m), i, j); + return DynamicPanelMatrixPointer, StorageOrder_v, AF, IsPadded_v>(data(m), spacing(m), i, j); } - template + template requires (!IsStatic_v) && IsPanelMatrix_v - BLAZE_ALWAYS_INLINE auto ptr(PMatTransExpr const& m, size_t i, size_t j) + BLAZE_ALWAYS_INLINE auto ptr(MT const& m, size_t i, size_t j) { - return trans(ptr(m.operand(), j, i)); + return DynamicPanelMatrixPointer const, StorageOrder_v, AF, IsPadded_v>(data(m), spacing(m), i, j); } } diff --git a/include/blast/math/panel/Gemm.hpp b/include/blast/math/panel/Gemm.hpp deleted file mode 100644 index aaeb9231..00000000 --- a/include/blast/math/panel/Gemm.hpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#pragma once - -#include -#include -#include -#include - - -namespace blast -{ - /** - * @brief Matrix-matrix multiplication for @a PanelMatrix arguments - * - * D := alpha*A*B + beta*C - * - * alpha and beta are scalars, and A, B and C are matrices, with A - * an m by k matrix, B a k by n matrix and C an m by n matrix. - * - * @param alpha the scalar alpha - * @param A the matrix A - * @param B the matrix B - * @param beta the scalar beta - * @param C the matrix C - * @param D the output matrix D - */ - template < - typename ST1, typename MT1, typename MT2, bool SO2, - typename ST2, typename MT3, typename MT4 - > - inline void gemm( - ST1 alpha, - PanelMatrix const& A, PanelMatrix const& B, - ST2 beta, PanelMatrix const& C, PanelMatrix& D) - { - size_t const M = rows(A); - size_t const N = columns(B); - size_t const K = columns(A); - - if (rows(B) != K) - BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); - - if (rows(C) != M || columns(C) != N) - BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); - - if (rows(D) != M || columns(D) != N) - BLAST_THROW_EXCEPTION(std::invalid_argument {"Matrix sizes do not match"}); - - gemm(M, N, K, alpha, ptr(*A), ptr(*B), beta, ptr(*C), ptr(*D)); - } - - - /** - * @brief Matrix-matrix multiplication for @a PanelMatrix arguments - * - * D := A*B + C - * - * A, B and C are matrices, with A - * an m by k matrix, B a k by n matrix and C an m by n matrix. - * - * @param A the matrix A - * @param B the matrix B - * @param C the matrix C - * @param D the output matrix D - */ - template < - typename MT1, typename MT2, bool SO2, - typename MT3, typename MT4 - > - inline void gemm( - PanelMatrix const& A, PanelMatrix const& B, - PanelMatrix const& C, PanelMatrix& D) - { - using ET = ElementType_t; - gemm(ET(1.), A, B, ET(1.), C, D); - } -} diff --git a/include/blast/math/panel/MatrixPointer.hpp b/include/blast/math/panel/MatrixPointer.hpp deleted file mode 100644 index 6caca766..00000000 --- a/include/blast/math/panel/MatrixPointer.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include - - -namespace blast -{ - template - BLAZE_ALWAYS_INLINE auto ptr(PanelMatrix& m) - { - return ptr>(*m, 0, 0); - } - - - template - BLAZE_ALWAYS_INLINE auto ptr(PanelMatrix const& m) - { - return ptr>(*m, 0, 0); - } - - - template - requires IsPanelMatrix_v - BLAZE_ALWAYS_INLINE auto ptr(blaze::DMatTransExpr const& m) - { - return ptr>(*m, 0, 0); - } -} diff --git a/include/blast/math/panel/Potrf.hpp b/include/blast/math/panel/Potrf.hpp index 879c46f0..e1763809 100644 --- a/include/blast/math/panel/Potrf.hpp +++ b/include/blast/math/panel/Potrf.hpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include @@ -97,4 +97,4 @@ namespace blast potrf_backend<1 * PANEL_SIZE, KN>(k, i, *A, *L); } } -} \ No newline at end of file +} diff --git a/include/blast/math/panel/StaticPanelMatrix.hpp b/include/blast/math/panel/StaticPanelMatrix.hpp index 585b6f6b..0fa0a7b3 100644 --- a/include/blast/math/panel/StaticPanelMatrix.hpp +++ b/include/blast/math/panel/StaticPanelMatrix.hpp @@ -5,9 +5,10 @@ #pragma once #include -#include #include +#include #include +#include #include #include @@ -15,8 +16,10 @@ #include #include #include +#include #include +#include namespace blast @@ -223,6 +226,18 @@ namespace blast return *this; } + + + /** + * @brief Specialization for @a StaticPanelMatrix + * + * @tparam Type element type + * @tparam M number of rows + * @tparam N number of columns + * @tparam SO storage order + */ + template + struct Spacing> : std::integral_constant::spacing()> {}; } namespace blaze diff --git a/include/blast/math/panel/StaticPanelMatrixPointer.hpp b/include/blast/math/panel/StaticPanelMatrixPointer.hpp index 85f3d1ba..da10f069 100644 --- a/include/blast/math/panel/StaticPanelMatrixPointer.hpp +++ b/include/blast/math/panel/StaticPanelMatrixPointer.hpp @@ -8,8 +8,6 @@ #include #include #include -#include -#include #include @@ -244,26 +242,18 @@ namespace blast } - template - requires IsStatic_v - BLAZE_ALWAYS_INLINE auto ptr(PanelMatrix& m, size_t i, size_t j) - { - return StaticPanelMatrixPointer, MT::spacing(), SO, AF, IsPadded_v>((*m).data(), i, j); - } - - - template - requires IsStatic_v - BLAZE_ALWAYS_INLINE auto ptr(PanelMatrix const& m, size_t i, size_t j) + template + requires IsStatic_v && IsPanelMatrix_v + BLAZE_ALWAYS_INLINE auto ptr(MT& m, size_t i, size_t j) { - return StaticPanelMatrixPointer const, MT::spacing(), SO, AF, IsPadded_v>((*m).data(), i, j); + return StaticPanelMatrixPointer, Spacing_v, StorageOrder_v, AF, IsPadded_v>(data(m), i, j); } - template + template requires IsStatic_v && IsPanelMatrix_v - BLAZE_ALWAYS_INLINE auto ptr(PMatTransExpr const& m, size_t i, size_t j) + BLAZE_ALWAYS_INLINE auto ptr(MT const& m, size_t i, size_t j) { - return trans(ptr(m.operand(), j, i)); + return StaticPanelMatrixPointer const, Spacing_v, StorageOrder_v, AF, IsPadded_v>(data(m), i, j); } } diff --git a/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp b/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp index 9cd7f826..caa0eeb3 100644 --- a/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp +++ b/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp @@ -6,14 +6,16 @@ #include #include +#include #include #include +#include #include +#include #include #include #include -#include #include #include @@ -31,12 +33,12 @@ namespace blast /// @tparam SO orientation of SIMD registers. template class DynamicRegisterMatrix - : public Matrix, SO> + : public blaze::Matrix, SO> { public: static_assert(SO == columnMajor, "Only column-major register matrices are currently supported"); - using BaseType = Matrix, SO>; + using BaseType = blaze::Matrix, SO>; using BaseType::storageOrder; /// @brief Type of matrix elements @@ -135,7 +137,7 @@ namespace blast /// @brief R(0:m-1, 0:n-1) += beta * A template - requires MatrixPointer && (PA::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) void axpy(T beta, PA a) noexcept { #pragma unroll @@ -148,25 +150,25 @@ namespace blast /// @brief Load from memory template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void load(P p) noexcept; /// @brief Load from memory template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void load(T beta, P p) noexcept; /// @brief Store matrix at location pointed by \a p template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void store(P p) const noexcept; /// @brief Store lower-triangular part of the matrix at location pointed by \a p. template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void storeLower(P p) const noexcept; @@ -178,7 +180,7 @@ namespace blast /// @param a pointer to the first element of the first matrix. /// @param b pointer to the first element of the second matrix. template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer void ger(T alpha, PA a, PB b) noexcept; @@ -189,7 +191,7 @@ namespace blast /// @param a pointer to the first element of the first matrix. /// @param b pointer to the first element of the second matrix. template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer void ger(PA a, PB b) noexcept; @@ -202,7 +204,7 @@ namespace blast /// @brief a pointer to a triangular matrix /// template - requires MatrixPointer + requires MatrixPointer void trsmRightUpper(P a); @@ -222,7 +224,7 @@ namespace blast /// @param b general matrix. /// template - requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer void trmmLeftUpper(T alpha, P1 a, P2 b) noexcept; @@ -242,7 +244,7 @@ namespace blast /// @param b general matrix. /// template - requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (P1::storageOrder == columnMajor) && MatrixPointer void trmmRightLower(T alpha, P1 a, P2 b) noexcept; @@ -289,7 +291,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::load(P p) noexcept { #pragma unroll @@ -302,7 +304,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::load(T beta, P p) noexcept { #pragma unroll @@ -315,7 +317,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::store(P p) const noexcept { // The compile-time constant size of the j loop in combination with the if() expression @@ -337,7 +339,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void DynamicRegisterMatrix::storeLower(P p) const noexcept { for (size_t j = 0; j < N; ++j) if (j < n_) @@ -393,7 +395,7 @@ namespace blast template template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer BLAZE_ALWAYS_INLINE void DynamicRegisterMatrix::ger(T alpha, PA a, PB b) noexcept { SimdVecType ax[RM]; @@ -416,7 +418,7 @@ namespace blast template template - requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer + requires MatrixPointer && (PA::storageOrder == columnMajor) && MatrixPointer BLAZE_ALWAYS_INLINE void DynamicRegisterMatrix::ger(PA a, PB b) noexcept { SimdVecType ax[RM]; @@ -550,23 +552,23 @@ namespace blast // } - template - inline bool operator==(DynamicRegisterMatrix const& rm, Matrix const& m) + template + inline bool operator==(DynamicRegisterMatrix const& rm, MT const& m) { if (rows(m) != rm.rows() || columns(m) != rm.columns()) return false; for (size_t i = 0; i < rm.rows(); ++i) for (size_t j = 0; j < rm.columns(); ++j) - if (rm(i, j) != (*m)(i, j)) + if (rm(i, j) != m(i, j)) return false; return true; } - template - inline bool operator==(Matrix const& m, DynamicRegisterMatrix const& rm) + template + inline bool operator==(MT const& m, DynamicRegisterMatrix const& rm) { return rm == m; } @@ -576,9 +578,9 @@ namespace blast typename T, size_t M, size_t N, bool SO, typename PA, typename PB, typename PC, typename PD > - requires MatrixPointer && (PA::storageOrder == columnMajor) - && MatrixPointer - && MatrixPointer && (PC::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) + && MatrixPointer + && MatrixPointer && (PC::storageOrder == columnMajor) BLAZE_ALWAYS_INLINE void gemm(DynamicRegisterMatrix& ker, size_t K, T alpha, PA a, PB b, T beta, PC c, PD d) noexcept { diff --git a/include/blast/math/register_matrix/RegisterMatrix.hpp b/include/blast/math/register_matrix/RegisterMatrix.hpp index 631b530d..156031c0 100644 --- a/include/blast/math/register_matrix/RegisterMatrix.hpp +++ b/include/blast/math/register_matrix/RegisterMatrix.hpp @@ -7,9 +7,12 @@ #include #include #include +#include #include #include #include +#include +#include #include #include @@ -22,9 +25,6 @@ namespace blast { - using namespace blaze; - - /// @brief Register-resident matrix /// /// The RegisterMatrix class provides basic linear algebra operations that can be performed @@ -39,12 +39,12 @@ namespace blast /// template class RegisterMatrix - : public Matrix, SO> + : public blaze::Matrix, SO> { public: static_assert(SO == columnMajor, "Only column-major register matrices are currently supported"); - using BaseType = Matrix, SO>; + using BaseType = blaze::Matrix, SO>; using BaseType::storageOrder; /// @brief Type of matrix elements @@ -131,7 +131,7 @@ namespace blast /// @brief R += beta * A template - requires MatrixPointer && (PA::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) void axpy(T beta, PA a) noexcept { SimdVecType const beta_simd {beta}; @@ -146,7 +146,7 @@ namespace blast /// @brief R(0:m-1, 0:n-1) += beta * A template - requires MatrixPointer && (PA::storageOrder == columnMajor) + requires MatrixPointer && (PA::storageOrder == columnMajor) void axpy(T beta, PA a, size_t m, size_t n) noexcept { SimdVecType const beta_simd {beta}; @@ -191,7 +191,7 @@ namespace blast /// @brief Store lower-triangular part of the matrix at location pointed by \a p. template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) void storeLower(P p) const noexcept; @@ -466,7 +466,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::store(P p) const noexcept { #pragma unroll @@ -479,7 +479,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::store(P p, size_t m, size_t n) const noexcept { // The compile-time constant size of the j loop in combination with the if() expression @@ -501,7 +501,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::storeLower(P p) const noexcept { for (size_t j = 0; j < N; ++j) @@ -524,7 +524,7 @@ namespace blast template template - requires MatrixPointer && (P::storageOrder == columnMajor) + requires MatrixPointer && (P::storageOrder == columnMajor) inline void RegisterMatrix::storeLower(P p, size_t m, size_t n) const noexcept { for (size_t j = 0; j < N; ++j) if (j < n) @@ -802,23 +802,23 @@ namespace blast } - template - inline bool operator==(RegisterMatrix const& rm, Matrix const& m) + template + inline bool operator==(RegisterMatrix const& rm, MT const& m) { if (rows(m) != rm.rows() || columns(m) != rm.columns()) return false; for (size_t i = 0; i < rm.rows(); ++i) for (size_t j = 0; j < rm.columns(); ++j) - if (rm(i, j) != (*m)(i, j)) + if (rm(i, j) != m(i, j)) return false; return true; } - template - inline bool operator==(Matrix const& m, RegisterMatrix const& rm) + template + inline bool operator==(MT const& m, RegisterMatrix const& rm) { return rm == m; } diff --git a/include/blast/math/typetraits/IsContiguous.hpp b/include/blast/math/typetraits/IsContiguous.hpp new file mode 100644 index 00000000..5a978b7b --- /dev/null +++ b/include/blast/math/typetraits/IsContiguous.hpp @@ -0,0 +1,26 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + + +namespace blast +{ + /** + * @brief Tests whether the elements of a given data type lie contiguous in memory + * + * @tparam T data type + */ + template + struct IsContiguous; + + + /** + * @brief Shortcut for @a IsContiguous::value + * + * @tparam T data type + */ + template + bool constexpr IsContiguous_v = IsContiguous::value; +} diff --git a/include/blast/math/typetraits/IsDenseMatrix.hpp b/include/blast/math/typetraits/IsDenseMatrix.hpp new file mode 100644 index 00000000..0709f0f8 --- /dev/null +++ b/include/blast/math/typetraits/IsDenseMatrix.hpp @@ -0,0 +1,27 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +namespace blast +{ + /** + * @brief Tests if the given data type is a dense matrix with column- or row-major storage + * + * @tparam T data type + */ + template + struct IsDenseMatrix : std::false_type {}; + + + /** + * @brief Shortcut for @a IsDenseMatrix::value + * + * @tparam T data type + */ + template + bool constexpr IsDenseMatrix_v = IsDenseMatrix::value; +} diff --git a/include/blast/math/typetraits/IsDenseVector.hpp b/include/blast/math/typetraits/IsDenseVector.hpp new file mode 100644 index 00000000..ae0df964 --- /dev/null +++ b/include/blast/math/typetraits/IsDenseVector.hpp @@ -0,0 +1,28 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + + +namespace blast +{ + /** + * @brief Tests if the given data type is a vector with dense storage + * + * @tparam T data type + */ + template + struct IsDenseVector : std::false_type {}; + + + /** + * @brief Shortcut for @a IsDenseVector::value + * + * @tparam T data type + */ + template + bool constexpr IsDenseVector_v = IsDenseVector::value; +} diff --git a/include/blast/math/typetraits/IsStatic.hpp b/include/blast/math/typetraits/IsStatic.hpp index 0f60867e..a6bb784b 100644 --- a/include/blast/math/typetraits/IsStatic.hpp +++ b/include/blast/math/typetraits/IsStatic.hpp @@ -1,23 +1,26 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once -#include - namespace blast { - using blaze::IsStatic_v; -} \ No newline at end of file + /** + * @brief Tests if dimensions of the given matrix or vector type are known at compile-time. + * + * @tparam T matrix or vector type + */ + template + struct IsStatic; + + + /** + * @brief Shortcut for @a IsStatic::value + * + * @tparam T matrix or vector type + */ + template + bool constexpr IsStatic_v = IsStatic::value; +} diff --git a/include/blast/math/typetraits/IsStaticallySpaced.hpp b/include/blast/math/typetraits/IsStaticallySpaced.hpp new file mode 100644 index 00000000..97abccc7 --- /dev/null +++ b/include/blast/math/typetraits/IsStaticallySpaced.hpp @@ -0,0 +1,27 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + + +namespace blast +{ + /** + * @brief Tests if the given vector data type has spacing between elements + * which is known at compile-time. + * + * @tparam VT vector data type + */ + template + struct IsStaticallySpaced; + + + /** + * @brief Shortcut for @a IsStaticallySpaced::value + * + * @tparam VT vector data type + */ + template + bool constexpr IsStaticallySpaced_v = IsStaticallySpaced::value; +} diff --git a/include/blast/math/typetraits/Matrix.hpp b/include/blast/math/typetraits/Matrix.hpp new file mode 100644 index 00000000..e263b625 --- /dev/null +++ b/include/blast/math/typetraits/Matrix.hpp @@ -0,0 +1,29 @@ +// Copyright (c) 2019-2023 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include + + +namespace blast +{ + /** + * @brief Matrix concept + * + * @tparam M matrix type + * @tparam T element type + */ + template > + concept Matrix = requires(M m, T v, T * p, size_t i, size_t j) + { + m; + // m(i, j) = v; + v = m(i, j); + i = rows(m); + j = columns(m); + // p = data(m); + }; +} diff --git a/include/blast/math/typetraits/Spacing.hpp b/include/blast/math/typetraits/Spacing.hpp new file mode 100644 index 00000000..118da285 --- /dev/null +++ b/include/blast/math/typetraits/Spacing.hpp @@ -0,0 +1,32 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + + +namespace blast +{ + /** + * @brief Deduces memory spacing of the given data type if it is known at compile-time + * - for row-major matrices, deduced to the memory distance between rows + * - for column-major matrices, deduced to the memory distance between columns + * - for panel-major matrices, deduced to memory distance between panels + * - for vectors, deduced to memory distance between consecutive elements + * + * @tparam T data type + */ + template + struct Spacing; + + + /** + * @brief Shortcut for @a Spacing::value + * + * @tparam MT matrix type + */ + template + size_t constexpr Spacing_v = Spacing::value; +} diff --git a/include/blast/math/typetraits/Vector.hpp b/include/blast/math/typetraits/Vector.hpp new file mode 100644 index 00000000..1ebbff20 --- /dev/null +++ b/include/blast/math/typetraits/Vector.hpp @@ -0,0 +1,29 @@ +// Copyright (c) 2019-2023 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include +#include + + +namespace blast +{ + /** + * @brief Vector concept + * + * @tparam V vector type + * @tparam T element type + */ + template > + concept Vector = requires(V v, T a, T * p, size_t i) + { + v; + // v[i] = a; + a = v[i]; + i = size(v); + // i = spacing(v); + p = data(v); + }; +} diff --git a/include/blast/math/views/submatrix/BaseTemplate.hpp b/include/blast/math/views/submatrix/BaseTemplate.hpp index ef3cc409..a19b7cfa 100644 --- a/include/blast/math/views/submatrix/BaseTemplate.hpp +++ b/include/blast/math/views/submatrix/BaseTemplate.hpp @@ -4,6 +4,8 @@ #pragma once +#include + namespace blast { @@ -13,4 +15,4 @@ namespace blast class PanelSubmatrix { }; -} \ No newline at end of file +} diff --git a/include/blast/system/Inline.hpp b/include/blast/system/Inline.hpp index 8ec59251..68897585 100644 --- a/include/blast/system/Inline.hpp +++ b/include/blast/system/Inline.hpp @@ -1,19 +1,17 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once -#include +#if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define BLAST_STRONG_INLINE __forceinline +#else +# define BLAST_STRONG_INLINE inline +#endif -#define BLAST_ALWAYS_INLINE BLAZE_ALWAYS_INLINE +#if defined(__GNUC__) +# define BLAST_ALWAYS_INLINE __attribute__((always_inline)) inline +#else +# define BLAST_ALWAYS_INLINE BLAST_STRONG_INLINE +#endif diff --git a/include/blast/util/Types.hpp b/include/blast/util/Types.hpp new file mode 100644 index 00000000..f7c82547 --- /dev/null +++ b/include/blast/util/Types.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace blast +{ + using std::size_t; + using std::ptrdiff_t; +} diff --git a/include/test/Testing.hpp b/include/test/Testing.hpp index 2802ab79..687c923c 100644 --- a/include/test/Testing.hpp +++ b/include/test/Testing.hpp @@ -21,32 +21,6 @@ namespace blast :: testing using namespace ::testing; - namespace detail - { - template - class ForcePrintImpl - : public T - { - friend void PrintTo(const ForcePrintImpl &m, ::std::ostream *o) - { - *o << "\n" << m; - } - }; - } - - - /* - * Makes the Eigen3 matrix classes printable from GTest checking macros like EXPECT_EQ. - * Usage: EXPECT_EQ(forcePrint(a), forcePrint(b)) - * Taken from this post: http://stackoverflow.com/questions/25146997/teach-google-test-how-to-print-eigen-matrix - */ - template - decltype(auto) forcePrint(T const& val) - { - return static_cast const&>(val); - } - - MATCHER_P(FloatNearPointwise, tol, "Out of range") { return (std::get<0>(arg) > std::get<1>(arg) - tol && std::get<0>(arg) < std::get<1>(arg) + tol) ; @@ -202,7 +176,7 @@ namespace blast :: testing ASSERT_TRUE(::blast::testing::approxEqual(val, expected, abs_tol, rel_tol)) #define BLAST_EXPECT_EQ(val, expected) \ - EXPECT_EQ(::blast::testing::forcePrint(val), ::blast::testing::forcePrint(expected)) + EXPECT_EQ(val, expected) #define BLAST_ASSERT_EQ(val, expected) \ - ASSERT_EQ(::blast::testing::forcePrint(val), ::blast::testing::forcePrint(expected)) + ASSERT_EQ(val, expected) diff --git a/package.xml b/package.xml index 952ada91..b67347d1 100644 --- a/package.xml +++ b/package.xml @@ -7,6 +7,8 @@ Mikhail Katliar BSD 3-Clause License + boost + cmake diff --git a/test/blast/math/dense/DynamicVectorPointerTest.cpp b/test/blast/math/dense/DynamicVectorPointerTest.cpp index de9d27db..4ce95ca0 100644 --- a/test/blast/math/dense/DynamicVectorPointerTest.cpp +++ b/test/blast/math/dense/DynamicVectorPointerTest.cpp @@ -2,7 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include +#include #include @@ -210,4 +211,21 @@ namespace blast :: testing { this->template testMatrixColumnSubvectorImpl(); } -} \ No newline at end of file + + + TEST(DynamicVectorPointerTest, testSpacing) + { + blaze::DynamicVector dv(10); + ASSERT_EQ(spacing(dv), 1); + + blaze::StaticVector sv; + ASSERT_EQ(spacing(sv), 1); + } + + + TEST(DynamicVectorPointerTest, testStaticallySpaced) + { + ASSERT_TRUE((IsStaticallySpaced_v>)); + ASSERT_TRUE((IsStaticallySpaced_v>)); + } +} diff --git a/test/blast/math/dense/GemmTest.cpp b/test/blast/math/dense/GemmTest.cpp index e19eaca7..d56daaf5 100644 --- a/test/blast/math/dense/GemmTest.cpp +++ b/test/blast/math/dense/GemmTest.cpp @@ -4,12 +4,12 @@ #define BLAST_USER_ASSERTION 1 -#include +#include #include #include -#include +#include namespace blast :: testing @@ -121,4 +121,4 @@ namespace blast :: testing INSTANTIATE_TYPED_TEST_SUITE_P(double, DenseGemmTest, double); -} \ No newline at end of file +} diff --git a/test/blast/math/dense/GerTest.cpp b/test/blast/math/dense/GerTest.cpp index 70576ef1..cbf84913 100644 --- a/test/blast/math/dense/GerTest.cpp +++ b/test/blast/math/dense/GerTest.cpp @@ -6,8 +6,8 @@ #define BLAST_USER_ASSERTION 1 #include - - +#include +#include #include #include @@ -64,4 +64,4 @@ namespace blast :: testing INSTANTIATE_TYPED_TEST_SUITE_P(double, DenseGerTest, double); -} \ No newline at end of file +} diff --git a/test/blast/math/dense/Getf2Test.cpp b/test/blast/math/dense/Getf2Test.cpp index 80cdab7d..9f725701 100644 --- a/test/blast/math/dense/Getf2Test.cpp +++ b/test/blast/math/dense/Getf2Test.cpp @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. - #include #include +#include #include #include @@ -23,13 +23,13 @@ namespace blast :: testing using Real = T; template - static DynamicMatrix luRestore(Matrix const& LU, size_t * ipiv) + static blaze::DynamicMatrix luRestore(blaze::Matrix const& LU, size_t * ipiv) { auto const M = rows(LU); auto const N = columns(LU); auto const K = std::min(M, N); - DynamicMatrix L(M, K, 0.); + blaze::DynamicMatrix L(M, K, 0.); for (size_t i = 0; i < M; ++i) { for (size_t j = 0; j < i && j < K; ++j) @@ -39,7 +39,7 @@ namespace blast :: testing L(i, i) = 1.; } - DynamicMatrix U(K, N, 0.); + blaze::DynamicMatrix U(K, N, 0.); for (size_t i = 0; i < K; ++i) { for (size_t j = i; j < N; ++j) @@ -63,9 +63,9 @@ namespace blast :: testing // Init matrices // - DynamicMatrix A(M, N); + blaze::DynamicMatrix A(M, N); randomize(A); - DynamicMatrix A_orig = A; + blaze::DynamicMatrix A_orig = A; // Do getrf std::vector ipiv(K); @@ -105,4 +105,4 @@ namespace blast :: testing INSTANTIATE_TYPED_TEST_SUITE_P(double, DenseGetf2Test, double); // INSTANTIATE_TYPED_TEST_SUITE_P(Potrf_float, DensePotrtTest, float); -} \ No newline at end of file +} diff --git a/test/blast/math/dense/GetrfTest.cpp b/test/blast/math/dense/GetrfTest.cpp index 4a1a3732..59eaa7cb 100644 --- a/test/blast/math/dense/GetrfTest.cpp +++ b/test/blast/math/dense/GetrfTest.cpp @@ -2,14 +2,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "blast/math/dense/Laswp.hpp" #include #include #include - #include +#include -#include #include #include #include @@ -27,13 +25,13 @@ namespace blast :: testing using Real = T; template - static DynamicMatrix luRestore(Matrix const& LU, size_t * ipiv) + static blaze::DynamicMatrix luRestore(blaze::Matrix const& LU, size_t * ipiv) { auto const M = rows(LU); auto const N = columns(LU); auto const K = std::min(M, N); - DynamicMatrix L(M, K, 0.); + blaze::DynamicMatrix L(M, K, 0.); for (size_t i = 0; i < M; ++i) { for (size_t j = 0; j < i && j < K; ++j) @@ -43,7 +41,7 @@ namespace blast :: testing L(i, i) = 1.; } - DynamicMatrix U(K, N, 0.); + blaze::DynamicMatrix U(K, N, 0.); for (size_t i = 0; i < K; ++i) { for (size_t j = i; j < N; ++j) @@ -67,9 +65,9 @@ namespace blast :: testing // Init matrices // - DynamicMatrix A(M, N); + blaze::DynamicMatrix A(M, N); randomize(A); - DynamicMatrix A_orig = A; + blaze::DynamicMatrix A_orig = A; // Do getrf std::vector ipiv(K); @@ -132,4 +130,4 @@ namespace blast :: testing INSTANTIATE_TYPED_TEST_SUITE_P(double, DenseGetrfTest, double); // INSTANTIATE_TYPED_TEST_SUITE_P(Potrf_float, DensePotrtTest, float); -} \ No newline at end of file +} diff --git a/test/blast/math/dense/Iamax.cpp b/test/blast/math/dense/Iamax.cpp index 7d0cb254..c66134e1 100644 --- a/test/blast/math/dense/Iamax.cpp +++ b/test/blast/math/dense/Iamax.cpp @@ -1,18 +1,9 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #include +#include #include #include @@ -37,7 +28,7 @@ namespace blast :: testing for (size_t n = 1; n < 50; ++n) { - DynamicVector x(n); + blaze::DynamicVector x(n); randomize(x); size_t const ind = iamax(x); @@ -46,4 +37,4 @@ namespace blast :: testing ASSERT_LE(std::abs(v), x[ind]) << "Error at size " << n; } } -} \ No newline at end of file +} diff --git a/test/blast/math/dense/MatrixPointerTest.cpp b/test/blast/math/dense/MatrixPointerTest.cpp index 5bc45c2f..123a4dc0 100644 --- a/test/blast/math/dense/MatrixPointerTest.cpp +++ b/test/blast/math/dense/MatrixPointerTest.cpp @@ -2,19 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include -#include +#include #include #include -#include -#include -#include #include -#include - -#include +#include namespace blast :: testing @@ -53,7 +47,7 @@ namespace blast :: testing template struct MatrixType> { - using type = blaze::DynamicPanelMatrix; + using type = DynamicPanelMatrix; }; @@ -67,7 +61,7 @@ namespace blast :: testing static size_t constexpr N = SO == columnMajor ? S / SimdSize_v : defaultColumns; public: - using type = blaze::StaticPanelMatrix; + using type = StaticPanelMatrix; static_assert(type::spacing() == S); }; @@ -245,4 +239,4 @@ namespace blast :: testing EXPECT_EQ(p_trans.storageOrder, !p.storageOrder); } } -} \ No newline at end of file +} diff --git a/test/blast/math/dense/PotrfTest.cpp b/test/blast/math/dense/PotrfTest.cpp index 0e8def7a..f8fa24d4 100644 --- a/test/blast/math/dense/PotrfTest.cpp +++ b/test/blast/math/dense/PotrfTest.cpp @@ -2,13 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. - #include #include #include #include +#include + namespace blast :: testing { @@ -30,7 +31,7 @@ namespace blast :: testing { // Init matrices // - DynamicMatrix A(M, M), L(M, M); + blaze::DynamicMatrix A(M, M), L(M, M); makePositiveDefinite(A); reset(L); @@ -51,7 +52,7 @@ namespace blast :: testing // Init matrices // - StaticMatrix A, L; + blaze::StaticMatrix A, L; makePositiveDefinite(A); reset(L); @@ -71,10 +72,10 @@ namespace blast :: testing // Init matrices // - StaticMatrix A_orig; + blaze::StaticMatrix A_orig; makePositiveDefinite(A_orig); - StaticMatrix A = A_orig; + blaze::StaticMatrix A = A_orig; for (size_t i = 0; i < M; ++i) for (size_t j = i + 1; j < M; ++j) reset(A(i, j)); @@ -96,4 +97,4 @@ namespace blast :: testing INSTANTIATE_TYPED_TEST_SUITE_P(double, DensePotrtTest, double); // INSTANTIATE_TYPED_TEST_SUITE_P(Potrf_float, DensePotrtTest, float); -} \ No newline at end of file +} diff --git a/test/blast/math/dense/StaticVectorPointerTest.cpp b/test/blast/math/dense/StaticVectorPointerTest.cpp index c25d3a43..a8050c8f 100644 --- a/test/blast/math/dense/StaticVectorPointerTest.cpp +++ b/test/blast/math/dense/StaticVectorPointerTest.cpp @@ -2,7 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include +#include +#include #include @@ -114,4 +115,4 @@ namespace blast :: testing { this->template testMatrixRowSubvectorImpl(); } -} \ No newline at end of file +} diff --git a/test/blast/math/dense/SyrkTest.cpp b/test/blast/math/dense/SyrkTest.cpp index 7a2334ae..897b4d34 100644 --- a/test/blast/math/dense/SyrkTest.cpp +++ b/test/blast/math/dense/SyrkTest.cpp @@ -6,11 +6,12 @@ #include - - #include #include +#include + + namespace blast :: testing { template @@ -32,8 +33,8 @@ namespace blast :: testing { // Init Blaze matrices // - DynamicMatrix A(m, k); - DynamicMatrix C(m, m), D(m, m); + blaze::DynamicMatrix A(m, k); + blaze::DynamicMatrix C(m, m), D(m, m); randomize(A); makeSymmetric(C); // for (size_t i = 0; i < m; ++i) @@ -71,4 +72,4 @@ namespace blast :: testing INSTANTIATE_TYPED_TEST_SUITE_P(double, DenseSyrkTest, double); -} \ No newline at end of file +} diff --git a/test/blast/math/dense/TrmmTest.cpp b/test/blast/math/dense/TrmmTest.cpp index e5c08041..04005df3 100644 --- a/test/blast/math/dense/TrmmTest.cpp +++ b/test/blast/math/dense/TrmmTest.cpp @@ -3,9 +3,8 @@ // license that can be found in the LICENSE file. #include -#include - - +#include +#include #include #include @@ -70,4 +69,4 @@ namespace blast :: testing << "trmm error at size m,n=" << m << "," << n; } } -} \ No newline at end of file +} diff --git a/test/blast/math/expressions/PMatTransExprTest.cpp b/test/blast/math/expressions/PMatTransExprTest.cpp index d268c250..bd850f35 100644 --- a/test/blast/math/expressions/PMatTransExprTest.cpp +++ b/test/blast/math/expressions/PMatTransExprTest.cpp @@ -4,12 +4,10 @@ #include #include -#include -#include #include #include -#include + namespace blast :: testing { diff --git a/test/blast/math/panel/GemmTest.cpp b/test/blast/math/panel/GemmTest.cpp index 2accdfcb..71e88bd1 100644 --- a/test/blast/math/panel/GemmTest.cpp +++ b/test/blast/math/panel/GemmTest.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include diff --git a/test/blast/math/panel/PotrfTest.cpp b/test/blast/math/panel/PotrfTest.cpp index 28c27115..46502097 100644 --- a/test/blast/math/panel/PotrfTest.cpp +++ b/test/blast/math/panel/PotrfTest.cpp @@ -4,9 +4,8 @@ #include #include -#include - -#include +#include +#include #include #include diff --git a/test/blast/math/panel/StaticPanelMatrixTest.cpp b/test/blast/math/panel/StaticPanelMatrixTest.cpp index c319a422..486b61c8 100644 --- a/test/blast/math/panel/StaticPanelMatrixTest.cpp +++ b/test/blast/math/panel/StaticPanelMatrixTest.cpp @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. #include -#include +#include #include #include diff --git a/test/blast/math/simd/DynamicRegisterMatrixTest.cpp b/test/blast/math/simd/DynamicRegisterMatrixTest.cpp index ae1f91fb..45908e9b 100644 --- a/test/blast/math/simd/DynamicRegisterMatrixTest.cpp +++ b/test/blast/math/simd/DynamicRegisterMatrixTest.cpp @@ -4,8 +4,9 @@ #include #include -#include +#include #include +#include #include #include @@ -223,4 +224,4 @@ namespace blast :: testing // std::clog << "DynamicRegisterMatrixTest.testPotrf not implemented for kernels with columns more than rows!" << std::endl; // } // } -} \ No newline at end of file +} diff --git a/test/blast/math/simd/RegisterMatrixTest.cpp b/test/blast/math/simd/RegisterMatrixTest.cpp index a09865f1..2ad3866a 100644 --- a/test/blast/math/simd/RegisterMatrixTest.cpp +++ b/test/blast/math/simd/RegisterMatrixTest.cpp @@ -4,17 +4,15 @@ #include #include -#include -#include -#include +#include +#include #include +#include #include #include #include -#include - namespace blast :: testing { @@ -376,7 +374,7 @@ namespace blast :: testing blaze::randomize(alpha); TypeParam ker; - ker.load(1., ptr(C)); + ker.load(1., ptr(*C)); ker.ger(alpha, ptr(a), ptr(b)); BLAST_EXPECT_APPROX_EQ(ker, evaluate(C + alpha * a * b), absTol(), relTol()); @@ -445,7 +443,7 @@ namespace blast :: testing for (size_t n = 0; n <= columns(C); ++n) { TypeParam ker; - ker.load(1., ptr(C)); + ker.load(1., ptr(*C)); ker.ger(ET(1.), ptr(a), ptr(trans(b)), m, n); for (size_t i = 0; i < m; ++i) @@ -473,9 +471,9 @@ namespace blast :: testing randomize(C); TypeParam ker; - ker.load(1., ptr(C)); + ker.load(1., ptr(*C)); ker.ger(ET(1.), ptr(a), ptr(trans(b))); - ker.store(ptr(D)); + ker.store(ptr(*D)); BLAST_EXPECT_EQ(D, evaluate(C + a * trans(b))); } @@ -643,4 +641,4 @@ namespace blast :: testing // TODO: should be strictly equal? BLAST_ASSERT_APPROX_EQ(ker, alpha * B * A, absTol(), relTol()); } -} \ No newline at end of file +}