diff --git a/src/StreamModels.h b/src/StreamModels.h index 556beb4d..af96a1c0 100644 --- a/src/StreamModels.h +++ b/src/StreamModels.h @@ -1,4 +1,5 @@ #pragma once +#include #include #if defined(CUDA) diff --git a/src/acc/ACCStream.h b/src/acc/ACCStream.h index 1b053cb4..eec7cd3e 100644 --- a/src/acc/ACCStream.h +++ b/src/acc/ACCStream.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include diff --git a/src/cuda/CUDAStream.h b/src/cuda/CUDAStream.h index 4b4a1a3a..0dfb3b6c 100644 --- a/src/cuda/CUDAStream.h +++ b/src/cuda/CUDAStream.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include diff --git a/src/hip/HIPStream.h b/src/hip/HIPStream.h index 76ef7df4..9bb6185d 100644 --- a/src/hip/HIPStream.h +++ b/src/hip/HIPStream.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include diff --git a/src/kokkos/KokkosStream.hpp b/src/kokkos/KokkosStream.hpp index 8e40119c..dacf4b0a 100644 --- a/src/kokkos/KokkosStream.hpp +++ b/src/kokkos/KokkosStream.hpp @@ -6,6 +6,7 @@ #pragma once +#include #include #include diff --git a/src/legacy/HCStream.h b/src/legacy/HCStream.h index a931cdb6..f07ebb70 100644 --- a/src/legacy/HCStream.h +++ b/src/legacy/HCStream.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include diff --git a/src/main.cpp b/src/main.cpp index ee091259..8d915da8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -28,7 +29,7 @@ size_t num_times = 100; size_t deviceIndex = 0; bool use_float = false; bool output_as_csv = false; -// Default unit of memory is MegaBytes (as per STREAM) +// Default unit of memory is MegaBytes (as per STREAM) Unit unit{Unit::Kind::MegaByte}; bool silence_errors = false; std::string csv_separator = ","; @@ -390,13 +391,21 @@ void check_solution(const size_t num_times, // Calculate the L^infty-norm relative error for (size_t i = 0; i < a.size(); ++i) { T vA = a[i], vB = b[i], vC = c[i]; - T eA = std::fabs(vA - goldA) / std::fabs(goldA); - T eB = std::fabs(vB - goldB) / std::fabs(goldB); - T eC = std::fabs(vC - goldC) / std::fabs(goldC); - check("a", a[i], goldA, eA, i); - check("b", b[i], goldB, eB, i); - check("c", c[i], goldC, eC, i); + if (!(vA == T(0) && goldA == T(0))) { + T eA = std::fabs(vA - goldA) / std::fabs(goldA); + check("a", a[i], goldA, eA, i); + } + + if (!(vB == T(0) && goldB == T(0))) { + T eB = std::fabs(vB - goldB) / std::fabs(goldB); + check("b", b[i], goldB, eB, i); + } + + if (!(vC == T(0) && goldC == T(0))) { + T eC = std::fabs(vC - goldC) / std::fabs(goldC); + check("c", c[i], goldC, eC, i); + } } if (failed > 0 && !silence_errors) diff --git a/src/ocl/OCLStream.h b/src/ocl/OCLStream.h index e2366dad..0dbf568e 100644 --- a/src/ocl/OCLStream.h +++ b/src/ocl/OCLStream.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include diff --git a/src/omp/OMPStream.h b/src/omp/OMPStream.h index 40770005..90c1d681 100644 --- a/src/omp/OMPStream.h +++ b/src/omp/OMPStream.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include diff --git a/src/raja/RAJAStream.hpp b/src/raja/RAJAStream.hpp index e98b0778..1413a4dc 100644 --- a/src/raja/RAJAStream.hpp +++ b/src/raja/RAJAStream.hpp @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include "RAJA/RAJA.hpp" diff --git a/src/std-data/STDDataStream.h b/src/std-data/STDDataStream.h index d92864be..96a53f04 100644 --- a/src/std-data/STDDataStream.h +++ b/src/std-data/STDDataStream.h @@ -7,6 +7,7 @@ #pragma once #include "dpl_shim.h" +#include #include #include #include "Stream.h" diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h index 8a8f5de8..d4e1a397 100644 --- a/src/std-indices/STDIndicesStream.h +++ b/src/std-indices/STDIndicesStream.h @@ -7,6 +7,7 @@ #pragma once #include "dpl_shim.h" +#include #include #include #include "Stream.h" diff --git a/src/std-ranges/STDRangesStream.hpp b/src/std-ranges/STDRangesStream.hpp index 51680c62..be45d0b8 100644 --- a/src/std-ranges/STDRangesStream.hpp +++ b/src/std-ranges/STDRangesStream.hpp @@ -7,6 +7,7 @@ #pragma once #include "dpl_shim.h" +#include #include #include #include "Stream.h" diff --git a/src/sycl/SYCLStream.h b/src/sycl/SYCLStream.h index 1a40242d..d8913841 100644 --- a/src/sycl/SYCLStream.h +++ b/src/sycl/SYCLStream.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include "Stream.h" diff --git a/src/sycl2020-acc/SYCLStream2020.h b/src/sycl2020-acc/SYCLStream2020.h index cd515f87..1f37dabb 100644 --- a/src/sycl2020-acc/SYCLStream2020.h +++ b/src/sycl2020-acc/SYCLStream2020.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include diff --git a/src/sycl2020-usm/SYCLStream2020.h b/src/sycl2020-usm/SYCLStream2020.h index 811c26ef..b29fad8f 100644 --- a/src/sycl2020-usm/SYCLStream2020.h +++ b/src/sycl2020-usm/SYCLStream2020.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include diff --git a/src/tbb/TBBStream.hpp b/src/tbb/TBBStream.hpp index 80f11c17..a564f191 100644 --- a/src/tbb/TBBStream.hpp +++ b/src/tbb/TBBStream.hpp @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include "tbb/tbb.h" @@ -36,7 +37,7 @@ template class TBBStream : public Stream { protected: - + tbb_partitioner partitioner; tbb::blocked_range range; // Device side pointers diff --git a/src/thrust/ThrustStream.h b/src/thrust/ThrustStream.h index b0acd80f..ca823c19 100644 --- a/src/thrust/ThrustStream.h +++ b/src/thrust/ThrustStream.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #if defined(MANAGED)