Skip to content

Commit 5e73225

Browse files
committed
Docs: Remove namespace nesting
1 parent 32d7d3e commit 5e73225

File tree

9 files changed

+26
-16
lines changed

9 files changed

+26
-16
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
 {
 "cSpell.words": [
 "ashvardanian",
+"Autovectorized",
 "blas",
 "blasint",
 "cblas",

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ You are expected to build this on an x86 machine with CUDA drivers installed.
 
 ```sh
 cmake -B build_release -D CMAKE_BUILD_TYPE=Release # Generate the build files
-cmake --build build_release --config Release # Build the project
+cmake --build build_release --config Release -j # Build the project
 build_release/reduce_bench # Run all benchmarks
 build_release/reduce_bench --benchmark_filter="cuda" # Only CUDA-related
 PARALLEL_REDUCTIONS_LENGTH=1024 build_release/reduce_bench # Set a different input size

reduce_bench.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
 #endif
 
 namespace bm = benchmark;
-using namespace ashvardanian::reduce;
+using namespace ashvardanian;
 
 /**
 * @brief Wraps the memory allocated for the benchmark either from `malloc` or `mmap`.

reduce_blas.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
 #include <limits> // `std::numeric_limits`
 #include <stdexcept> // `std::length_error`
 
-namespace ashvardanian::reduce {
+namespace ashvardanian {
 
 /**
 * @brief Using BLAS dot-product interface to accumulate a vector.
@@ -42,4 +42,4 @@ class blas_dot_t {
 }
 };
 
-} // namespace ashvardanian::reduce
+} // namespace ashvardanian

reduce_cpu.hpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
 /**
-* @date 04/09/2019
-* @file reduce_cpu.hpp
 * @brief Parallel reduction with SIMD and multicore acceleration
+* @file reduce_cpu.hpp
 * @author Ash Vardanian
+* @date 04/09/2019
 */
 #pragma once
 #include <cstring> // `std::memcpy`
@@ -24,7 +24,7 @@
 #include <arm_sve.h> // ARM SVE intrinsics
 #endif
 
-namespace ashvardanian::reduce {
+namespace ashvardanian {
 
 /**
 * @brief Returns the current number of logical cores on the CPU.
@@ -41,6 +41,8 @@ inline static std::size_t round_up_to_multiple(std::size_t value, std::size_t mu
 return ((value + multiple - 1) / multiple) * multiple;
 }
 
+#pragma region - Serial and Autovectorized
+
 /**
 * @brief Computes the sum of a sequence of float values using an unrolled @b `for`-loop,
 * accumulating into 8 separate registers and summing them at the end.
@@ -131,6 +133,11 @@ class stl_par_unseq_reduce_gt {
 
 #endif // defined(__cpp_lib_execution)
 
+#pragma endregion - Serial and Autovectorized
+
+#pragma region - Handwritten SIMD Kernels
+#pragma region x86
+
 #if defined(__SSE__)
 
 /**
@@ -614,4 +621,6 @@ class threads_gt {
 }
 };
 
-} // namespace ashvardanian::reduce
+#pragma endregion - Multicore
+
+} // namespace ashvardanian

reduce_cublas.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
 
 using namespace nvcuda;
 
-namespace ashvardanian::reduce {
+namespace ashvardanian {
 
 /**
 * @brief Using cuBLAS dot-product interfaces to accumulate a vector.
@@ -164,4 +164,4 @@ struct cuda_tensors_t {
 }
 };
 
-} // namespace ashvardanian::reduce
+} // namespace ashvardanian

reduce_cuda.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
 
 #include <cub/cub.cuh>
 
-namespace ashvardanian::reduce {
+namespace ashvardanian {
 
 std::size_t cuda_device_count() noexcept {
 int count;
@@ -280,4 +280,4 @@ class cuda_cub_t {
 }
 };
 
-} // namespace ashvardanian::reduce
+} // namespace ashvardanian

reduce_metal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
 #include <cstdio>
 #include <cstring>
 
-namespace ashvardanian::reduce {
+namespace ashvardanian {
 
 struct metal_t {
 
@@ -216,6 +216,6 @@ struct metal_t {
 }
 };
 
-} // namespace ashvardanian::reduce
+} // namespace ashvardanian
 
 #endif

reduce_opencl.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
 #include <CL/cl.h>
 #endif
 
-namespace ashvardanian::reduce {
+namespace ashvardanian {
 
 /**
 * @brief OpenCL target device information, including its name, driver version,
@@ -337,4 +337,4 @@ char const *opencl_error_name(cl_int code) noexcept {
 }
 }
 
-} // namespace ashvardanian::reduce
+} // namespace ashvardanian

0 commit comments

Comments
 (0)