Skip to content

Commit c36d2b8

Browse files
committed
Add: gpu_specs and cuda_status_t
1 parent 3c8d181 commit c36d2b8

File tree

2 files changed

+33
-27
lines changed

2 files changed

+33
-27
lines changed

include/stringcuzilla/types.cuh

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
#ifndef STRINGZILLA_TYPES_CUH_
1212
#define STRINGZILLA_TYPES_CUH_
1313

14-
#include <cuda_runtime.h> // `cudaMallocManaged`, `cudaFree`, `cudaSuccess`, `cudaGetErrorString`
15-
1614
#include "stringzilla/types.hpp"
1715

16+
#include <cuda_runtime.h> // `cudaMallocManaged`, `cudaFree`, `cudaSuccess`, `cudaGetErrorString`
17+
#include <optional> // `std::optional`
18+
1819
#if !defined(SZ_USE_HOPPER)
1920
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ < 11)
2021
#define SZ_USE_HOPPER (1)
@@ -78,6 +79,35 @@ struct unified_alloc {
7879
}
7980
};
8081

82+
/**
 *  @brief  Asks the CUDA runtime for the properties of one device and repacks them into a `gpu_specs_t`.
 *
 *  @param[in] device Device ordinal forwarded to `cudaGetDeviceProperties`; zero unless specified.
 *  @return The populated specs, or `std::nullopt` when the runtime call does not return `cudaSuccess`.
 *
 *  The CUDA error code itself is not propagated: the caller only learns that the query failed.
 */
inline std::optional<gpu_specs_t> gpu_specs(int device = 0) noexcept {
    gpu_specs_t result;
    cudaDeviceProp properties;
    if (cudaGetDeviceProperties(&properties, device) != cudaSuccess) return std::nullopt; // ! Query failed

    // Values copied verbatim from the device properties
    result.streaming_multiprocessors = properties.multiProcessorCount;
    result.constant_memory_bytes = properties.totalConstMem;
    result.vram_bytes = properties.totalGlobalMem;

    // Device-wide totals that CUDA only reports per multiprocessor:
    // the per-multiprocessor shared memory and core count are scaled by the number of multiprocessors.
    result.shared_memory_bytes = properties.sharedMemPerMultiprocessor * properties.multiProcessorCount;
    result.cuda_cores = gpu_specs_t::cores_per_multiprocessor(properties.major, properties.minor) *
                        result.streaming_multiprocessors;

    // Limits that matter when scheduling blocks
    result.max_blocks_per_multiprocessor = properties.maxBlocksPerMultiProcessor;
    result.reserved_memory_per_block = properties.reservedSharedMemPerBlock;
    return result;
}
102+
103+
/**
 *  @brief  Groups a library status code with the CUDA runtime error code and a timing value.
 *
 *  Converts implicitly to `status_t`, yielding the `status` member, so it can be passed
 *  wherever a plain `status_t` is expected.
 */
struct cuda_status_t {
    /// Library-level outcome; defaults to `status_t::success_k`.
    status_t status {status_t::success_k};
    /// CUDA runtime error code; defaults to `cudaSuccess`.
    cudaError_t cuda_error {cudaSuccess};
    /// Elapsed time, in milliseconds going by the member name; defaults to zero.
    float elapsed_milliseconds {0.0f};

    /// Implicit conversion that exposes only the `status` member.
    operator status_t() const noexcept { return status; }
};
110+
81111
} // namespace stringzilla
82112
} // namespace ashvardanian
83113

scripts/test_stringcuzilla.cuh

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,30 +21,6 @@ namespace ashvardanian {
2121
namespace stringzilla {
2222
namespace scripts {
2323

24-
/**
 *  @brief  Builds a `gpu_specs_t` from the CUDA device properties of one device.
 *
 *  When `SZ_USE_CUDA` evaluates to false the query is compiled out and a
 *  default-constructed `gpu_specs_t` is returned.
 *
 *  @param[in] device Device ordinal forwarded to `cudaGetDeviceProperties`; zero unless specified.
 *  @return The populated (or default-constructed) specs.
 *  @throws std::runtime_error If `cudaGetDeviceProperties` does not return `cudaSuccess`;
 *          the message ends with the text from `cudaGetErrorString`.
 */
inline gpu_specs_t gpu_specs(int device = 0) noexcept(false) {
    gpu_specs_t result;
#if SZ_USE_CUDA
    cudaDeviceProp properties;
    cudaError_t const query_status = cudaGetDeviceProperties(&properties, device);
    if (query_status != cudaSuccess) {
        std::string message("Error retrieving device properties: ");
        message += cudaGetErrorString(query_status);
        throw std::runtime_error(message);
    }

    // Values copied verbatim from the device properties
    result.streaming_multiprocessors = properties.multiProcessorCount;
    result.constant_memory_bytes = properties.totalConstMem;
    result.vram_bytes = properties.totalGlobalMem;

    // Device-wide totals that CUDA only reports per multiprocessor:
    // the per-multiprocessor shared memory and core count are scaled by the number of multiprocessors.
    result.shared_memory_bytes = properties.sharedMemPerMultiprocessor * properties.multiProcessorCount;
    result.cuda_cores = gpu_specs_t::cores_per_multiprocessor(properties.major, properties.minor) *
                        result.streaming_multiprocessors;

    // Limits that matter when scheduling blocks
    result.max_blocks_per_multiprocessor = properties.maxBlocksPerMultiProcessor;
    result.reserved_memory_per_block = properties.reservedSharedMemPerBlock;
#endif
    return result;
}
4824
int log_environment() {
4925
std::printf("- Uses Haswell: %s \n", SZ_USE_HASWELL ? "yes" : "no");
5026
std::printf("- Uses Skylake: %s \n", SZ_USE_SKYLAKE ? "yes" : "no");
@@ -568,7 +544,7 @@ void test_similarity_scores_memory_usage() {
568544
{.batch_size = 10, .min_string_length = 1, .max_string_length = 131072, .iterations = 1},
569545
};
570546

571-
gpu_specs_t first_gpu_specs = gpu_specs();
547+
gpu_specs_t first_gpu_specs = *gpu_specs();
572548

573549
// Progress until something fails
574550
for (fuzzy_config_t const &experiment : experiments) {

0 commit comments

Comments
 (0)