10 changes: 10 additions & 0 deletions ggml/src/ggml-cuda/ggml-cuda.cu
@@ -207,6 +207,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
#endif // GGML_CUDA_FORCE_CUBLAS
GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);

bool is_cc121 = false;
Suggested change (Collaborator):
-    bool is_cc121 = false;
+    bool device_schedule_spin = false;

std::vector<std::pair<int, std::string>> turing_devices_without_mma;
for (int id = 0; id < info.device_count; ++id) {
int device_vmm = 0;
@@ -229,6 +230,8 @@ static ggml_cuda_device_info ggml_cuda_init() {
cudaDeviceProp prop;
CUDA_CHECK(cudaGetDeviceProperties(&prop, id));

is_cc121 |= prop.major == 12 && prop.minor == 1;
Suggested change (Collaborator):
-        is_cc121 |= prop.major == 12 && prop.minor == 1;
+        // Depending on the CUDA drivers the DGX Spark can run with a device schedule that prefers low power use.
+        // However, as it is plugged into a wall it should prefer maximum performance.
+        // TODO: add a check for a future driver version where this is fixed to avoid thrashing for > 20 CUDA contexts.
+        device_schedule_spin = prop.major == 12 && prop.minor == 1;
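
As an aside, the compute-capability check discussed above is plain CUDA runtime API usage. The following is a minimal standalone sketch, not part of the PR and not llama.cpp code, that queries every visible device with cudaGetDeviceProperties() and derives a device_schedule_spin flag the same way; the variable name follows the reviewer's suggestion, everything else is illustrative.

// Minimal sketch: detect whether any visible device reports compute capability 12.1,
// mirroring the check in this diff. Hypothetical standalone example, not llama.cpp code.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count == 0) {
        fprintf(stderr, "no CUDA devices found\n");
        return 1;
    }

    bool device_schedule_spin = false;
    for (int id = 0; id < device_count; ++id) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, id) != cudaSuccess) {
            continue;
        }
        printf("device %d: %s, compute capability %d.%d\n", id, prop.name, prop.major, prop.minor);
        // Same condition as in the diff: compute capability 12.1 (DGX Spark).
        device_schedule_spin |= prop.major == 12 && prop.minor == 1;
    }
    printf("prefer spin device schedule: %s\n", device_schedule_spin ? "yes" : "no");
    return 0;
}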


info.default_tensor_split[id] = total_vram;
total_vram += prop.totalGlobalMem;
info.devices[id].integrated = false; // Temporarily disabled due to issues with corrupted output (e.g. #15034)
@@ -273,6 +276,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
} else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") {
turing_devices_without_mma.push_back({ id, device_name });
}

#endif // defined(GGML_USE_HIP)
}

@@ -293,6 +297,12 @@ static ggml_cuda_device_info ggml_cuda_init() {
// configure logging to stdout
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));

// Setting device scheduling strategy for iGPUs to "spinning" to avoid delays in cuda synchronize calls.
// This fix is temporary, as the strategy will be the default in later drivers.
Suggested change (Member):
-    // This fix is temporary, as the strategy will be the default in later drivers.
+    // This fix is temporary, as the strategy will be the default in later drivers.

if (is_cc121) {
CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
}
Suggested change (Collaborator):
-    // Setting device scheduling strategy for iGPUs to "spinning" to avoid delays in cuda synchronize calls.
-    // This fix is temporary, as the strategy will be the default in later drivers.
-    if (is_cc121) {
-        CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
-    }
+    if (device_schedule_spin) {
+        CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
+    }
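
For context, here is a minimal sketch of what the merged block does with cudaSetDeviceFlags(): it asks the CUDA runtime to spin in synchronization calls rather than yield or block. This is a hypothetical standalone example, not the llama.cpp code, and it assumes the flag is set on the host thread before the device is put to heavy use.

// Minimal sketch: request the spin device schedule so synchronization busy-waits,
// trading host CPU time for lower latency. Hypothetical example, not llama.cpp code.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    // Records the schedule flag for the current device; call it early so the
    // context created for this device picks it up.
    if (cudaSetDeviceFlags(cudaDeviceScheduleSpin) != cudaSuccess) {
        fprintf(stderr, "cudaSetDeviceFlags failed\n");
        return 1;
    }

    // With cudaDeviceScheduleSpin, this synchronization spins on the CPU while
    // waiting for outstanding GPU work instead of yielding or blocking.
    if (cudaDeviceSynchronize() != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed\n");
        return 1;
    }
    printf("spin schedule requested\n");
    return 0;
}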


return info;
}
