Merge branch 'ggml-org:master' into master

Thireus · web-flow · commit 20208e9a5a2d · 2025-08-12T06:51:28.000+01:00
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
@@ -312,11 +312,11 @@ static bool turing_mma_available(const int cc) {
 }
 
 static bool ampere_mma_available(const int cc) {
-    return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
+    return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
 }
 
 static bool cp_async_available(const int cc) {
-    return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
+    return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
 }
 
 static constexpr __device__ int ggml_cuda_get_physical_warp_size() {

Original file line number	Diff line number	Diff line change
`@@ -312,11 +312,11 @@ static bool turing_mma_available(const int cc) {`
`312`	`312`	`}`
`313`	`313`
`314`	`314`	`static bool ampere_mma_available(const int cc) {`
`315`		`- return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;`
	`315`	`+ return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;`
`316`	`316`	`}`
`317`	`317`
`318`	`318`	`static bool cp_async_available(const int cc) {`
`319`		`- return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;`
	`319`	`+ return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;`
`320`	`320`	`}`
`321`	`321`
`322`	`322`	`static constexpr __device__ int ggml_cuda_get_physical_warp_size() {`