@@ -204,6 +204,8 @@ static ggml_cuda_device_info ggml_cuda_init() {
204
204
GGML_LOG_INFO (" %s: GGML_CUDA_FORCE_CUBLAS: no\n " , __func__);
205
205
#endif // GGML_CUDA_FORCE_CUBLAS
206
206
GGML_LOG_INFO (" %s: found %d " GGML_CUDA_NAME " devices:\n " , __func__, info.device_count );
207
+
208
+ std::vector<std::pair<int , std::string>> turing_devices_without_mma;
207
209
for (int id = 0 ; id < info.device_count ; ++id) {
208
210
int device_vmm = 0 ;
209
211
@@ -261,7 +263,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
261
263
info.devices [id].cc = 100 *prop.major + 10 *prop.minor ;
262
264
GGML_LOG_INFO (" Device %d: %s, compute capability %d.%d, VMM: %s\n " ,
263
265
id, prop.name , prop.major , prop.minor , device_vmm ? " yes" : " no" );
264
- #endif // defined(GGML_USE_HIP)
266
+ std::string device_name (prop.name );
267
+ if (device_name == " NVIDIA GeForce MX450" ) {
268
+ turing_devices_without_mma.push_back ({ id, device_name });
269
+ } else if (device_name == " NVIDIA GeForce MX550" ) {
270
+ turing_devices_without_mma.push_back ({ id, device_name });
271
+ } else if (device_name.substr (0 , 21 ) == " NVIDIA GeForce GTX 16" ) {
272
+ turing_devices_without_mma.push_back ({ id, device_name });
273
+ }
274
+ #endif // defined(GGML_USE_HIP)
275
+ }
276
+
277
+ if (ggml_cuda_highest_compiled_arch (GGML_CUDA_CC_TURING) >= GGML_CUDA_CC_TURING && !turing_devices_without_mma.empty ()) {
278
+ GGML_LOG_INFO (" The following devices will have suboptimal performance due to a lack of tensor cores:\n " );
279
+ for (size_t device_pos = 0 ; device_pos < turing_devices_without_mma.size (); device_pos++) {
280
+ GGML_LOG_INFO (
281
+ " Device %d: %s\n " , turing_devices_without_mma[device_pos].first , turing_devices_without_mma[device_pos].second .c_str ());
282
+ }
283
+ GGML_LOG_INFO (
284
+ " Consider compiling with CMAKE_CUDA_ARCHITECTURES=61-virtual;80-virtual and DGGML_CUDA_FORCE_MMQ to force the use of the Pascal code for Turing.\n " );
265
285
}
266
286
267
287
for (int id = 0 ; id < info.device_count ; ++id) {
0 commit comments