@@ -264,20 +264,47 @@ if get_option('USE_SYCL')
264
264
mlink_args = [' -fsycl' ]
265
265
has_backends = true
266
266
message (' Building SYCL' )
267
- add_project_arguments (' -O3' , language : ' cpp' )
268
- add_project_arguments (' -fsycl' , language : ' cpp' )
269
- add_project_arguments (' -ffast-math' , language : ' cpp' )
270
- add_project_arguments (' -fsycl-unnamed-lambda' , language : ' cpp' )
271
- add_project_arguments (' -Wall' , language : ' cpp' )
272
- add_project_arguments (' -Wextra' , language : ' cpp' )
273
267
274
268
files += ' src/neural/sycl/layers.cc.dp.cpp'
275
269
files += ' src/neural/sycl/network_sycl.cc.dp.cpp'
276
270
files += ' src/neural/sycl/common_kernels.dp.cpp'
277
271
272
+
273
# ---------------------------------------------------------------------------
# Compiler flag selection for the SYCL backends (Intel / NVIDIA / AMD).
#
# Flags are split into two groups:
#   * GENERAL flags  - optimisation / warning / SYCL front-end switches.
#   * WL flags       - workload specific preprocessor defines for a backend.
#
# Two array options let the user override them:
#   * OVERRIDE_GENERAL_CXX_FLAGS replaces only the GENERAL group.
#   * CMAKE_CXX_FLAGS            replaces GENERAL and WL groups together.
# The two options are mutually exclusive.
# ---------------------------------------------------------------------------

# The general defaults are the same for every vendor; keep a single list so the
# three DEF_*_GENERAL_CXX_FLAGS names below cannot drift apart by accident.
sycl_general_defaults = [
  '-O3',
  '-fsycl',
  '-ffast-math',
  '-fsycl-unnamed-lambda',
  '-Wall',
  '-Wextra',
]

DEF_INTEL_GENERAL_CXX_FLAGS = sycl_general_defaults
DEF_INTEL_WL_CXX_FLAGS = ['-DDEFAULT_MINIBATCH_SIZE=248', '-DMKL_ILP64']
DEF_AMD_GENERAL_CXX_FLAGS = sycl_general_defaults
DEF_AMD_WL_CXX_FLAGS = ['-DUSE_HIPBLAS', '-DINLINE', '-D__HIP_PLATFORM_AMD__']
DEF_NVIDIA_GENERAL_CXX_FLAGS = sycl_general_defaults
DEF_NVIDIA_WL_CXX_FLAGS = ['-DUSE_CUBLAS', '-DINLINE', '-DNVIDIABE']

# Read each option once instead of re-querying it in every branch.
sycl_all_override = get_option('CMAKE_CXX_FLAGS')
sycl_general_override = get_option('OVERRIDE_GENERAL_CXX_FLAGS')

if sycl_all_override != [] and sycl_general_override != []
  # Abort here: the *_GPU_CXX_FLAGS variables would otherwise stay undefined
  # and the '+=' statements below would fail with an unrelated error.
  error('Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together')
elif sycl_general_override != []
  message('OVERRIDING GENERAL compilation flags')
  INTEL_GPU_CXX_FLAGS = sycl_general_override + DEF_INTEL_WL_CXX_FLAGS
  NVIDIA_GPU_CXX_FLAGS = sycl_general_override + DEF_NVIDIA_WL_CXX_FLAGS
  AMD_GPU_CXX_FLAGS = sycl_general_override + DEF_AMD_WL_CXX_FLAGS
elif sycl_all_override != []
  message('OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags')
  INTEL_GPU_CXX_FLAGS = sycl_all_override
  NVIDIA_GPU_CXX_FLAGS = sycl_all_override
  AMD_GPU_CXX_FLAGS = sycl_all_override
else
  message('Using DEFAULT compilation flags')
  INTEL_GPU_CXX_FLAGS = DEF_INTEL_GENERAL_CXX_FLAGS + DEF_INTEL_WL_CXX_FLAGS
  NVIDIA_GPU_CXX_FLAGS = DEF_NVIDIA_GENERAL_CXX_FLAGS + DEF_NVIDIA_WL_CXX_FLAGS
  AMD_GPU_CXX_FLAGS = DEF_AMD_GENERAL_CXX_FLAGS + DEF_AMD_WL_CXX_FLAGS
endif

# Device targeting is always appended, whichever flag set was selected above.
# NOTE(review): GPU_AOT is appended verbatim; if the option can be empty this
# adds an empty argument - confirm against the option declaration.
INTEL_GPU_CXX_FLAGS += [get_option('GPU_AOT')]
NVIDIA_GPU_CXX_FLAGS += ['-fsycl-targets=nvidia_gpu_sm_' + get_option('USE_SM')]
AMD_GPU_CXX_FLAGS += ['-fsycl-targets=amd_gpu_gfx' + get_option('USE_SM')]
303
+
304
+
278
305
if (get_option (' USE_L0_BACKEND' ) == true )
279
306
message (' Building SYCL for the L0 backend' )
280
- add_project_arguments (' -DMKL_ILP64 ' , language : ' cpp' )
307
+ add_project_arguments (INTEL_GPU_CXX_FLAGS , language : ' cpp' )
281
308
deps += cc.find_library (' sycl' , required : true )
282
309
deps += cc.find_library (' mkl_sycl' , required : true )
283
310
deps += cc.find_library (' mkl_intel_ilp64' , required : true )
@@ -286,39 +313,48 @@ if get_option('USE_SYCL')
286
313
deps += cc.find_library (' OpenCL' , required : true )
287
314
deps += cc.find_library (' dl' , required : true )
288
315
deps += cc.find_library (' m' , required : true )
289
- add_project_arguments (' -DDEFAULT_MINIBATCH_SIZE=248' , language : ' cpp' )
290
- add_project_arguments (get_option (' GPU_AOT' ), language : ' cpp' )
291
- mlink_args += get_option (' GPU_AOT' )
316
+ mlink_args += INTEL_GPU_CXX_FLAGS
292
317
elif (get_option (' USE_AMD_BACKEND' ) == true )
293
318
message (' Building SYCL for AMD backend' )
294
- sm_level = ' amd_gpu_' + get_option (' USE_SM' )
295
- add_project_arguments (' -fsycl-targets=' + sm_level , language : ' cpp' )
296
- add_project_arguments (' -DUSE_HIPBLAS' , language : ' cpp' )
297
- add_project_arguments (' -D__HIP_PLATFORM_AMD__' , language : ' cpp' )
298
- add_project_arguments (' -DINLINE' , language : ' cpp' )
319
+ add_project_arguments (AMD_GPU_CXX_FLAGS, language : ' cpp' )
299
320
hip_blas = cc.find_library (' hipblas' , required : true )
300
321
hip_dart = cc.find_library (' amdhip64' , required : true )
301
322
deps += [hip_blas, hip_dart]
302
323
deps += cc.find_library (' sycl' , required : true )
303
- mlink_args+= [ ' -fsycl ' , ' -fsycl-targets= ' + sm_level]
324
+ mlink_args+= AMD_GPU_CXX_FLAGS
304
325
else
305
- sm_level = ' nvidia_gpu_sm_' + get_option (' USE_SM' )
306
326
message (' Building SYCL for the NVIDIA backend' )
307
- add_project_arguments (' -fsycl-targets=' + sm_level, language : ' cpp' )
308
- add_project_arguments (' -DUSE_CUBLAS' , language : ' cpp' )
309
- add_project_arguments (' -DINLINE' , language : ' cpp' )
310
- add_project_arguments (' -DNVIDIABE' , language : ' cpp' )
327
+ add_project_arguments (NVIDIA_GPU_CXX_FLAGS, language : ' cpp' )
311
328
cu_blas = cc.find_library (' cublas' , required : true )
312
329
cu_dart = cc.find_library (' cudart' , required : true )
313
- deps += [cu_blas, cu_dart]
330
+ cu_da = cc.find_library (' cuda' , required : true )
331
+ deps += [cu_blas, cu_dart, cu_da]
314
332
deps += cc.find_library (' sycl' , required : true )
315
333
deps += cc.find_library (' pthread' , required : true )
316
- mlink_args+= [ ' -fsycl ' , ' -fsycl-targets= ' + sm_level]
334
+ mlink_args+= NVIDIA_GPU_CXX_FLAGS
317
335
endif
318
-
319
- #message('Using link arguements ' + mlink_args)
320
- executable ( ' lc0_sycl ' , ' src/main.cc ' , files, include_directories : includes, dependencies : deps, install : true , link_args : mlink_args)
336
+
337
+ executable ( ' lc0_sycl ' , ' src/main.cc ' , files, include_directories : includes, dependencies : deps, install : true , link_args : mlink_args)
338
+
321
339
elif get_option (' USE_CUDA' )
340
+
341
# ---------------------------------------------------------------------------
# Compiler flag selection for the CUDA backend.
#
#   DEF_WL_CXX_FLAGS       - workload specific flags that are always needed.
#   DEF_GENERAL_CXX_FLAGS  - default optimisation level.
#
# OVERRIDE_GENERAL_CXX_FLAGS replaces only the general group, CMAKE_CXX_FLAGS
# replaces both groups. The two options are mutually exclusive. The result is
# stored in CMAKE_CXX_FLAGS for use further down in this backend's setup.
# ---------------------------------------------------------------------------
# NOTE(review): the commented-out legacy argument list also carried
# '--std=c++14'; it is not part of these defaults - confirm that the language
# standard is set elsewhere.
DEF_WL_CXX_FLAGS = ['-Xcompiler', '-fPIC']
DEF_GENERAL_CXX_FLAGS = ['-O2']
DEF_COMBINED_CXX_FLAGS = DEF_WL_CXX_FLAGS + DEF_GENERAL_CXX_FLAGS

# Read each option once instead of re-querying it in every branch.
cuda_all_override = get_option('CMAKE_CXX_FLAGS')
cuda_general_override = get_option('OVERRIDE_GENERAL_CXX_FLAGS')

if cuda_all_override != [] and cuda_general_override != []
  # Abort here: CMAKE_CXX_FLAGS would otherwise never be assigned and its
  # later use would fail with an unrelated "unknown variable" error.
  error('Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together')
elif cuda_general_override != []
  message('OVERRIDING GENERAL compilation flags')
  CMAKE_CXX_FLAGS = cuda_general_override + DEF_WL_CXX_FLAGS
elif cuda_all_override != []
  message('OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags')
  # The user supplied list replaces both groups; it must be copied into the
  # variable, since later code reads the variable and not the option.
  CMAKE_CXX_FLAGS = cuda_all_override
else
  message('Using DEFAULT compilation flags')
  CMAKE_CXX_FLAGS = DEF_COMBINED_CXX_FLAGS
endif
356
+
357
+
322
358
cudnn_libdirs = get_option (' cudnn_libdirs' )
323
359
cu_blas = cc.find_library (' cublas' , dirs : cudnn_libdirs, required : false )
324
360
cu_dnn = cc.find_library (' cudnn' , dirs : cudnn_libdirs, required : false )
@@ -356,7 +392,8 @@ elif get_option('USE_CUDA')
356
392
cuda_arguments += [' -Xcompiler' , ' -MD' ]
357
393
endif
358
394
else
359
- cuda_arguments += [' --std=c++14' , ' -Xcompiler' , ' -fPIC' ]
395
+ cuda_arguments += CMAKE_CXX_FLAGS
396
+ #cuda_arguments += ['--std=c++14', '-Xcompiler', '-fPIC']
360
397
endif
361
398
if get_option (' nvcc_ccbin' ) != ''
362
399
cuda_arguments += [' -ccbin=' + get_option (' nvcc_ccbin' )]
@@ -374,6 +411,7 @@ elif get_option('USE_CUDA')
374
411
else
375
412
376
413
endif
414
+ nvcc_extra_args += get_option (' CUDA_NVCC_FLAGS' )
377
415
files += cuda_files
378
416
files += custom_target (' cuda fp32 code' ,
379
417
input : ' src/neural/cuda/common_kernels.cu' ,
@@ -383,18 +421,19 @@ elif get_option('USE_CUDA')
383
421
)
384
422
385
423
# Handling of fp16 cuda code.
386
- nvcc_arch = ' -arch=compute_' + get_option (' USE_SM' )
424
+ # nvcc_arch = '-arch=compute_' + get_option('USE_SM')
387
425
nvcc_sm_list = [' sm_' + get_option (' USE_SM' )]
388
426
# Ignore the given CC for fp16 when it is not in the supported list.
389
427
if cuda_cc == '' or not nvcc_sm_list.contains(' sm_' + cuda_cc)
390
- nvcc_extra_args = [nvcc_arch ]
428
+ nvcc_extra_args = []
391
429
nvcc_help = run_command (nvcc, ' -h' ).stdout()
392
430
foreach x : nvcc_sm_list
393
431
if nvcc_help.contains(x)
394
- nvcc_extra_args += ' -code =' + x
432
+ nvcc_extra_args += ' -arch =' + x
395
433
endif
396
434
endforeach
397
435
endif
436
+ nvcc_extra_args += get_option (' CUDA_NVCC_FLAGS' )
398
437
files += custom_target (' cuda fp16 code' ,
399
438
input : ' src/neural/cuda/fp16_kernels.cu' ,
400
439
output : outputname,
@@ -411,8 +450,23 @@ elif get_option('USE_AMD')
411
450
files += ' src/neural/amd/network_amd.cpp'
412
451
files += ' src/neural/amd/common_kernels.cpp'
413
452
414
- add_project_arguments (' -D__HIP_PLATFORM_AMD__' , language : ' cpp' )
415
- add_project_arguments (' -O3' , language : ' cpp' )
453
# ---------------------------------------------------------------------------
# Compiler flag selection for the AMD (HIP) backend.
#
#   DEF_WL_CXX_FLAGS       - workload specific define that is always needed.
#   DEF_GENERAL_CXX_FLAGS  - default optimisation level.
#
# OVERRIDE_GENERAL_CXX_FLAGS replaces only the general group, CMAKE_CXX_FLAGS
# replaces both groups. The two options are mutually exclusive.
# ---------------------------------------------------------------------------
DEF_WL_CXX_FLAGS = ['-D__HIP_PLATFORM_AMD__']
DEF_GENERAL_CXX_FLAGS = ['-O3']
DEF_COMBINED_CXX_FLAGS = DEF_WL_CXX_FLAGS + DEF_GENERAL_CXX_FLAGS

# Read each option once instead of re-querying it in every branch.
amd_all_override = get_option('CMAKE_CXX_FLAGS')
amd_general_override = get_option('OVERRIDE_GENERAL_CXX_FLAGS')

if amd_all_override != [] and amd_general_override != []
  # Abort here: CMAKE_CXX_FLAGS would otherwise never be assigned and the
  # add_project_arguments() call below would fail with an unrelated error.
  error('Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together')
elif amd_general_override != []
  message('OVERRIDING GENERAL compilation flags')
  CMAKE_CXX_FLAGS = amd_general_override + DEF_WL_CXX_FLAGS
elif amd_all_override != []
  message('OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags')
  # The user supplied list replaces both groups; it must be copied into the
  # variable, since the call below reads the variable and not the option.
  CMAKE_CXX_FLAGS = amd_all_override
else
  message('Using DEFAULT compilation flags')
  CMAKE_CXX_FLAGS = DEF_COMBINED_CXX_FLAGS
endif

# Apply the selected flags to every C++ compilation in the project.
add_project_arguments(CMAKE_CXX_FLAGS, language : 'cpp')
416
470
417
471
hip_blas_lib = cc.find_library (' hipblas' , required : true )
418
472
hip_blas_runtime = cc.find_library (' hipblas' , required : true )
427
481
endif
428
482
429
483
484
+
430
485
431
486
0 commit comments