Skip to content

Commit 933b8e3

Browse files
committed
Fix: Use __USE_NVTX macro to avoid NVTX linking errors in tests.
Clarify in the docs that the timer_enable_nvtx parameter only takes effect on CUDA platforms. Signed-off-by: Tianxiang Wang <[email protected]>. Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd.
1 parent fd5c37c commit 933b8e3

File tree

3 files changed

+8
-7
lines changed

3 files changed

+8
-7
lines changed

CMakeLists.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ if (USE_SW)
287287
set(SW ON)
288288
include_directories(${SW_MATH}/include)
289289
include_directories(${SW_FFT}/include)
290-
290+
291291
target_link_libraries(${ABACUS_BIN_NAME} ${SW_FFT}/lib/libfftw3.a)
292292
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a)
293293
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a)
@@ -373,6 +373,7 @@ if(USE_CUDA)
373373
if(USE_CUDA)
374374
add_compile_definitions(__CUDA)
375375
add_compile_definitions(__UT_USE_CUDA)
376+
target_compile_definitions(${ABACUS_BIN_NAME} PRIVATE __USE_NVTX)
376377
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
377378
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G" CACHE STRING "CUDA flags for debug build" FORCE)
378379
endif()
@@ -520,7 +521,7 @@ if(ENABLE_MLALGO)
520521
include_directories(${libnpy_INCLUDE_DIR})
521522
endif()
522523
include_directories(${libnpy_SOURCE_DIR}/include)
523-
524+
524525
add_compile_definitions(__MLALGO)
525526
endif()
526527

@@ -560,7 +561,7 @@ if (ENABLE_CNPY)
560561
include_directories(${cnpy_INCLUDE_DIR})
561562
endif()
562563
include_directories(${cnpy_SOURCE_DIR})
563-
564+
564565
# find ZLIB and link
565566
find_package(ZLIB REQUIRED)
566567
target_link_libraries(${ABACUS_BIN_NAME} cnpy ZLIB::ZLIB)

docs/advanced/input_files/input-main.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ If only one value is set (such as `kspacing 0.5`), then kspacing values of a/b/c
710710
### timer_enable_nvtx
711711

712712
- **Type**: Boolean
713-
- **Description**:
713+
- **Description**: Controls whether NVTX profiling labels are emitted by the timer. This feature is only effective on CUDA platforms.
714714

715715
- True: Enable NVTX profiling labels in the timer.
716716
- False: Disable NVTX profiling labels in the timer.

source/source_base/timer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "chrono"
1515
#include "source_base/formatter.h"
1616

17-
#ifdef __CUDA
17+
#if defined(__CUDA) && defined(__USE_NVTX)
1818
#include <nvToolsExt.h>
1919
#include "source_io/module_parameter/parameter.h"
2020
#endif
@@ -98,7 +98,7 @@ void timer::tick(const std::string &class_name,const std::string &name)
9898
#endif
9999
++timer_one.calls;
100100
timer_one.start_flag = false;
101-
#ifdef __CUDA
101+
#if defined(__CUDA) && defined(__USE_NVTX)
102102
if (PARAM.inp.timer_enable_nvtx){
103103
std::string label = class_name + ":" + name;
104104
nvtxRangePushA(label.data());
@@ -118,7 +118,7 @@ void timer::tick(const std::string &class_name,const std::string &name)
118118
timer_one.cpu_second += (cpu_time() - timer_one.cpu_start);
119119
#endif
120120
timer_one.start_flag = true;
121-
#ifdef __CUDA
121+
#if defined(__CUDA) && defined(__USE_NVTX)
122122
if (PARAM.inp.timer_enable_nvtx){
123123
nvtxRangePop();
124124
}

0 commit comments

Comments
 (0)