Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ if (USE_SW)
set(SW ON)
include_directories(${SW_MATH}/include)
include_directories(${SW_FFT}/include)

target_link_libraries(${ABACUS_BIN_NAME} ${SW_FFT}/lib/libfftw3.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a)
Expand Down Expand Up @@ -373,6 +373,7 @@ if(USE_CUDA)
if(USE_CUDA)
add_compile_definitions(__CUDA)
add_compile_definitions(__UT_USE_CUDA)
target_compile_definitions(${ABACUS_BIN_NAME} PRIVATE __USE_NVTX)
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G" CACHE STRING "CUDA flags for debug build" FORCE)
endif()
Expand Down Expand Up @@ -520,7 +521,7 @@ if(ENABLE_MLALGO)
include_directories(${libnpy_INCLUDE_DIR})
endif()
include_directories(${libnpy_SOURCE_DIR}/include)

add_compile_definitions(__MLALGO)
endif()

Expand Down Expand Up @@ -560,7 +561,7 @@ if (ENABLE_CNPY)
include_directories(${cnpy_INCLUDE_DIR})
endif()
include_directories(${cnpy_SOURCE_DIR})

# find ZLIB and link
find_package(ZLIB REQUIRED)
target_link_libraries(${ABACUS_BIN_NAME} cnpy ZLIB::ZLIB)
Expand Down
10 changes: 10 additions & 0 deletions docs/advanced/input_files/input-main.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
- [min\_dist\_coef](#min_dist_coef)
- [device](#device)
- [precision](#precision)
- [timer\_enable\_nvtx](#timer_enable_nvtx)
- [nb2d](#nb2d)
- [Input Files](#variables-related-to-input-files)
- [stru\_file](#stru_file)
Expand Down Expand Up @@ -706,6 +707,15 @@ If only one value is set (such as `kspacing 0.5`), then kspacing values of a/b/c
- double: double precision
- **Default**: double

### timer_enable_nvtx

- **Type**: Boolean
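- **Description**: Controls whether the built-in timer emits NVTX range labels for profiling. When enabled, each timed section is wrapped in an NVTX range named `class_name:name`. This parameter only takes effect when ABACUS is built with CUDA support; in other builds it is read but has no effect.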

- True: Enable NVTX profiling labels in the timer.
- False: Disable NVTX profiling labels in the timer.
- **Default**: False

### nb2d

- **Type**: Integer
Expand Down
38 changes: 27 additions & 11 deletions source/source_base/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
#include "chrono"
#include "source_base/formatter.h"

#if defined(__CUDA) && defined(__USE_NVTX)
#include <nvToolsExt.h>
#include "source_io/module_parameter/parameter.h"
#endif

namespace ModuleBase
{

Expand Down Expand Up @@ -93,6 +98,12 @@ void timer::tick(const std::string &class_name,const std::string &name)
#endif
++timer_one.calls;
timer_one.start_flag = false;
#if defined(__CUDA) && defined(__USE_NVTX)
// This branch has just counted a new call of the timed section. If NVTX
// labelling is switched on in the input parameters, open an NVTX range
// named "<class_name>:<name>"; the nvtxRangePop() in the other branch of
// this function closes it.
if (PARAM.inp.timer_enable_nvtx){
std::string label = class_name + ":" + name;
// nvtxRangePushA expects a NUL-terminated ASCII string.
nvtxRangePushA(label.data());
}
#endif
}
else
{
Expand All @@ -107,6 +118,11 @@ void timer::tick(const std::string &class_name,const std::string &name)
timer_one.cpu_second += (cpu_time() - timer_one.cpu_start);
#endif
timer_one.start_flag = true;
#if defined(__CUDA) && defined(__USE_NVTX)
// The elapsed time of the section has just been accumulated; close the
// NVTX range that was opened when the section was started.
// NOTE(review): the flag is re-read here. If it was false when the matching
// start tick ran (e.g. a timer started before the input file was parsed),
// this pops without a corresponding push -- confirm that is harmless.
if (PARAM.inp.timer_enable_nvtx){
nvtxRangePop();
}
#endif
}
} // end if(!omp_get_thread_num())
}
Expand All @@ -128,7 +144,7 @@ void timer::write_to_json(std::string file_name)
int is_initialized = 0;
MPI_Initialized(&is_initialized);
if (!is_initialized) {
return;
return;
}
int my_rank = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
Expand Down Expand Up @@ -195,12 +211,12 @@ void timer::write_to_json(std::string file_name)
const Timer_One timer_one = timer_pool_B.second;
ofs << indent << indent << indent << indent << "{\n";
ofs << indent << indent << indent << indent << "\"name\": \"" << name << "\",\n";
ofs << indent << indent << indent << indent << "\"cpu_second\": "
ofs << indent << indent << indent << indent << "\"cpu_second\": "
<< std::setprecision(15) << timer_one.cpu_second << ",\n";
ofs << indent << indent << indent << indent << "\"calls\": " << timer_one.calls << ",\n";
ofs << indent << indent << indent << indent << "\"cpu_second_per_call\": "
ofs << indent << indent << indent << indent << "\"cpu_second_per_call\": "
<< double_to_string(timer_one.cpu_second/timer_one.calls) << ",\n";
ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": "
ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": "
<< double_to_string(timer_one.cpu_second/timer_pool[""]["total"].cpu_second) << "\n";

if (order_b == timer_pool_A.second.size())
Expand Down Expand Up @@ -283,11 +299,11 @@ void timer::print_all(std::ofstream &ofs)


// if the total time is too small, we do not calculate the percentage
if (timer_pool_order[0].second.cpu_second < 1e-9)
if (timer_pool_order[0].second.cpu_second < 1e-9)
{
pers.push_back(0);
}
else
}
else
{
pers.push_back(percentage);
}
Expand All @@ -300,10 +316,10 @@ void timer::print_all(std::ofstream &ofs)

std::vector<std::string> titles = {"CLASS_NAME", "NAME", "TIME/s", "CALLS", "AVG/s", "PER/%"};
std::vector<std::string> formats = {"%-10s", "%-10s", "%6.2f", "%8d", "%6.2f", "%6.2f"};
FmtTable time_statistics(/*titles=*/titles,
/*nrows=*/pers.size(),
/*formats=*/formats,
/*indent=*/0,
FmtTable time_statistics(/*titles=*/titles,
/*nrows=*/pers.size(),
/*formats=*/formats,
/*indent=*/0,
/*align=*/{/*value*/FmtTable::Align::LEFT, /*title*/FmtTable::Align::CENTER});
time_statistics << class_names << names << times << calls << avgs << pers;
const std::string table = "\nTIME STATISTICS\n" + time_statistics.str();
Expand Down
21 changes: 11 additions & 10 deletions source/source_io/module_parameter/input_parameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ struct Input_para

std::string device = "auto";
std::string precision = "double";
bool timer_enable_nvtx = false; ///< emit NVTX range labels from ModuleBase::timer::tick (only compiled in when __CUDA and __USE_NVTX are defined)

// ============== #Parameters (2.Electronic structure) ===========================
std::string ks_solver = "default"; ///< xiaohui add 2013-09-01
Expand Down Expand Up @@ -375,7 +376,7 @@ struct Input_para
bool out_proj_band = false; ///< projected band structure calculation jiyy add 2022-05-11
std::string out_level = "ie"; ///< control the output information.
std::vector<int> out_dmr = {0, 8}; ///< output density matrix in real space DM(R)
std::vector<int> out_dmk = {0, 8}; ///< output density matrix in reciprocal space DM(k)
std::vector<int> out_dmk = {0, 8}; ///< output density matrix in reciprocal space DM(k)
bool out_bandgap = false; ///< QO added for bandgap printing
std::vector<int> out_mat_hs = {0, 8}; ///< output H matrix and S matrix in local basis.
std::vector<int> out_mat_tk = {0, 8}; ///< output T(k) matrix in local basis.
Expand Down Expand Up @@ -659,29 +660,29 @@ struct Input_para
* the following two sets of parameters are for the XC parameterization.
* The first element should be the LibXC id, to assign the analytical
* form of the eXchange and Correlation part of the functional.
*
*
* Starting from the second parameter, the parameters are the coefficients
* of the functional. For example the M06-L functional, one should refer
* to the source file (source code of LibXC)
*
*
* src/mgga_x_m06l.c
*
*
* the implementation can be found in the file
*
*
* src/maple2c/mgga_exc/mgga_x_m06l.c.
*
*
* There are 18 parameters for the exchange part, so the whole length of
* the xc_exch_ext should be 19. (MGGA_X_M06L, id = 203)
*
*
* Likewise, the correlation part can be found in corresponding files.
*
*
* PBE functional is used as the default functional for XCPNet.
*/
// src/gga_x_pbe.c
std::vector<double> xc_exch_ext = {
101, 0.8040, 0.2195149727645171};
101, 0.8040, 0.2195149727645171};
// src/gga_c_pbe.c
std::vector<double> xc_corr_ext = {
130, 0.06672455060314922, 0.031090690869654895034, 1.00000};
130, 0.06672455060314922, 0.031090690869654895034, 1.00000};
};
#endif
6 changes: 6 additions & 0 deletions source/source_io/read_input_item_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,12 @@ void ReadInput::item_system()
};
this->add_item(item);
}
{
// Register the `timer_enable_nvtx` input keyword; its value is stored in
// input.timer_enable_nvtx.
// NOTE(review): read_sync_bool is defined outside this view; presumably it
// installs the read and MPI-broadcast handlers on `item` -- confirm.
Input_Item item("timer_enable_nvtx");
item.annotation = "enable NVTX labeling for profiling or not";
read_sync_bool(input.timer_enable_nvtx);
this->add_item(item);
}
}

} // namespace ModuleIO
Loading