diff --git a/CMakeLists.txt b/CMakeLists.txt
index 84bed9ad77..f154246472 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,10 @@ cmake_dependent_option( T8CODE_BUILD_DOCUMENTATION_SPHINX "Build t8code's docume
 set(T8CODE_CUSTOM_PARALLEL_TEST_COMMAND "" CACHE STRING "Define a custom command for parallel tests , e.g.: mpirun -np 8 (overwrites standard mpirun -np 4 if build with mpi)")
 set(T8CODE_CUSTOM_SERIAL_TEST_COMMAND "" CACHE STRING "Define a custom command for serial tests.")
 
+# Profiling aid: when ON, T8_ENABLE_PROFILE_BARRIER=1 is defined and the profiled
+# forest routines call sc_MPI_Barrier right before they read sc_MPI_Wtime.
+option( T8CODE_ENABLE_PROFILING_BARRIERS "Use MPI-Barriers for profiling" OFF )
+
 # Reading the option T8CODE_TEST_LEVEL: Possible choices are T8_TEST_LEVEL_FULL, T8_TEST_LEVEL_MEDIUM, or T8_TEST_LEVEL_BASIC. Default is T8_TEST_LEVEL_FULL.
 set(T8CODE_TEST_LEVEL "T8_TEST_LEVEL_FULL" CACHE STRING "Test level: T8_TEST_LEVEL_FULL for full tests, T8_TEST_LEVEL_MEDIUM for less thorough tests, T8_TEST_LEVEL_BASIC for minimal tests. (WARNING: Use with care.)")
 set_property(CACHE T8CODE_TEST_LEVEL PROPERTY STRINGS "T8_TEST_LEVEL_FULL" "T8_TEST_LEVEL_MEDIUM" "T8_TEST_LEVEL_BASIC")
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 3293f0127b..2d8b305e60 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -44,6 +44,10 @@ function( add_t8_benchmark )
     if( T8CODE_EXPORT_COMPILE_COMMANDS )
         set_target_properties( ${ADD_T8_BENCHMARK_NAME} PROPERTIES EXPORT_COMPILE_COMMANDS ON )
     endif( T8CODE_EXPORT_COMPILE_COMMANDS )
+    if( T8CODE_ENABLE_PROFILING_BARRIERS )
+        # Scoped PRIVATE: the define is only needed to compile the benchmark's own sources.
+        target_compile_definitions( ${ADD_T8_BENCHMARK_NAME} PRIVATE T8_ENABLE_PROFILE_BARRIER=1 )
+    endif( T8CODE_ENABLE_PROFILING_BARRIERS )
 
     install( TARGETS ${ADD_T8_BENCHMARK_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR} )
 endfunction()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d4d4e90ce2..4cb5a5ea2f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -51,6 +51,11 @@ if( T8CODE_EXPORT_COMPILE_COMMANDS )
     set_target_properties( T8 PROPERTIES EXPORT_COMPILE_COMMANDS ON )
 endif( T8CODE_EXPORT_COMPILE_COMMANDS )
 
+# PUBLIC so that targets linking T8 are compiled with the same barrier setting.
+if( T8CODE_ENABLE_PROFILING_BARRIERS )
+    target_compile_definitions( T8 PUBLIC T8_ENABLE_PROFILE_BARRIER=1 )
+endif( T8CODE_ENABLE_PROFILING_BARRIERS )
+
 if( T8CODE_ENABLE_NETCDF )
     target_link_libraries( T8 PUBLIC NetCDF::NetCDF )
     target_compile_definitions(T8 PUBLIC
diff --git a/src/t8_forest/t8_forest.cxx b/src/t8_forest/t8_forest.cxx
index e4c9f5f435..2ef8b7ceb4 100644
--- a/src/t8_forest/t8_forest.cxx
+++ b/src/t8_forest/t8_forest.cxx
@@ -3032,6 +3032,9 @@ t8_forest_commit (t8_forest_t forest)
   T8_ASSERT (!forest->committed);
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of commit */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->commit_runtime = sc_MPI_Wtime ();
   }
 
@@ -3247,6 +3250,9 @@ t8_forest_commit (t8_forest_t forest)
 
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of commit */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->commit_runtime = sc_MPI_Wtime () - forest->profile->commit_runtime;
   }
 
diff --git a/src/t8_forest/t8_forest_balance.cxx b/src/t8_forest/t8_forest_balance.cxx
index 7381628049..f137b98372 100644
--- a/src/t8_forest/t8_forest_balance.cxx
+++ b/src/t8_forest/t8_forest_balance.cxx
@@ -164,6 +164,9 @@ t8_forest_balance (t8_forest_t forest, int repartition)
   adap_stats = ghost_stats = partition_stats = NULL;
 
   if (forest->profile != NULL) {
     /* Profiling is enable, so we measure the runtime of balance */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->balance_runtime = -sc_MPI_Wtime ();
     /* We store the individual adapt, ghost, and partition runtimes */
@@ -285,6 +288,9 @@ t8_forest_balance (t8_forest_t forest, int repartition)
 
   if (forest->profile != NULL) {
     /* Profiling is enabled, so we measure the runtime of balance. */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->balance_runtime += sc_MPI_Wtime ();
     forest->profile->balance_rounds = count_rounds;
     /* Print the runtime of adapt/ghost/partition */
diff --git a/src/t8_forest/t8_forest_ghost.cxx b/src/t8_forest/t8_forest_ghost.cxx
index f5c578d696..8eccb1acc0 100644
--- a/src/t8_forest/t8_forest_ghost.cxx
+++ b/src/t8_forest/t8_forest_ghost.cxx
@@ -1551,6 +1551,9 @@ t8_forest_ghost_create_ext (t8_forest_t forest, int unbalanced_version)
 
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of ghost_create */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->ghost_runtime = -sc_MPI_Wtime ();
     /* DO NOT DELETE THE FOLLOWING line.
      * even if you do not want this output. It fixes a bug that occurred on JUQUEEN, where the
@@ -1608,6 +1611,9 @@ t8_forest_ghost_create_ext (t8_forest_t forest, int unbalanced_version)
 
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of ghost_create */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->ghost_runtime += sc_MPI_Wtime ();
     /* We also store the number of ghosts and remotes */
     if (ghost != NULL) {
diff --git a/src/t8_forest/t8_forest_partition.cxx b/src/t8_forest/t8_forest_partition.cxx
index 994b1d5f83..a4f61a0e0c 100644
--- a/src/t8_forest/t8_forest_partition.cxx
+++ b/src/t8_forest/t8_forest_partition.cxx
@@ -92,6 +92,9 @@ t8_forest_partition_create_offsets (t8_forest_t forest)
 {
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->forest_offsets_runtime = -sc_MPI_Wtime ();
   }
 
@@ -120,6 +123,9 @@ t8_forest_partition_create_offsets (t8_forest_t forest)
   t8_shmem_array_end_writing (forest->element_offsets);
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->forest_offsets_runtime += sc_MPI_Wtime ();
   }
 }
@@ -266,6 +272,9 @@ t8_forest_partition_create_first_desc (t8_forest_t forest)
 {
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->first_descendant_runtime = -sc_MPI_Wtime ();
   }
   sc_MPI_Comm comm;
@@ -348,6 +357,9 @@ t8_forest_partition_create_first_desc (t8_forest_t forest)
 #endif
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->first_descendant_runtime += sc_MPI_Wtime ();
   }
 }
@@ -358,6 +370,9 @@ t8_forest_partition_create_tree_offsets (t8_forest_t forest)
 
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->cmesh_offsets_runtime = -sc_MPI_Wtime ();
   }
 
@@ -426,6 +441,9 @@ t8_forest_partition_create_tree_offsets (t8_forest_t forest)
   }
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->cmesh_offsets_runtime += sc_MPI_Wtime ();
   }
 }
@@ -1209,6 +1227,9 @@ t8_forest_partition (t8_forest_t forest)
 
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->partition_runtime = sc_MPI_Wtime ();
 
     /* DO NOT DELETE THE FOLLOWING line.
@@ -1239,6 +1260,9 @@ t8_forest_partition (t8_forest_t forest)
 
   if (forest->profile != NULL) {
     /* If profiling is enabled, we measure the runtime of partition */
+#if T8_ENABLE_PROFILE_BARRIER
+    sc_MPI_Barrier (forest->mpicomm);
+#endif
     forest->profile->partition_runtime = sc_MPI_Wtime () - forest->profile->partition_runtime;
 
     /* DO NOT DELETE THE FOLLOWING line.