diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index d2f86e4d97..2c1e50d231 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -27,6 +27,8 @@ module m_mpi_proxy use m_mpi_common + use m_nvtx + use ieee_arithmetic ! ========================================================================== @@ -865,6 +867,7 @@ contains #ifdef MFC_MPI + call nvtxStartRange("RHS-COMM-PACKBUF") !$acc update device(v_size) if (qbmm .and. .not. polytropic) then @@ -1057,6 +1060,7 @@ contains #:endif end if #:endfor + call nvtxEndRange ! Packbuf ! Send/Recv #:for rdma_mpi in [False, True] @@ -1066,26 +1070,34 @@ contains #:if rdma_mpi !$acc data attach(p_send, p_recv) !$acc host_data use_device(p_send, p_recv) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") !$acc update host(q_cons_buff_send, ib_buff_send) + call nvtxEndRange + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RDMA") #:endif call MPI_SENDRECV( & p_send, buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, & p_recv, buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange ! RHS-COMM-SENDRECV-(NO)-RDMA #:if rdma_mpi !$acc end host_data !$acc end data !$acc wait #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") !$acc update device(q_cons_buff_recv) + call nvtxEndRange #:endif end if #:endfor ! 
Unpack Received Buffer + call nvtxStartRange("RHS-COMM-UNPACKBUF") #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 @@ -1254,6 +1266,7 @@ contains #:endif end if #:endfor + call nvtxEndRange #endif diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 2628b024b2..95b8557d71 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -622,7 +622,7 @@ contains real(kind(0d0)) :: t_start, t_finish integer :: i, j, k, l, id !< Generic loop iterators - call nvtxStartRange("Compute_RHS") + call nvtxStartRange("COMPUTE-RHS") call cpu_time(t_start) ! Association/Population of Working Variables ====================== @@ -669,7 +669,7 @@ contains gm_alpha_qp%vf) call nvtxEndRange - call nvtxStartRange("RHS-MPI") + call nvtxStartRange("RHS-COMMUNICATION") call s_populate_variables_buffers(q_prim_qp%vf, pb, mv) call nvtxEndRange @@ -683,21 +683,25 @@ contains if (qbmm) call s_mom_inv(q_cons_qp%vf, q_prim_qp%vf, mom_sp, mom_3d, pb, rhs_pb, mv, rhs_mv, idwbuff(1), idwbuff(2), idwbuff(3), nbub) - call nvtxStartRange("Viscous") - if (viscous) call s_get_viscous(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n, & - qL_prim, & - qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, & - dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n, & - qR_prim, & - q_prim_qp, & - dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp, & - idwbuff(1), idwbuff(2), idwbuff(3)) - call nvtxEndRange + if (viscous) then + call nvtxStartRange("RHS-VISCOUS") + call s_get_viscous(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & + dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n, & + qL_prim, & + qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, & + dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n, & + qR_prim, & + q_prim_qp, & + dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp, & + idwbuff(1), idwbuff(2), idwbuff(3)) + call nvtxEndRange + end if - call nvtxStartRange("Surface_Tension") - if (surface_tension) call s_get_capilary(q_prim_qp%vf) - call nvtxEndRange + if (surface_tension) then + call 
nvtxStartRange("RHS-SURFACE-TENSION") + call s_get_capilary(q_prim_qp%vf) + call nvtxEndRange + end if ! Dimensional Splitting Loop ======================================= do id = 1, num_dims @@ -777,7 +781,7 @@ contains irx%end = m; iry%end = n; irz%end = p ! =============================================================== - call nvtxStartRange("RHS_riemann_solver") + call nvtxStartRange("RHS-RIEMANN-SOLVER") ! Computing Riemann Solver Flux and Source Flux ================= @@ -801,7 +805,7 @@ contains ! Additional physics and source terms ============================== ! RHS addition for advection source - call nvtxStartRange("RHS_advection_source") + call nvtxStartRange("RHS-ADVECTION-SRC") call s_compute_advection_source_term(id, & rhs_vf, & q_cons_qp, & @@ -810,15 +814,15 @@ contains call nvtxEndRange ! RHS additions for hypoelasticity - call nvtxStartRange("RHS_Hypoelasticity") + call nvtxStartRange("RHS-HYPOELASTICITY") if (hypoelasticity) call s_compute_hypoelastic_rhs(id, & q_prim_qp%vf, & rhs_vf) call nvtxEndRange ! RHS additions for viscosity - call nvtxStartRange("RHS_add_phys") if (viscous .or. surface_tension) then + call nvtxStartRange("RHS-ADD-PHYSICS") call s_compute_additional_physics_rhs(id, & q_prim_qp%vf, & rhs_vf, & @@ -826,34 +830,38 @@ contains dq_prim_dx_qp(1)%vf, & dq_prim_dy_qp(1)%vf, & dq_prim_dz_qp(1)%vf) + call nvtxEndRange end if - call nvtxEndRange ! RHS additions for sub-grid bubbles - call nvtxStartRange("RHS_bubbles") - if (bubbles) call s_compute_bubbles_rhs(id, & - q_prim_qp%vf) - call nvtxEndRange + if (bubbles) then + call nvtxStartRange("RHS-BUBBLES-COMPUTE") + call s_compute_bubbles_rhs(id, q_prim_qp%vf) + call nvtxEndRange + end if ! 
RHS additions for qbmm bubbles - call nvtxStartRange("RHS_qbmm") - if (qbmm) call s_compute_qbmm_rhs(id, & - q_cons_qp%vf, & - q_prim_qp%vf, & - rhs_vf, & - flux_n(id)%vf, & - pb, & - rhs_pb, & - mv, & - rhs_mv) - call nvtxEndRange + + if (qbmm) then + call nvtxStartRange("RHS-QBMM") + call s_compute_qbmm_rhs(id, & + q_cons_qp%vf, & + q_prim_qp%vf, & + rhs_vf, & + flux_n(id)%vf, & + pb, & + rhs_pb, & + mv, & + rhs_mv) + call nvtxEndRange + end if ! END: Additional physics and source terms ========================= end do ! END: Dimensional Splitting Loop ================================= if (chemistry) then - call nvtxStartRange("RHS_Chem_Advection") + call nvtxStartRange("RHS-CHEM-ADVECTION") call s_compute_chemistry_advection_flux(flux_n, rhs_vf) call nvtxEndRange end if @@ -875,25 +883,29 @@ contains ! Additional Physics and Source Temrs ================================== ! Additions for acoustic_source - call nvtxStartRange("RHS_acoustic_src") - if (acoustic_source) call s_acoustic_src_calculations(q_cons_qp%vf(1:sys_size), & - q_prim_qp%vf(1:sys_size), & - t_step, & - rhs_vf) - call nvtxEndRange + if (acoustic_source) then + call nvtxStartRange("RHS-ACOUSTIC-SRC") + call s_acoustic_src_calculations(q_cons_qp%vf(1:sys_size), & + q_prim_qp%vf(1:sys_size), & + t_step, & + rhs_vf) + call nvtxEndRange + end if ! Add bubles source term - call nvtxStartRange("RHS_bubbles") - if (bubbles .and. (.not. adap_dt) .and. (.not. qbmm)) call s_compute_bubble_source( & - q_cons_qp%vf(1:sys_size), & - q_prim_qp%vf(1:sys_size), & - t_step, & - rhs_vf) - call nvtxEndRange + if (bubbles .and. (.not. adap_dt) .and. (.not. 
qbmm)) then + call nvtxStartRange("RHS-BUBBLES-SRC") + call s_compute_bubble_source( & + q_cons_qp%vf(1:sys_size), & + q_prim_qp%vf(1:sys_size), & + t_step, & + rhs_vf) + call nvtxEndRange + end if if (chemistry) then if (chem_params%reactions) then - call nvtxStartRange("RHS_Chem_Reactions") + call nvtxStartRange("RHS-CHEM-REACTIONS") call s_compute_chemistry_reaction_flux(rhs_vf, q_cons_qp%vf, q_prim_qp%vf) call nvtxEndRange end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 5be39477b6..ec8c59de4d 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1239,7 +1239,7 @@ contains integer :: save_count call cpu_time(start) - ! call nvtxStartRange("I/O") + call nvtxStartRange("SAVE-DATA") do i = 1, sys_size !$acc update host(q_cons_ts(1)%vf(i)%sf) do l = 0, p @@ -1267,7 +1267,7 @@ contains call s_write_data_files(q_cons_ts(1)%vf, q_prim_vf, save_count) - ! call nvtxEndRange + call nvtxEndRange call cpu_time(finish) if (cfl_dt) then nt = mytime/t_save diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 288fed1056..5559ef2746 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -279,7 +279,7 @@ contains integer :: i, j, k, l, q !< Generic loop iterator ! Stage 1 of 1 ===================================================== - call nvtxStartRange("Time_Step") + call nvtxStartRange("TIMESTEP") call s_compute_rhs(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg) @@ -353,7 +353,6 @@ contains end do end if - call nvtxStartRange("body_forces") if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, dt) call nvtxEndRange @@ -371,8 +370,6 @@ contains end if end if - call nvtxEndRange - ! 
================================================================== end subroutine s_1st_order_tvd_rk @@ -391,7 +388,7 @@ contains call cpu_time(start) - call nvtxStartRange("Time_Step") + call nvtxStartRange("TIMESTEP") call s_compute_rhs(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg) @@ -457,9 +454,7 @@ contains end do end if - call nvtxStartRange("body_forces") if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) - call nvtxEndRange if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) @@ -532,9 +527,7 @@ contains end do end if - call nvtxStartRange("body_forces") if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2d0*dt/3d0) - call nvtxEndRange if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) @@ -573,7 +566,7 @@ contains if (.not. adap_dt) then call cpu_time(start) - call nvtxStartRange("Time_Step") + call nvtxStartRange("TIMESTEP") end if call s_compute_rhs(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg) @@ -640,9 +633,7 @@ contains end do end if - call nvtxStartRange("body_forces") if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt) - call nvtxEndRange if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) @@ -715,9 +706,7 @@ contains end do end if - call nvtxStartRange("body_forces") if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4d0) - call nvtxEndRange if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf) @@ -789,9 +778,7 @@ contains end do end if - call nvtxStartRange("body_forces") if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2d0*dt/3d0) - call nvtxEndRange if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf) @@ -832,7 +819,7 @@ contains call cpu_time(start) - call nvtxStartRange("Time_Step") + call nvtxStartRange("TIMESTEP") ! 
Stage 1 of 3 ===================================================== call s_adaptive_dt_bubble(t_step) @@ -935,6 +922,7 @@ contains integer :: i, j, k, l + call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) !$acc parallel loop collapse(4) gang vector default(present) @@ -949,6 +937,8 @@ contains end do end do + call nvtxEndRange + end subroutine s_apply_bodyforces !> This subroutine saves the temporary q_prim_vf vector diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 091538073e..18bab5a306 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -21,6 +21,8 @@ program p_main use m_start_up use m_time_steppers + + use m_nvtx ! ========================================================================== implicit none @@ -36,16 +38,24 @@ program p_main call system_clock(COUNT=cpu_start, COUNT_RATE=cpu_rate) + call nvtxStartRange("INIT") + !Initialize MPI + call nvtxStartRange("INIT-MPI") call s_initialize_mpi_domain() + call nvtxEndRange !Initialize Modules + call nvtxStartRange("INIT-MODULES") call s_initialize_modules() + call nvtxEndRange allocate (proc_time(0:num_procs - 1)) allocate (io_proc_time(0:num_procs - 1)) + call nvtxStartRange("INIT-GPU-VARS") call s_initialize_gpu_vars() + call nvtxEndRange ! Setting the time-step iterator to the first time-step if (cfl_dt) then @@ -61,6 +71,9 @@ program p_main finaltime = t_step_stop*dt end if + call nvtxEndRange ! INIT + + call nvtxStartRange("SIMULATION-TIME-MARCH") ! Time-stepping Loop ======================================================= do @@ -95,8 +108,12 @@ program p_main end do ! ========================================================================== + call nvtxEndRange ! Simulation + deallocate (proc_time, io_proc_time) + call nvtxStartRange("FINALIZE-MODULES") call s_finalize_modules() + call nvtxEndRange end program p_main