diff --git a/.codee-format b/.codee-format
new file mode 100644
index 0000000..fd53b41
--- /dev/null
+++ b/.codee-format
@@ -0,0 +1,85 @@
+---
+AlignAmpersandToColumnLimit: false
+AlignAssignmentOperators: true
+AlignUseItems:
+  Kind: OneItemPerLine
+  FirstLineFit: FitIfPossible
+BreakBeforeBinaryOperators: true
+Casing:
+  Identifiers: Preserve
+  Keywords: Lowercase
+  LogicalConstants: Lowercase
+  LogicalOperators: Lowercase
+  RelationalOperators: Lowercase
+  UserDefinedOperators: Lowercase
+ColumnLimit: 132
+CommentDirectivePrefixes: []
+DisabledDirectivePrefixes: []
+IndentSize: 2
+IndentExceptions:
+  ModuleContains: IndentBeforeAndAfter
+  Comments: Indent
+FixedFormLabelAlignment: Right
+ContinuationIndentSize: DoubleIndentSize
+DoubleColonSeparator: AddAlways
+EndOfLineNormalization: Autodetect
+EndStatementFormat: EndStructureAndName
+EndStatementSeparation:
+  EndAssociate: Separated
+  EndBlockConstruct: Separated
+  EndBlockData: Separated
+  EndCritical: Separated
+  EndTeam: Separated
+  EndDoLoop: Separated
+  EndEnum: Separated
+  EndEnumerationType: Separated
+  EndForall: Separated
+  EndFunction: Separated
+  EndIf: Separated
+  EndInterface: Separated
+  EndModule: Separated
+  EndModuleProcedure: Separated
+  EndProgram: Separated
+  EndSelect: Separated
+  EndSubmodule: Separated
+  EndSubroutine: Separated
+  EndType: Separated
+  EndWhere: Separated
+EnsureNewlineAtEOF: true
+ConsecutiveEmptyLines:
+  MaxToKeep: 1
+  BetweenProcedures: 1
+  RemoveAtStartOfFile: true
+  RemoveAtEndOfFile: true
+KindKeywordPrefix: RemoveAlways
+MacroIdentifiers: []
+RelationalOperators: UseSymbols
+SpacesAroundOperators:
+  LeftParenthesisExpression: NoTrailing
+  LeftParenthesisGeneric: NoSpaces
+  LeftParenthesisKeyword: OnlyLeading
+  RightParenthesisExpression: NoLeading
+  RightParenthesisGeneric: NoLeading
+  RightParenthesisKeyword: NoLeading
+  Assignment: Both
+  Association: Both
+  ControlFlowAssignment: Both
+  KeywordAssignment: NoSpaces
+  ParameterAssignment: NoSpaces
+  BinaryArithmetic: Both
+  Exponentiation: NoSpaces
+  DefinedBinary: Both
+  DefinedUnary: NoTrailing
+  Relational: Both
+  RelationalLegacy: Both
+  LogicalBinary: Both
+  LogicalNot: NoTrailing
+  UnaryPlusMinus: NoTrailing
+  Comma: OnlyTrailing
+  Concat: Both
+  DoubleColon: Both
+RemoveConsecutiveWhitespace: true
+RemoveSemicolons: true
+RemoveTrailingWhitespace: true
+SeparateMultipleInlineStatements: true
+...
diff --git a/.github/workflows/CI-Fortran-format.yml b/.github/workflows/CI-Fortran-format.yml
new file mode 100644
index 0000000..fed2f22
--- /dev/null
+++ b/.github/workflows/CI-Fortran-format.yml
@@ -0,0 +1,23 @@
+name: CI-Fortran-format
+
+on:
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  format:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+
+    steps:
+    - uses: actions/checkout@v4
+    - uses: foxtran/setup-codee@v1
+
+    - name: Run Fortran formatter
+      run: |
+        codee format .
--verbose + + - name: Check for uncommitted changes + run: | + git diff --exit-code diff --git a/examples/Fortran/example.f90 b/examples/Fortran/example.f90 index e2a759a..2db13b8 100644 --- a/examples/Fortran/example.f90 +++ b/examples/Fortran/example.f90 @@ -19,16 +19,16 @@ program main do if (.not.lb%get_range(bot, top)) exit do j = bot, top - block - real(8), allocatable :: a(:,:), b(:,:), c(:,:) - i = 3 * (100 - j) + 200 - allocate(a(i,i),b(i,i),c(i,i)) - call random_number(a) - call random_number(b) - c = matmul(a,b) - write(7,'(I6,ES15.8)') j, sum(c) - deallocate(a,b,c) - end block + block + real(8), allocatable :: a(:, :), b(:, :), c(:, :) + i = 3 * (100 - j) + 200 + allocate(a(i, i), b(i, i), c(i, i)) + call random_number(a) + call random_number(b) + c = matmul(a, b) + write(7, '(I6,ES15.8)') j, sum(c) + deallocate(a, b, c) + end block end do end do print '(A)', "Done!" diff --git a/src/Fortran/abstract_load_balancer.f90 b/src/Fortran/abstract_load_balancer.f90 index b596ac5..f10fd84 100644 --- a/src/Fortran/abstract_load_balancer.f90 +++ b/src/Fortran/abstract_load_balancer.f90 @@ -15,20 +15,20 @@ module SLB4MPI_abstract_load_balancer_m type, abstract :: load_balancer_t integer(MPI_INTEGER_KIND) :: communicator !< MPI communicator - integer(MPI_INTEGER_KIND) :: rank !< Rank of the process - integer(MPI_INTEGER_KIND) :: root = 0 !< Rank of the root process - integer(MPI_INTEGER_KIND) :: nranks !< Number of processes (group size) - integer(8) :: lower_bound !< Lower bound of range - integer(8) :: upper_bound !< Upper bound of range - integer(8) :: min_chunk_size !< Minimal chunk size for job - integer(8) :: max_chunk_size !< Maximal chunk size for job - contains + integer(MPI_INTEGER_KIND) :: rank !< Rank of the process + integer(MPI_INTEGER_KIND) :: root = 0 !< Rank of the root process + integer(MPI_INTEGER_KIND) :: nranks !< Number of processes (group size) + integer(8) :: lower_bound !< Lower bound of range + integer(8) :: upper_bound !< Upper bound of range + integer(8) :: min_chunk_size !< Minimal chunk size for job + integer(8) :: max_chunk_size !< Maximal chunk size for job + contains procedure(initialize), deferred, public :: initialize - procedure(get_range), deferred, public :: get_range - procedure(clean), deferred, public :: clean + procedure(get_range), deferred, public :: get_range + procedure(clean), deferred, public :: clean procedure :: default_initialize - end type + end type load_balancer_t abstract interface @@ -44,10 +44,10 @@ module SLB4MPI_abstract_load_balancer_m !> subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) import load_balancer_t, MPI_INTEGER_KIND - class(load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + class(load_balancer_t), intent(inout) :: lb + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size end subroutine initialize !> @@ -64,7 +64,7 @@ end subroutine initialize logical function get_range(lb, lower_bound, upper_bound) import load_balancer_t class(load_balancer_t), intent(inout) :: lb - integer(8), intent(out) :: lower_bound, upper_bound + integer(8), intent(out) :: lower_bound, upper_bound end function get_range !> @@ -94,10 +94,10 @@ end subroutine clean !> @param[in] max_chunk_size - maximal size of chank 
that can be associated with job, default: upper_bound - lower_bound + 1 !> subroutine default_initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) - class(load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + class(load_balancer_t), intent(inout) :: lb + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size integer(MPI_INTEGER_KIND) :: ierr diff --git a/src/Fortran/dynamic_load_balancer.f90 b/src/Fortran/dynamic_load_balancer.f90 index e240e30..2abc742 100644 --- a/src/Fortran/dynamic_load_balancer.f90 +++ b/src/Fortran/dynamic_load_balancer.f90 @@ -8,15 +8,15 @@ module SLB4MPI_dynamic_load_balancer_m type, extends(load_balancer_t) :: dynamic_load_balancer_t #ifdef SLB4MPI_WITH_MPI - integer(MPI_INTEGER_KIND) :: window !< sync window + integer(MPI_INTEGER_KIND) :: window !< sync window #else integer(MPI_INTEGER_KIND) :: counter !< evaluation index #endif - contains + contains procedure :: initialize procedure :: get_range procedure :: clean - end type + end type dynamic_load_balancer_t interface @@ -38,9 +38,9 @@ module SLB4MPI_dynamic_load_balancer_m subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer class(dynamic_load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size integer(MPI_ADDRESS_KIND) :: size integer(MPI_INTEGER_KIND) :: disp_unit, ierr diff --git a/src/Fortran/guided_load_balancer.f90 b/src/Fortran/guided_load_balancer.f90 index 6733b6e..d04b0de 100644 --- a/src/Fortran/guided_load_balancer.f90 +++ b/src/Fortran/guided_load_balancer.f90 @@ -8,15 +8,15 @@ module SLB4MPI_guided_load_balancer_m type, extends(load_balancer_t) :: guided_load_balancer_t #ifdef SLB4MPI_WITH_MPI - integer(MPI_INTEGER_KIND) :: window !< sync window + integer(MPI_INTEGER_KIND) :: window !< sync window #else integer(MPI_INTEGER_KIND) :: counter !< evaluation index #endif - contains + contains procedure :: initialize procedure :: get_range procedure :: clean - end type + end type guided_load_balancer_t interface @@ -38,9 +38,9 @@ module SLB4MPI_guided_load_balancer_m subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer class(guided_load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size integer(MPI_ADDRESS_KIND) :: size integer(MPI_INTEGER_KIND) :: disp_unit, ierr @@ -89,7 +89,8 @@ logical function get_range(lb, lower_bound, upper_bound) result(to_compute) #ifdef SLB4MPI_WITH_MPI call MPI_Win_lock(MPI_LOCK_EXCLUSIVE, lb%root, 
0_MPI_INTEGER_KIND, lb%window, ierr) - call MPI_Get(root_counter, 1_MPI_INTEGER_KIND, MPI_INTEGER8, lb%root, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, MPI_INTEGER8, lb%window, ierr) + call MPI_Get(root_counter, 1_MPI_INTEGER_KIND, MPI_INTEGER8, lb%root, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, MPI_INTEGER8, & + lb%window, ierr) call MPI_Win_flush(lb%root, lb%window, ierr) associated_tasks = min(max((lb%upper_bound - root_counter) / lb%nranks, lb%min_chunk_size), lb%max_chunk_size) call MPI_Fetch_and_op(associated_tasks, lower_bound, MPI_INTEGER8, lb%root, 0_MPI_ADDRESS_KIND, MPI_SUM, lb%window, ierr) diff --git a/src/Fortran/local_static_load_balancer.f90 b/src/Fortran/local_static_load_balancer.f90 index 83168af..d0c0174 100644 --- a/src/Fortran/local_static_load_balancer.f90 +++ b/src/Fortran/local_static_load_balancer.f90 @@ -6,11 +6,11 @@ module SLB4MPI_local_static_load_balancer_m type, extends(load_balancer_t) :: local_static_load_balancer_t private integer(8) :: counter - contains + contains procedure :: initialize procedure :: get_range procedure :: clean - end type + end type local_static_load_balancer_t interface @@ -31,9 +31,9 @@ module SLB4MPI_local_static_load_balancer_m !> subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) class(local_static_load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size ! integer(8) :: n_tasks, extra_tasks @@ -42,16 +42,16 @@ subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size n_tasks = (upper_bound - lower_bound + 1) / lb%nranks extra_tasks = mod(upper_bound - lower_bound + 1, lb%nranks) if (n_tasks < min_chunk_size) then - n_tasks = min_chunk_size - extra_tasks = 0 + n_tasks = min_chunk_size + extra_tasks = 0 end if if (lb%rank < extra_tasks) then - lb%lower_bound = lb%rank * (n_tasks + 1) + lower_bound - lb%upper_bound = (lb%rank + 1) * (n_tasks + 1) + lower_bound - 1 + lb%lower_bound = lb%rank * (n_tasks + 1) + lower_bound + lb%upper_bound = (lb%rank + 1) * (n_tasks + 1) + lower_bound - 1 else - lb%lower_bound = lb%rank * n_tasks + extra_tasks + lower_bound - lb%upper_bound = (lb%rank + 1) * n_tasks + extra_tasks + lower_bound - 1 + lb%lower_bound = lb%rank * n_tasks + extra_tasks + lower_bound + lb%upper_bound = (lb%rank + 1) * n_tasks + extra_tasks + lower_bound - 1 end if lb%lower_bound = min(lb%lower_bound, upper_bound + 1) @@ -74,7 +74,7 @@ end subroutine initialize !> logical function get_range(lb, lower_bound, upper_bound) result(to_compute) class(local_static_load_balancer_t), intent(inout) :: lb - integer(8), intent(out) :: lower_bound, upper_bound + integer(8), intent(out) :: lower_bound, upper_bound to_compute = .true. 
diff --git a/src/Fortran/runtime_load_balancer.f90 b/src/Fortran/runtime_load_balancer.f90 index df937dd..fc02d32 100644 --- a/src/Fortran/runtime_load_balancer.f90 +++ b/src/Fortran/runtime_load_balancer.f90 @@ -18,11 +18,11 @@ module SLB4MPI_runtime_load_balancer_m type, extends(load_balancer_t) :: runtime_load_balancer_t class(load_balancer_t), allocatable :: balancer !< actual load balancer - contains + contains procedure :: initialize procedure :: get_range procedure :: clean - end type + end type runtime_load_balancer_t interface @@ -45,42 +45,42 @@ module SLB4MPI_runtime_load_balancer_m subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer class(runtime_load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size if (load_balancer_type == ENV_LOAD_BALANCER) then - block - use, intrinsic :: iso_fortran_env, only: error_unit - character(len=80) :: envval - logical :: ok - call get_environment_variable("SLB4MPI_LOAD_BALANCER", envval) - call SLB4MPI_set_schedule(trim(envval), ok) - if (len_trim(envval) /= 0 .and. (.not.ok .or. trim(envval) == 'env')) then - write(error_unit, '(A)') "SLB4MPI_LOAD_BALANCER environmental variable is not set properly!" - write(error_unit, '(A)') "Actual value is '" // trim(envval) // "'" - write(error_unit, '(A)') "Possible values are: static, local_static, dynamic, guided, work_stealing" - write(error_unit, '(A)') "static load balancer will be used!" - call SLB4MPI_set_schedule("static") - else if (len_trim(envval) == 0) then - call SLB4MPI_set_schedule("static") - end if - end block + block + use, intrinsic :: iso_fortran_env, only: error_unit + character(len=80) :: envval + logical :: ok + call get_environment_variable("SLB4MPI_LOAD_BALANCER", envval) + call SLB4MPI_set_schedule(trim(envval), ok) + if (len_trim(envval) /= 0 .and. (.not.ok .or. trim(envval) == 'env')) then + write(error_unit, '(A)') "SLB4MPI_LOAD_BALANCER environmental variable is not set properly!" + write(error_unit, '(A)') "Actual value is '" // trim(envval) // "'" + write(error_unit, '(A)') "Possible values are: static, local_static, dynamic, guided, work_stealing" + write(error_unit, '(A)') "static load balancer will be used!" 
+ call SLB4MPI_set_schedule("static") + else if (len_trim(envval) == 0) then + call SLB4MPI_set_schedule("static") + end if + end block end if select case (load_balancer_type) - case(STATIC_LOAD_BALANCER) - allocate(static_load_balancer_t :: lb%balancer) - case(LOCAL_STATIC_LOAD_BALANCER) - allocate(local_static_load_balancer_t :: lb%balancer) - case(DYNAMIC_LOAD_BALANCER) - allocate(dynamic_load_balancer_t :: lb%balancer) - case(GUIDED_LOAD_BALANCER) - allocate(guided_load_balancer_t :: lb%balancer) - case(WORK_STEALING_LOAD_BALANCER) - allocate(work_stealing_load_balancer_t :: lb%balancer) - case default - error stop "Unknown load balancer" + case (STATIC_LOAD_BALANCER) + allocate(static_load_balancer_t :: lb%balancer) + case (LOCAL_STATIC_LOAD_BALANCER) + allocate(local_static_load_balancer_t :: lb%balancer) + case (DYNAMIC_LOAD_BALANCER) + allocate(dynamic_load_balancer_t :: lb%balancer) + case (GUIDED_LOAD_BALANCER) + allocate(guided_load_balancer_t :: lb%balancer) + case (WORK_STEALING_LOAD_BALANCER) + allocate(work_stealing_load_balancer_t :: lb%balancer) + case default + error stop "Unknown load balancer" end select call lb%balancer%initialize(communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) @@ -125,21 +125,21 @@ subroutine SLB4MPI_set_schedule(lbtype, ok) logical, optional, intent(out) :: ok logical :: ok_ ok_ = .true. - select case(lbtype) - case ("env") - load_balancer_type = ENV_LOAD_BALANCER - case ("static") - load_balancer_type = STATIC_LOAD_BALANCER - case ("local_static") - load_balancer_type = LOCAL_STATIC_LOAD_BALANCER - case ("dynamic") - load_balancer_type = DYNAMIC_LOAD_BALANCER - case ("guided") - load_balancer_type = GUIDED_LOAD_BALANCER - case ("work_stealing") - load_balancer_type = WORK_STEALING_LOAD_BALANCER - case default - ok_ = .false. + select case (lbtype) + case ("env") + load_balancer_type = ENV_LOAD_BALANCER + case ("static") + load_balancer_type = STATIC_LOAD_BALANCER + case ("local_static") + load_balancer_type = LOCAL_STATIC_LOAD_BALANCER + case ("dynamic") + load_balancer_type = DYNAMIC_LOAD_BALANCER + case ("guided") + load_balancer_type = GUIDED_LOAD_BALANCER + case ("work_stealing") + load_balancer_type = WORK_STEALING_LOAD_BALANCER + case default + ok_ = .false. 
end select if (present(ok)) ok = ok_ end subroutine SLB4MPI_set_schedule diff --git a/src/Fortran/static_load_balancer.f90 b/src/Fortran/static_load_balancer.f90 index 3c5f761..c65aafb 100644 --- a/src/Fortran/static_load_balancer.f90 +++ b/src/Fortran/static_load_balancer.f90 @@ -6,11 +6,11 @@ module SLB4MPI_static_load_balancer_m type, extends(load_balancer_t) :: static_load_balancer_t private integer(8) :: counter - contains + contains procedure :: initialize procedure :: get_range procedure :: clean - end type + end type static_load_balancer_t interface @@ -31,9 +31,9 @@ module SLB4MPI_static_load_balancer_m !> subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) class(static_load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size call lb%default_initialize(communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) @@ -54,7 +54,7 @@ end subroutine initialize !> logical function get_range(lb, lower_bound, upper_bound) result(to_compute) class(static_load_balancer_t), intent(inout) :: lb - integer(8), intent(out) :: lower_bound, upper_bound + integer(8), intent(out) :: lower_bound, upper_bound to_compute = .true. diff --git a/src/Fortran/work_stealing_load_balancer.f90 b/src/Fortran/work_stealing_load_balancer.f90 index 37f2080..07be152 100644 --- a/src/Fortran/work_stealing_load_balancer.f90 +++ b/src/Fortran/work_stealing_load_balancer.f90 @@ -8,20 +8,20 @@ module SLB4MPI_work_stealing_load_balancer_m type, extends(load_balancer_t) :: work_stealing_load_balancer_t #ifdef SLB4MPI_WITH_MPI - integer(MPI_INTEGER_KIND) :: window_num_active !< number of active threads - integer(MPI_INTEGER_KIND) :: window_bounds !< lower and upper bounds of rank + integer(MPI_INTEGER_KIND) :: window_num_active !< number of active threads + integer(MPI_INTEGER_KIND) :: window_bounds !< lower and upper bounds of rank integer(MPI_INTEGER_KIND) :: window_actual_rank !< actual rank to compute (for fast look up) - integer(MPI_INTEGER_KIND) :: window_done !< status of thread - integer(MPI_INTEGER_KIND) :: actual_rank !< value of actual rank - logical :: done = .false. !< task completed? + integer(MPI_INTEGER_KIND) :: window_done !< status of thread + integer(MPI_INTEGER_KIND) :: actual_rank !< value of actual rank + logical :: done = .false. !< task completed? 
#else - integer(MPI_INTEGER_KIND) :: counter !< evaluation index + integer(MPI_INTEGER_KIND) :: counter !< evaluation index #endif - contains + contains procedure :: initialize procedure :: get_range procedure :: clean - end type + end type work_stealing_load_balancer_t interface @@ -43,9 +43,9 @@ module SLB4MPI_work_stealing_load_balancer_m subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size, max_chunk_size) use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer class(work_stealing_load_balancer_t), intent(inout) :: lb - integer(MPI_INTEGER_KIND), intent(in) :: communicator - integer(8), intent(in) :: lower_bound, upper_bound - integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size + integer(MPI_INTEGER_KIND), intent(in) :: communicator + integer(8), intent(in) :: lower_bound, upper_bound + integer(8), optional, intent(in) :: min_chunk_size, max_chunk_size integer(MPI_INTEGER_KIND) :: ierr integer(8) :: n_tasks, extra_tasks @@ -82,16 +82,16 @@ subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size n_tasks = (upper_bound - lower_bound + 1) / lb%nranks extra_tasks = mod(upper_bound - lower_bound + 1, lb%nranks) if (n_tasks < min_chunk_size) then - n_tasks = min_chunk_size - extra_tasks = 0 + n_tasks = min_chunk_size + extra_tasks = 0 end if if (lb%rank < extra_tasks) then - lb%lower_bound = lb%rank * (n_tasks + 1) + lower_bound - lb%upper_bound = (lb%rank + 1) * (n_tasks + 1) + lower_bound - 1 + lb%lower_bound = lb%rank * (n_tasks + 1) + lower_bound + lb%upper_bound = (lb%rank + 1) * (n_tasks + 1) + lower_bound - 1 else - lb%lower_bound = lb%rank * n_tasks + extra_tasks + lower_bound - lb%upper_bound = (lb%rank + 1) * n_tasks + extra_tasks + lower_bound - 1 + lb%lower_bound = lb%rank * n_tasks + extra_tasks + lower_bound + lb%upper_bound = (lb%rank + 1) * n_tasks + extra_tasks + lower_bound - 1 end if lb%lower_bound = min(lb%lower_bound, upper_bound + 1) @@ -100,8 +100,10 @@ subroutine initialize(lb, communicator, lower_bound, upper_bound, min_chunk_size #ifdef SLB4MPI_WITH_MPI lb%actual_rank = lb%rank - call MPI_Win_allocate(size_num_active, disp_unit_num_active, MPI_INFO_NULL, lb%communicator, baseaddr_num_active, lb%window_num_active, ierr) - call MPI_Win_create(lb%actual_rank, size_actual_rank, disp_unit_actual_rank, MPI_INFO_NULL, lb%communicator, lb%window_actual_rank, ierr) + call MPI_Win_allocate(size_num_active, disp_unit_num_active, MPI_INFO_NULL, lb%communicator, baseaddr_num_active, & + lb%window_num_active, ierr) + call MPI_Win_create(lb%actual_rank, size_actual_rank, disp_unit_actual_rank, MPI_INFO_NULL, lb%communicator, & + lb%window_actual_rank, ierr) call MPI_Win_allocate(size_bounds, disp_unit_bounds, MPI_INFO_NULL, lb%communicator, baseaddr_bounds, lb%window_bounds, ierr) call MPI_Win_create(lb%done, size_done, disp_unit_done, MPI_INFO_NULL, lb%communicator, lb%window_done, ierr) @@ -157,130 +159,139 @@ logical function get_range(lb, lower_bound, upper_bound) result(to_compute) #else if (lb%actual_rank == lb%rank) then - to_compute = .true. 
- !> select range for computing - call MPI_Win_lock(MPI_LOCK_EXCLUSIVE, lb%rank, 0_MPI_INTEGER_KIND, lb%window_bounds, ierr) - call MPI_Get(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, lb%rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, MPI_INTEGER8, lb%window_bounds, ierr) - call MPI_Win_flush(lb%rank, lb%window_bounds, ierr) - lb%lower_bound = bounds(1) - lb%upper_bound = bounds(2) - lower_bound = lb%lower_bound - upper_bound = min(lower_bound + lb%max_chunk_size - 1, lb%upper_bound) - bounds = [ upper_bound + 1, lb%upper_bound ] + to_compute = .true. + !> select range for computing + call MPI_Win_lock(MPI_LOCK_EXCLUSIVE, lb%rank, 0_MPI_INTEGER_KIND, lb%window_bounds, ierr) + call MPI_Get(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, lb%rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, MPI_INTEGER8, & + lb%window_bounds, ierr) + call MPI_Win_flush(lb%rank, lb%window_bounds, ierr) + lb%lower_bound = bounds(1) + lb%upper_bound = bounds(2) + lower_bound = lb%lower_bound + upper_bound = min(lower_bound + lb%max_chunk_size - 1, lb%upper_bound) + bounds = [ upper_bound + 1, lb%upper_bound ] #ifdef SLB4MPI_DEBUG_RANGES - print '(A,I0,A,I0,A,I0)', 'Thr ', lb%rank, ' computes range from ', lower_bound, ' to ', upper_bound + print '(A,I0,A,I0,A,I0)', 'Thr ', lb%rank, ' computes range from ', lower_bound, ' to ', upper_bound #endif - !> update lower bound - call MPI_Accumulate(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, lb%rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, MPI_INTEGER8, MPI_REPLACE, lb%window_bounds, ierr) - call MPI_Win_unlock(lb%rank, lb%window_bounds, ierr) - !> if it the last block, give that information in advance - if (upper_bound + 1 > lb%upper_bound .and. .not.lb%done) then - call MPI_Win_lock(MPI_LOCK_EXCLUSIVE, lb%root, 0_MPI_INTEGER_KIND, lb%window_num_active, ierr) - call MPI_Fetch_and_op(-1_MPI_INTEGER_KIND, num_active, MPI_INTEGER4, lb%root, 0_MPI_ADDRESS_KIND, MPI_SUM, lb%window_num_active, ierr) - call MPI_Win_unlock(lb%root, lb%window_num_active, ierr) - lb%actual_rank = mod(lb%actual_rank + 1, lb%nranks) - lb%done = .true. - end if - if (lower_bound > upper_bound) to_compute = .false. + !> update lower bound + call MPI_Accumulate(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, lb%rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, MPI_INTEGER8, & + MPI_REPLACE, lb%window_bounds, ierr) + call MPI_Win_unlock(lb%rank, lb%window_bounds, ierr) + !> if it the last block, give that information in advance + if (upper_bound + 1 > lb%upper_bound .and. .not.lb%done) then + call MPI_Win_lock(MPI_LOCK_EXCLUSIVE, lb%root, 0_MPI_INTEGER_KIND, lb%window_num_active, ierr) + call MPI_Fetch_and_op(-1_MPI_INTEGER_KIND, num_active, MPI_INTEGER4, lb%root, 0_MPI_ADDRESS_KIND, MPI_SUM, & + lb%window_num_active, ierr) + call MPI_Win_unlock(lb%root, lb%window_num_active, ierr) + lb%actual_rank = mod(lb%actual_rank + 1, lb%nranks) + lb%done = .true. + end if + if (lower_bound > upper_bound) to_compute = .false. 
end if !> switch to other ranks hop_count = 0 nohop_count = 0 do while (.not.to_compute) - block - integer(MPI_INTEGER_KIND) :: compute_rank - logical :: done - !> get rank which computes lb%actual_rank + block + integer(MPI_INTEGER_KIND) :: compute_rank + logical :: done + !> get rank which computes lb%actual_rank #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t1) + call system_clock(count=t1) #endif - call MPI_Win_lock(MPI_LOCK_SHARED, lb%actual_rank, MPI_MODE_NOCHECK, lb%window_actual_rank, ierr) + call MPI_Win_lock(MPI_LOCK_SHARED, lb%actual_rank, MPI_MODE_NOCHECK, lb%window_actual_rank, ierr) #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t2) - print '(A,I0,A,I0)', 'lock0, MPI: ', lb%rank, ' delay: ', t2-t1 + call system_clock(count=t2) + print '(A,I0,A,I0)', 'lock0, MPI: ', lb%rank, ' delay: ', t2 - t1 #endif - call MPI_Get(compute_rank, 1_MPI_INTEGER_KIND, MPI_INTEGER4, lb%actual_rank, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, MPI_INTEGER4, lb%window_actual_rank, ierr) - call MPI_Win_unlock(lb%actual_rank, lb%window_actual_rank, ierr) - !> check that compute_rank computes itself - if (lb%actual_rank == compute_rank) then - !> check that there is something to steal + call MPI_Get(compute_rank, 1_MPI_INTEGER_KIND, MPI_INTEGER4, lb%actual_rank, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, & + MPI_INTEGER4, lb%window_actual_rank, ierr) + call MPI_Win_unlock(lb%actual_rank, lb%window_actual_rank, ierr) + !> check that compute_rank computes itself + if (lb%actual_rank == compute_rank) then + !> check that there is something to steal #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t1) + call system_clock(count=t1) #endif - call MPI_Win_lock(MPI_LOCK_SHARED, lb%actual_rank, MPI_MODE_NOCHECK, lb%window_done, ierr) + call MPI_Win_lock(MPI_LOCK_SHARED, lb%actual_rank, MPI_MODE_NOCHECK, lb%window_done, ierr) #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t2) - print '(A,I0,A,I0)', 'lock1, MPI: ', lb%rank, ' delay: ', t2-t1 + call system_clock(count=t2) + print '(A,I0,A,I0)', 'lock1, MPI: ', lb%rank, ' delay: ', t2 - t1 #endif - call MPI_Get(done, 1_MPI_INTEGER_KIND, MPI_LOGICAL, lb%actual_rank, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, MPI_LOGICAL, lb%window_done, ierr) - call MPI_Win_unlock(lb%actual_rank, lb%window_done, ierr) - if (.not.done) then - !> try to steal min_chunk_size jobs from compute_rank + call MPI_Get(done, 1_MPI_INTEGER_KIND, MPI_LOGICAL, lb%actual_rank, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, MPI_LOGICAL, & + lb%window_done, ierr) + call MPI_Win_unlock(lb%actual_rank, lb%window_done, ierr) + if (.not.done) then + !> try to steal min_chunk_size jobs from compute_rank #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t1) + call system_clock(count=t1) #endif - call MPI_Win_lock(MPI_LOCK_EXCLUSIVE, compute_rank, 0_MPI_INTEGER_KIND, lb%window_bounds, ierr) + call MPI_Win_lock(MPI_LOCK_EXCLUSIVE, compute_rank, 0_MPI_INTEGER_KIND, lb%window_bounds, ierr) #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t2) - print '(A,I0,A,I0)', 'lock2, MPI: ', lb%rank, ' delay: ', t2-t1 + call system_clock(count=t2) + print '(A,I0,A,I0)', 'lock2, MPI: ', lb%rank, ' delay: ', t2 - t1 #endif - call MPI_Get(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, compute_rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, MPI_INTEGER8, lb%window_bounds, ierr) - call MPI_Win_flush(compute_rank, lb%window_bounds, ierr) - upper_bound = bounds(2) - lower_bound = max(bounds(1), bounds(2) - lb%min_chunk_size + 1) - bounds = [ bounds(1), lower_bound - 1 ] - if (lower_bound <= upper_bound) to_compute = 
.true. + call MPI_Get(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, compute_rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, & + MPI_INTEGER8, lb%window_bounds, ierr) + call MPI_Win_flush(compute_rank, lb%window_bounds, ierr) + upper_bound = bounds(2) + lower_bound = max(bounds(1), bounds(2) - lb%min_chunk_size + 1) + bounds = [ bounds(1), lower_bound - 1 ] + if (lower_bound <= upper_bound) to_compute = .true. #ifdef SLB4MPI_DEBUG_RANGES - if (to_compute) then - print '(A,I0,A,I0,A,I0,A,I0)', 'Thr ', lb%rank, ' steals range from ', lower_bound, ' to ', upper_bound, ' from ', compute_rank - print '(A,2I8)', 'new bounds: ', bounds - end if + if (to_compute) then + print '(A,I0,A,I0,A,I0,A,I0)', 'Thr ', lb%rank, ' steals range from ', lower_bound, ' to ', upper_bound, ' from ', & + compute_rank + print '(A,2I8)', 'new bounds: ', bounds + end if #endif - if (to_compute) then - !> update upper bound - call MPI_Accumulate(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, compute_rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, MPI_INTEGER8, MPI_REPLACE, lb%window_bounds, ierr) - end if - call MPI_Win_unlock(compute_rank, lb%window_bounds, ierr) - if (to_compute) return - end if - else - !> switch to lb%actual_rank of compute_rank + if (to_compute) then + !> update upper bound + call MPI_Accumulate(bounds, 2_MPI_INTEGER_KIND, MPI_INTEGER8, compute_rank, 0_MPI_ADDRESS_KIND, 2_MPI_INTEGER_KIND, & + MPI_INTEGER8, MPI_REPLACE, lb%window_bounds, ierr) + end if + call MPI_Win_unlock(compute_rank, lb%window_bounds, ierr) + if (to_compute) return + end if + else + !> switch to lb%actual_rank of compute_rank #ifdef SLB4MPI_DEBUG_RANGES - print '(A,I0)', 'hop MPI: ', lb%rank + print '(A,I0)', 'hop MPI: ', lb%rank #endif - lb%actual_rank = compute_rank - if (lb%actual_rank == lb%rank) exit - !> but... to avoid infinity cycle, sometimes we will check that there is some jobs - hop_count = hop_count + 1 - block - real(4) :: randval - call random_number(randval) - ! log(nranks) <- how often to check cond (each 1, 2, 3, ..) - ! 1 / log(nranks) <- probability - if (randval > 1._4 / log(real(lb%nranks, kind=4)) .and. hop_count < 20) cycle - end block - end if + lb%actual_rank = compute_rank + if (lb%actual_rank == lb%rank) exit + !> but... to avoid infinity cycle, sometimes we will check that there is some jobs + hop_count = hop_count + 1 + block + real(4) :: randval + call random_number(randval) + ! log(nranks) <- how often to check cond (each 1, 2, 3, ..) + ! 1 / log(nranks) <- probability + if (randval > 1._4 / log(real(lb%nranks, kind=4)) .and. 
hop_count < 20) cycle + end block + end if #ifdef SLB4MPI_DEBUG_RANGES - print '(A,I0)', 'no-hop MPI: ', lb%rank + print '(A,I0)', 'no-hop MPI: ', lb%rank #endif - !> if could not steal job, check how many threads finished job + !> if could not steal job, check how many threads finished job #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t1) + call system_clock(count=t1) #endif - call MPI_Win_lock(MPI_LOCK_SHARED, lb%root, 0_MPI_INTEGER_KIND, lb%window_num_active, ierr) + call MPI_Win_lock(MPI_LOCK_SHARED, lb%root, 0_MPI_INTEGER_KIND, lb%window_num_active, ierr) #ifdef SLB4MPI_DEBUG_LOCKS - call system_clock(count=t2) - print '(A,I0,A,I0)', 'lock3, MPI: ', lb%rank, ' delay: ', t2-t1 + call system_clock(count=t2) + print '(A,I0,A,I0)', 'lock3, MPI: ', lb%rank, ' delay: ', t2 - t1 #endif - call MPI_Get(num_active, 1_MPI_INTEGER_KIND, MPI_INTEGER4, lb%root, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, MPI_INTEGER4, lb%window_num_active, ierr) - call MPI_Win_unlock(lb%root, lb%window_num_active, ierr) - if (num_active == 0) return - lb%actual_rank = mod(lb%actual_rank + 1, lb%nranks) - hop_count = 0 - nohop_count = nohop_count + 1 - if (nohop_count > lb%nranks) return - end block + call MPI_Get(num_active, 1_MPI_INTEGER_KIND, MPI_INTEGER4, lb%root, 0_MPI_ADDRESS_KIND, 1_MPI_INTEGER_KIND, MPI_INTEGER4, & + lb%window_num_active, ierr) + call MPI_Win_unlock(lb%root, lb%window_num_active, ierr) + if (num_active == 0) return + lb%actual_rank = mod(lb%actual_rank + 1, lb%nranks) + hop_count = 0 + nohop_count = nohop_count + 1 + if (nohop_count > lb%nranks) return + end block end do #endif
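(Illustrative aside, not part of the patch.) All balancers above are driven through the same three calls, as in examples/Fortran/example.f90: initialize, get_range in a loop, and clean. A minimal hypothetical caller sketch, assuming runtime_load_balancer_t is exported from SLB4MPI_runtime_load_balancer_m and that MPI_INTEGER_KIND matches the default integer kind of MPI_COMM_WORLD:

program lb_usage
  use mpi                                                              ! assumed MPI binding
  use SLB4MPI_runtime_load_balancer_m, only: runtime_load_balancer_t   ! assumed module export
  implicit none
  type(runtime_load_balancer_t) :: lb
  integer(8) :: bot, top, j
  integer :: ierr

  call MPI_Init(ierr)
  ! Hand out iterations 1..1000 in chunks of 5 to 50 per get_range call.
  call lb%initialize(MPI_COMM_WORLD, 1_8, 1000_8, min_chunk_size=5_8, max_chunk_size=50_8)
  do
    if (.not. lb%get_range(bot, top)) exit   ! no more work for this rank
    do j = bot, top
      ! ... process iteration j ...
    end do
  end do
  call lb%clean()
  call MPI_Finalize(ierr)
end program lb_usage

The concrete balancer behind runtime_load_balancer_t is chosen at run time, e.g. via the SLB4MPI_LOAD_BALANCER environment variable handled in runtime_load_balancer.f90 above.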