Skip to content

Commit 740b14d

Browse files
committed
Improved efficiency of writing (serial) restart files:
1. Allow only master_task to define a serial netCDF restart file.
2. Remove unnecessary restart_format conditionals.
3. Fuse master_task conditionals.
4. Set MPI barriers for global gathers and scatters, wrapped with a new cpp flag.

The gather/scatter barriers do not affect timings in my tests, but Neil Barton reports significant performance improvements in his configuration. Thanks to Alan Wallcraft for this fix.

M mpi/ice_gather_scatter.F90
  - add barriers to gathers and scatters
M io_netcdf/ice_restart.F90
M io_binary/ice_restart.F90
  - remove unnecessary restart_format conditionals
  - fuse master_task conditionals
M source/ice_restart_driver.F90
  - turn off min/max diagnostics for writes
M bld/Macros.Linux.LANL.conejo
M doc/cicedoc.pdf
M comp_ice
  - add BARRIERS, gather_scatter_barrier cpp
1 parent 41f9197 commit 740b14d

File tree

7 files changed

+69
-147
lines changed

7 files changed

+69
-147
lines changed

bld/Macros.Linux.LANL.conejo

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ endif
1717
FIXEDFLAGS := -132
1818
FREEFLAGS :=
1919
FFLAGS := -r8 -i4 -O2 -align all -w -ftz -convert big_endian -assume byterecl -fp-model precise
20-
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -g
20+
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -fp-model precise -g
2121
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -fpe0 -CB -traceback
2222
#FFLAGS := -r8 -i4 -align all -w -ftz -convert big_endian -assume byterecl -fpe0 -CB -g
2323

@@ -41,6 +41,9 @@ LDFLAGS := $(FFLAGS) -v
4141
ifeq ($(DITTO), yes)
4242
CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE
4343
endif
44+
ifeq ($(BARRIERS), yes)
45+
CPPDEFS := $(CPPDEFS) -Dgather_scatter_barrier
46+
endif
4447

4548
ifeq ($(IO_TYPE), netcdf)
4649
CPPDEFS := $(CPPDEFS) -Dncdf

comp_ice

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ setenv SHRDIR csm_share # location of CCSM shared code
6666
setenv IO_TYPE netcdf # set to none if netcdf library is unavailable
6767
# set to pio for parallel netcdf
6868
setenv DITTO no # reproducible diagnostics
69+
setenv BARRIERS no # prevent MPI buffer overflow during gather/scatter
6970
setenv THRD no # set to yes for OpenMP threading
7071

7172
if ( $THRD == 'yes') setenv OMP_NUM_THREADS 2 # positive integer

doc/cicedoc.pdf

428 Bytes
Binary file not shown.

io_binary/ice_restart.F90

Lines changed: 24 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,7 @@ subroutine init_restart_read(ice_ic)
103103
string1(1:lenstr(string1)), &
104104
restart_file(1:lenstr(restart_file)),'.eap', &
105105
string2(1:lenstr(string2))
106-
endif
107-
108-
if (restart_format == 'bin') &
109106
call ice_open(nu_restart_eap,filename,0)
110-
111-
if (my_task == master_task) then
112107
read (nu_restart_eap) iignore,rignore,rignore
113108
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
114109
endif
@@ -124,12 +119,7 @@ subroutine init_restart_read(ice_ic)
124119
string1(1:lenstr(string1)), &
125120
restart_file(1:lenstr(restart_file)),'.iage', &
126121
string2(1:lenstr(string2))
127-
endif
128-
129-
if (restart_format == 'bin') &
130122
call ice_open(nu_restart_age,filename,0)
131-
132-
if (my_task == master_task) then
133123
read (nu_restart_age) iignore,rignore,rignore
134124
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
135125
endif
@@ -145,12 +135,7 @@ subroutine init_restart_read(ice_ic)
145135
string1(1:lenstr(string1)), &
146136
restart_file(1:lenstr(restart_file)),'.FY', &
147137
string2(1:lenstr(string2))
148-
endif
149-
150-
if (restart_format == 'bin') &
151138
call ice_open(nu_restart_FY,filename,0)
152-
153-
if (my_task == master_task) then
154139
read (nu_restart_FY) iignore,rignore,rignore
155140
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
156141
endif
@@ -166,12 +151,7 @@ subroutine init_restart_read(ice_ic)
166151
string1(1:lenstr(string1)), &
167152
restart_file(1:lenstr(restart_file)),'.lvl', &
168153
string2(1:lenstr(string2))
169-
endif
170-
171-
if (restart_format == 'bin') &
172154
call ice_open(nu_restart_lvl,filename,0)
173-
174-
if (my_task == master_task) then
175155
read (nu_restart_lvl) iignore,rignore,rignore
176156
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
177157
endif
@@ -187,12 +167,7 @@ subroutine init_restart_read(ice_ic)
187167
string1(1:lenstr(string1)), &
188168
restart_file(1:lenstr(restart_file)),'.pond_cesm', &
189169
string2(1:lenstr(string2))
190-
endif
191-
192-
if (restart_format == 'bin') &
193170
call ice_open(nu_restart_pond,filename,0)
194-
195-
if (my_task == master_task) then
196171
read (nu_restart_pond) iignore,rignore,rignore
197172
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
198173
endif
@@ -208,12 +183,7 @@ subroutine init_restart_read(ice_ic)
208183
string1(1:lenstr(string1)), &
209184
restart_file(1:lenstr(restart_file)),'.pond_lvl', &
210185
string2(1:lenstr(string2))
211-
endif
212-
213-
if (restart_format == 'bin') &
214186
call ice_open(nu_restart_pond,filename,0)
215-
216-
if (my_task == master_task) then
217187
read (nu_restart_pond) iignore,rignore,rignore
218188
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
219189
endif
@@ -229,12 +199,7 @@ subroutine init_restart_read(ice_ic)
229199
string1(1:lenstr(string1)), &
230200
restart_file(1:lenstr(restart_file)),'.pond_topo', &
231201
string2(1:lenstr(string2))
232-
endif
233-
234-
if (restart_format == 'bin') &
235202
call ice_open(nu_restart_pond,filename,0)
236-
237-
if (my_task == master_task) then
238203
read (nu_restart_pond) iignore,rignore,rignore
239204
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
240205
endif
@@ -250,12 +215,7 @@ subroutine init_restart_read(ice_ic)
250215
string1(1:lenstr(string1)), &
251216
restart_file(1:lenstr(restart_file)),'.brine', &
252217
string2(1:lenstr(string2))
253-
endif
254-
255-
if (restart_format == 'bin') &
256218
call ice_open(nu_restart_hbrine,filename,0)
257-
258-
if (my_task == master_task) then
259219
read (nu_restart_hbrine) iignore,rignore,rignore
260220
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
261221
endif
@@ -271,12 +231,7 @@ subroutine init_restart_read(ice_ic)
271231
string1(1:lenstr(string1)), &
272232
restart_file(1:lenstr(restart_file)),'.bgc', &
273233
string2(1:lenstr(string2))
274-
endif
275-
276-
if (restart_format == 'bin') &
277234
call ice_open(nu_restart_bgc,filename,0)
278-
279-
if (my_task == master_task) then
280235
read (nu_restart_bgc) iignore,rignore,rignore
281236
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
282237
endif
@@ -292,12 +247,7 @@ subroutine init_restart_read(ice_ic)
292247
string1(1:lenstr(string1)), &
293248
restart_file(1:lenstr(restart_file)),'.aero', &
294249
string2(1:lenstr(string2))
295-
endif
296-
297-
if (restart_format == 'bin') &
298250
call ice_open(nu_restart_aero,filename,0)
299-
300-
if (my_task == master_task) then
301251
read (nu_restart_aero) iignore,rignore,rignore
302252
write(nu_diag,*) 'Reading ',filename(1:lenstr(filename))
303253
endif
@@ -351,33 +301,21 @@ subroutine init_restart_write(filename_spec)
351301
open(nu_rst_pointer,file=pointer_file)
352302
write(nu_rst_pointer,'(a)') filename
353303
close(nu_rst_pointer)
354-
endif
355-
356-
if (restart_format == 'bin') then
357-
358304
call ice_open(nu_dump,filename,0)
359-
360-
if (my_task == master_task) then
361-
write(nu_dump) istep1,time,time_forc
362-
endif
363-
364-
endif
365-
366-
if (my_task == master_task) then
305+
write(nu_dump) istep1,time,time_forc
367306
write(nu_diag,*) 'Writing ',filename(1:lenstr(filename))
368307
endif
369308

309+
! begin writing restart data
310+
370311
if (kdyn == 2) then
371312

372313
write(filename,'(a,a,a,i4.4,a,i2.2,a,i2.2,a,i5.5)') &
373314
restart_dir(1:lenstr(restart_dir)), &
374315
restart_file(1:lenstr(restart_file)),'.eap.', &
375316
iyear,'-',month,'-',mday,'-',sec
376317

377-
! begin writing restart data
378-
379-
if (restart_format == 'bin') &
380-
call ice_open(nu_dump_eap,filename,0)
318+
call ice_open(nu_dump_eap,filename,0)
381319

382320
if (my_task == master_task) then
383321
write(nu_dump_eap) istep1,time,time_forc
@@ -393,10 +331,7 @@ subroutine init_restart_write(filename_spec)
393331
restart_file(1:lenstr(restart_file)),'.FY.', &
394332
iyear,'-',month,'-',mday,'-',sec
395333

396-
! begin writing restart data
397-
398-
if (restart_format == 'bin') &
399-
call ice_open(nu_dump_FY,filename,0)
334+
call ice_open(nu_dump_FY,filename,0)
400335

401336
if (my_task == master_task) then
402337
write(nu_dump_FY) istep1,time,time_forc
@@ -412,10 +347,7 @@ subroutine init_restart_write(filename_spec)
412347
restart_file(1:lenstr(restart_file)),'.iage.', &
413348
iyear,'-',month,'-',mday,'-',sec
414349

415-
! begin writing restart data
416-
417-
if (restart_format == 'bin') &
418-
call ice_open(nu_dump_age,filename,0)
350+
call ice_open(nu_dump_age,filename,0)
419351

420352
if (my_task == master_task) then
421353
write(nu_dump_age) istep1,time,time_forc
@@ -431,10 +363,7 @@ subroutine init_restart_write(filename_spec)
431363
restart_file(1:lenstr(restart_file)),'.lvl.', &
432364
iyear,'-',month,'-',mday,'-',sec
433365

434-
! begin writing restart data
435-
436-
if (restart_format == 'bin') &
437-
call ice_open(nu_dump_lvl,filename,0)
366+
call ice_open(nu_dump_lvl,filename,0)
438367

439368
if (my_task == master_task) then
440369
write(nu_dump_lvl) istep1,time,time_forc
@@ -450,10 +379,7 @@ subroutine init_restart_write(filename_spec)
450379
restart_file(1:lenstr(restart_file)),'.pond_cesm.', &
451380
iyear,'-',month,'-',mday,'-',sec
452381

453-
! begin writing restart data
454-
455-
if (restart_format == 'bin') &
456-
call ice_open(nu_dump_pond,filename,0)
382+
call ice_open(nu_dump_pond,filename,0)
457383

458384
if (my_task == master_task) then
459385
write(nu_dump_pond) istep1,time,time_forc
@@ -469,10 +395,7 @@ subroutine init_restart_write(filename_spec)
469395
restart_file(1:lenstr(restart_file)),'.pond_lvl.', &
470396
iyear,'-',month,'-',mday,'-',sec
471397

472-
! begin writing restart data
473-
474-
if (restart_format == 'bin') &
475-
call ice_open(nu_dump_pond,filename,0)
398+
call ice_open(nu_dump_pond,filename,0)
476399

477400
if (my_task == master_task) then
478401
write(nu_dump_pond) istep1,time,time_forc
@@ -488,10 +411,7 @@ subroutine init_restart_write(filename_spec)
488411
restart_file(1:lenstr(restart_file)),'.pond_topo.', &
489412
iyear,'-',month,'-',mday,'-',sec
490413

491-
! begin writing restart data
492-
493-
if (restart_format == 'bin') &
494-
call ice_open(nu_dump_pond,filename,0)
414+
call ice_open(nu_dump_pond,filename,0)
495415

496416
if (my_task == master_task) then
497417
write(nu_dump_pond) istep1,time,time_forc
@@ -507,10 +427,7 @@ subroutine init_restart_write(filename_spec)
507427
restart_file(1:lenstr(restart_file)),'.brine.', &
508428
iyear,'-',month,'-',mday,'-',sec
509429

510-
! begin writing restart data
511-
512-
if (restart_format == 'bin') &
513-
call ice_open(nu_dump_hbrine,filename,0)
430+
call ice_open(nu_dump_hbrine,filename,0)
514431

515432
if (my_task == master_task) then
516433
write(nu_dump_hbrine) istep1,time,time_forc
@@ -526,10 +443,7 @@ subroutine init_restart_write(filename_spec)
526443
restart_file(1:lenstr(restart_file)),'.bgc.', &
527444
iyear,'-',month,'-',mday,'-',sec
528445

529-
! begin writing restart data
530-
531-
if (restart_format == 'bin') &
532-
call ice_open(nu_dump_bgc,filename,0)
446+
call ice_open(nu_dump_bgc,filename,0)
533447

534448
if (my_task == master_task) then
535449
write(nu_dump_bgc) istep1,time,time_forc
@@ -545,10 +459,7 @@ subroutine init_restart_write(filename_spec)
545459
restart_file(1:lenstr(restart_file)),'.aero.', &
546460
iyear,'-',month,'-',mday,'-',sec
547461

548-
! begin writing restart data
549-
550-
if (restart_format == 'bin') &
551-
call ice_open(nu_dump_aero,filename,0)
462+
call ice_open(nu_dump_aero,filename,0)
552463

553464
if (my_task == master_task) then
554465
write(nu_dump_aero) istep1,time,time_forc
@@ -603,7 +514,6 @@ subroutine read_restart_field(nu,nrec,work,atype,vname,ndim3, &
603514
real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks) :: &
604515
work2 ! input array (real, 8-byte)
605516

606-
if (restart_format == 'bin') then
607517
write(nu_diag,*) 'vname ',trim(vname)
608518
if (present(field_loc)) then
609519
do n=1,ndim3
@@ -624,9 +534,6 @@ subroutine read_restart_field(nu,nrec,work,atype,vname,ndim3, &
624534
work(:,:,n,:) = work2(:,:,:)
625535
enddo
626536
endif
627-
else
628-
call abort_ice('Invalid restart_format: '//restart_format)
629-
endif
630537

631538
end subroutine read_restart_field
632539

@@ -669,7 +576,6 @@ subroutine write_restart_field(nu,nrec,work,atype,vname,ndim3,diag)
669576
real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks) :: &
670577
work2 ! input array (real, 8-byte)
671578

672-
if (restart_format == 'bin') then
673579
do n=1,ndim3
674580
work2(:,:,:) = work(:,:,n,:)
675581
if (restart_ext) then
@@ -678,9 +584,6 @@ subroutine write_restart_field(nu,nrec,work,atype,vname,ndim3,diag)
678584
call ice_write(nu,nrec,work2,atype,diag)
679585
endif
680586
enddo
681-
else
682-
call abort_ice('Invalid restart_format: '//restart_format)
683-
endif
684587

685588
end subroutine write_restart_field
686589

@@ -696,19 +599,19 @@ subroutine final_restart()
696599

697600
integer (kind=int_kind) :: status
698601

699-
if (restart_format == 'bin') then
700-
if (my_task == master_task) close(nu_dump)
701-
if (my_task == master_task .and. tr_aero) close(nu_dump_aero)
702-
if (my_task == master_task .and. tr_iage) close(nu_dump_age)
703-
if (my_task == master_task .and. tr_FY) close(nu_dump_FY)
704-
if (my_task == master_task .and. tr_lvl) close(nu_dump_lvl)
705-
if (my_task == master_task .and. tr_pond_cesm) close(nu_dump_pond)
706-
if (my_task == master_task .and. tr_pond_lvl) close(nu_dump_pond)
707-
if (my_task == master_task .and. tr_pond_topo) close(nu_dump_pond)
708-
endif
602+
if (my_task == master_task) then
603+
close(nu_dump)
604+
605+
if (tr_aero) close(nu_dump_aero)
606+
if (tr_iage) close(nu_dump_age)
607+
if (tr_FY) close(nu_dump_FY)
608+
if (tr_lvl) close(nu_dump_lvl)
609+
if (tr_pond_cesm) close(nu_dump_pond)
610+
if (tr_pond_lvl) close(nu_dump_pond)
611+
if (tr_pond_topo) close(nu_dump_pond)
709612

710-
if (my_task == master_task) &
711613
write(nu_diag,*) 'Restart read/written ',istep1,time,time_forc
614+
endif
712615

713616
end subroutine final_restart
714617

0 commit comments

Comments
 (0)