Skip to content

Commit fa245f2

Browse files
committed
speed up the parsing of JSON files when using unformatted stream by reading data in chunks rather than one character at a time. See #363
I don't think pop_char needs to be recursive.
1 parent a572c0a commit fa245f2

File tree

3 files changed

+52
-14
lines changed

3 files changed

+52
-14
lines changed

src/json_parameters.F90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ module json_parameters
124124

125125
integer(IK),parameter :: seq_chunk_size = 256_IK !! chunk size for reading sequential files
126126

127+
integer(IK),parameter :: stream_chunk_size = 1000_IK!! chunk size for reading stream files
128+
127129
integer(IK),parameter :: pushed_char_size = 10_IK !! size for `pushed_char`
128130
!! array in [[json_core(type)]]
129131

src/json_string_utilities.F90

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,17 @@ subroutine string_to_integer(str,ival,status_ok)
125125

126126
! Compute how many digits we need to read
127127
ndigits = 2*len_trim(str)
128-
ndigits_digits = floor(log10(real(ndigits)))+1
129-
allocate(character(kind=CDK,len=ndigits_digits) :: digits)
130-
write(digits,'(I0)') ndigits !gfortran will have a runtime error with * edit descriptor here
131-
! gfortran bug: '*' edit descriptor for ISO_10646 strings does bad stuff.
132-
read(str,'(I'//trim(digits)//')',iostat=ierr) ival !string to integer
133-
134-
! error check:
135-
status_ok = (ierr==0)
128+
if (ndigits/=0) then
129+
ndigits_digits = floor(log10(real(ndigits)))+1
130+
allocate(character(kind=CDK,len=ndigits_digits) :: digits)
131+
write(digits,'(I0)') ndigits !gfortran will have a runtime error with * edit descriptor here
132+
! gfortran bug: '*' edit descriptor for ISO_10646 strings does bad stuff.
133+
read(str,'(I'//trim(digits)//')',iostat=ierr) ival !string to integer
134+
! error check:
135+
status_ok = (ierr==0)
136+
else
137+
status_ok = .false.
138+
end if
136139
if (.not. status_ok) ival = 0_IK
137140

138141
end subroutine string_to_integer

src/json_value_module.F90

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,11 @@ module json_value_module
256256
!! (both escaped and unescaped versions are still
257257
!! valid in all cases).
258258

259+
integer :: ichunk = 0 !! index in `chunk` for [[pop_char]]
260+
!! when `use_unformatted_stream=True`
261+
character(kind=CK,len=stream_chunk_size) :: chunk = CK_'' !! a chunk read from a stream file
262+
!! when `use_unformatted_stream=True`
263+
259264
contains
260265

261266
private
@@ -917,6 +922,8 @@ subroutine json_initialize(me,verbose,compact_reals,&
917922
me%char_count = 0
918923
me%line_count = 1
919924
me%ipos = 1
925+
me%ichunk = 0
926+
me%chunk = ''
920927

921928
#ifdef USE_UCS4
922929
! reopen stdout and stderr with utf-8 encoding
@@ -10147,7 +10154,7 @@ end subroutine parse_number
1014710154
!@note This routine ignores non-printing ASCII characters
1014810155
! (`iachar<=31`) that are in strings.
1014910156

10150-
recursive subroutine pop_char(json,unit,str,skip_ws,skip_comments,eof,popped)
10157+
subroutine pop_char(json,unit,str,skip_ws,skip_comments,eof,popped)
1015110158

1015210159
implicit none
1015310160

@@ -10170,6 +10177,10 @@ recursive subroutine pop_char(json,unit,str,skip_ws,skip_comments,eof,popped)
1017010177
logical(LK) :: parsing_comment !! if we are in the process
1017110178
!! of parsing a comment line
1017210179

10180+
logical,parameter :: chunk_it = .true. !! if true, stream files are read in chunks,
10181+
!! rather than one character at a time.
10182+
!! this speeds up the parsing dramatically.
10183+
1017310184
if (.not. json%exception_thrown) then
1017410185

1017510186
eof = .false.
@@ -10201,16 +10212,37 @@ recursive subroutine pop_char(json,unit,str,skip_ws,skip_comments,eof,popped)
1020110212

1020210213
!read the next character:
1020310214
if (use_unformatted_stream) then
10204-
read(unit=unit,pos=json%ipos,iostat=ios) c
10215+
10216+
if (chunk_it) then
10217+
! in this case, we read the file in chunks.
10218+
! if we already have the character we need,
10219+
! then get it from the chunk. Otherwise,
10220+
! read another chunk
10221+
10222+
if (json%ichunk<1) then
10223+
json%ichunk = 0
10224+
read(unit=unit,pos=json%ipos,iostat=ios) json%chunk
10225+
else
10226+
ios = 0
10227+
end if
10228+
json%ichunk = json%ichunk + 1
10229+
c = json%chunk(json%ichunk:json%ichunk)
10230+
if (json%ichunk==len(json%chunk)) then
10231+
json%ichunk = 0 ! reset
10232+
else
10233+
! we have to finish getting
10234+
! characters from this chunk:
10235+
if (IS_IOSTAT_END(ios)) ios = 0
10236+
end if
10237+
else
10238+
read(unit=unit,pos=json%ipos,iostat=ios) c
10239+
end if
10240+
1020510241
else
1020610242
read(unit=unit,fmt='(A1)',advance='NO',iostat=ios) c
1020710243
end if
1020810244
json%ipos = json%ipos + 1
1020910245

10210-
!....note: maybe try read the file in chunks...
10211-
!.... or use asynchronous read with double buffering
10212-
! (see Modern Fortran: Style and Usage)
10213-
1021410246
else !read from the string
1021510247

1021610248
str_len = len(str) !length of the string
@@ -10302,6 +10334,7 @@ subroutine push_char(json,c)
1030210334
!in this case, c is ignored, and we just
1030310335
!decrement the stream position counter:
1030410336
json%ipos = json%ipos - 1
10337+
json%ichunk = json%ichunk - 1
1030510338

1030610339
else
1030710340

0 commit comments

Comments
 (0)