Skip to content

Commit d4462f1

Browse files
committed
Some cleanup and possible efficiency improvements for hex string validation. Some adjustments to error messages for invalid hex. Fixes #354
An exception is now raised for a string that ends in an escape character. Fixes #353
1 parent 967cfee commit d4462f1

File tree

2 files changed

+66
-81
lines changed

2 files changed

+66
-81
lines changed

src/json_string_utilities.F90

Lines changed: 52 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -473,29 +473,34 @@ subroutine unescape_string(str, error_message)
473473
i = i + 1
474474
c = str(i:i) !character after the escape
475475

476-
if (any(c == [quotation_mark,backslash,slash, &
477-
to_unicode(['b','f','n','r','t'])])) then
478-
479-
select case(c)
480-
case (quotation_mark,backslash,slash)
481-
!use d as is
482-
case (CK_'b')
483-
c = bspace
484-
case (CK_'f')
485-
c = formfeed
486-
case (CK_'n')
487-
c = newline
488-
case (CK_'r')
489-
c = carriage_return
490-
case (CK_'t')
491-
c = horizontal_tab
492-
end select
493-
476+
select case(c)
477+
case (quotation_mark,backslash,slash)
478+
!use c as is
479+
m = m + 1
480+
str_tmp(m:m) = c
481+
case (CK_'b')
482+
c = bspace
483+
m = m + 1
484+
str_tmp(m:m) = c
485+
case (CK_'f')
486+
c = formfeed
487+
m = m + 1
488+
str_tmp(m:m) = c
489+
case (CK_'n')
490+
c = newline
491+
m = m + 1
492+
str_tmp(m:m) = c
493+
case (CK_'r')
494+
c = carriage_return
495+
m = m + 1
496+
str_tmp(m:m) = c
497+
case (CK_'t')
498+
c = horizontal_tab
494499
m = m + 1
495500
str_tmp(m:m) = c
496501

497-
else if (c == 'u') then !expecting 4 hexadecimal digits after
498-
!the escape character [\uXXXX]
502+
case (CK_'u') ! expecting 4 hexadecimal digits after
503+
! the escape character [\uXXXX]
499504

500505
!for now, we are just returning them as is
501506
![the four hex digits are validated below, but not converted]
@@ -505,33 +510,47 @@ subroutine unescape_string(str, error_message)
505510
! \uXXXX
506511

507512
if (i+4<=n) then
508-
m = m + 1
509-
str_tmp(m:m+5) = str(i-1:i+4)
510-
i = i + 4
511-
m = m + 5
513+
514+
! validate the hex string:
515+
if (valid_json_hex(str(i+1:i+4))) then
516+
m = m + 1
517+
str_tmp(m:m+5) = str(i-1:i+4)
518+
i = i + 4
519+
m = m + 5
520+
else
521+
error_message = 'Error in unescape_string:'//&
522+
' Invalid hexadecimal sequence in string "'//&
523+
trim(str)//'" ['//str(i-1:i+4)//']'
524+
if (allocated(str_tmp)) deallocate(str_tmp)
525+
return
526+
end if
512527
else
513528
error_message = 'Error in unescape_string:'//&
514-
' Invalid hexadecimal sequence'//&
515-
' in string: '//str(i-1:)
529+
' Invalid hexadecimal sequence in string "'//&
530+
trim(str)//'" ['//str(i-1:)//']'
516531
if (allocated(str_tmp)) deallocate(str_tmp)
517532
return
518533
end if
519534

520-
else
535+
case default
536+
521537
!unknown escape character
522538
error_message = 'Error in unescape_string:'//&
523539
' unknown escape sequence in string "'//&
524540
trim(str)//'" ['//backslash//c//']'
525541
if (allocated(str_tmp)) deallocate(str_tmp)
526542
return
527-
end if
543+
544+
end select
528545

529546
else
530-
!an escape character is the last character in
531-
! the string [this may not be valid syntax,
532-
! but just keep it]
533-
m = m + 1
534-
str_tmp(m:m) = c
547+
! an escape character is the last character in
548+
! the string. This is an error.
549+
error_message = 'Error in unescape_string:'//&
550+
' invalid escape character in string "'//&
551+
trim(str)//'"'
552+
if (allocated(str_tmp)) deallocate(str_tmp)
553+
return
535554
end if
536555

537556
else

src/json_value_module.F90

Lines changed: 14 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8752,11 +8752,11 @@ subroutine json_parse_file(json, file, p, unit)
87528752
! but we'll allocate something here just in case.
87538753
p%name = trim(file) !use the file name
87548754

8755-
! parse as a value
8755+
! parse as a value
87568756
call json%parse_value(unit=iunit, str=CK_'', value=p)
87578757

8758-
! close the file if necessary
8759-
close(unit=iunit, iostat=istat)
8758+
! close the file if necessary
8759+
close(unit=iunit, iostat=istat)
87608760

87618761
! check for errors:
87628762
if (json%exception_thrown) then
@@ -9868,22 +9868,22 @@ end subroutine parse_array
98689868
! * Jacob Williams : 6/16/2014 : Added hex validation.
98699869
! * Jacob Williams : 12/3/2015 : Fixed some bugs.
98709870
! * Jacob Williams : 8/23/2015 : `string` is now returned unescaped.
9871+
! * Jacob Williams : 7/21/2018 : moved hex validation to [[unescape_string]].
98719872

98729873
subroutine parse_string(json, unit, str, string)
98739874

98749875
implicit none
98759876

98769877
class(json_core),intent(inout) :: json
9877-
integer(IK),intent(in) :: unit !! file unit number (if parsing from a file)
9878-
character(kind=CK,len=*),intent(in) :: str !! JSON string (if parsing from a string)
9879-
character(kind=CK,len=:),allocatable,intent(out) :: string !! the string (unescaped if necessary)
9878+
integer(IK),intent(in) :: unit !! file unit number (if
9879+
!! parsing from a file)
9880+
character(kind=CK,len=*),intent(in) :: str !! JSON string (if parsing
9881+
!! from a string)
9882+
character(kind=CK,len=:),allocatable,intent(out) :: string !! the string (unescaped
9883+
!! if necessary)
98809884

98819885
logical(LK) :: eof !! end of file flag
9882-
logical(LK) :: is_hex !! it is a hex string
9883-
logical(LK) :: escape !! for escape string parsing
98849886
character(kind=CK,len=1) :: c !! character returned by [[pop_char]]
9885-
character(kind=CK,len=4) :: hex !! hex string
9886-
integer(IK) :: i !! counter
98879887
integer(IK) :: ip !! index to put next character,
98889888
!! to speed up by reducing the number
98899889
!! of character string reallocations.
@@ -9895,10 +9895,7 @@ subroutine parse_string(json, unit, str, string)
98959895
if (.not. json%exception_thrown) then
98969896

98979897
!initialize:
9898-
ip = 1
9899-
is_hex = .false.
9900-
escape = .false.
9901-
i = 0
9898+
ip = 1
99029899

99039900
do
99049901

@@ -9910,10 +9907,8 @@ subroutine parse_string(json, unit, str, string)
99109907
call json%throw_exception('Error in parse_string: Expecting end of string')
99119908
return
99129909

9913-
else if (c==quotation_mark .and. .not. escape) then !end of string
9910+
else if (c==quotation_mark) then !end of string
99149911

9915-
if (is_hex) call json%throw_exception('Error in parse_string:'//&
9916-
' incomplete hex string: \u'//trim(hex))
99179912
exit
99189913

99199914
else
@@ -9925,36 +9920,6 @@ subroutine parse_string(json, unit, str, string)
99259920
string(ip:ip) = c
99269921
ip = ip + 1
99279922

9928-
!hex validation:
9929-
if (is_hex) then !accumulate the four characters after '\u'
9930-
9931-
i=i+1
9932-
hex(i:i) = c
9933-
if (i==4) then
9934-
if (valid_json_hex(hex)) then
9935-
i = 0
9936-
hex = CK_''
9937-
is_hex = .false.
9938-
else
9939-
call json%throw_exception('Error in parse_string:'//&
9940-
' invalid hex string: \u'//trim(hex))
9941-
exit
9942-
end if
9943-
end if
9944-
9945-
else
9946-
9947-
!when the '\u' string is encountered, then
9948-
! start accumulating the hex string (should be the next 4 characters)
9949-
if (escape) then
9950-
escape = .false.
9951-
is_hex = (c==CK_'u') !the next four characters are the hex string
9952-
else
9953-
escape = (c==backslash)
9954-
end if
9955-
9956-
end if
9957-
99589923
end if
99599924

99609925
end do
@@ -9968,7 +9933,8 @@ subroutine parse_string(json, unit, str, string)
99689933
end if
99699934
end if
99709935

9971-
!string is returned unescaped:
9936+
! string is returned unescaped:
9937+
! (this will also validate any hex strings present)
99729938
call unescape_string(string,error_message)
99739939
if (allocated(error_message)) then
99749940
call json%throw_exception(error_message)

0 commit comments

Comments
 (0)