11# frozen_string_literal: true
2+
23require_relative 'test_helper'
34
45class JSONEncodingTest < Test ::Unit ::TestCase
@@ -37,7 +38,7 @@ def test_unicode
3738 assert_equal '"\u001f"' , 0x1f . chr . to_json
3839 assert_equal '" "' , ' ' . to_json
3940 assert_equal "\" #{ 0x7f . chr } \" " , 0x7f . chr . to_json
40- utf8 = [ "© ≠ €! \01 " ]
41+ utf8 = [ "© ≠ €! \01 " ]
4142 json = '["© ≠ €! \u0001"]'
4243 assert_equal json , utf8 . to_json ( ascii_only : false )
4344 assert_equal utf8 , parse ( json )
@@ -78,10 +79,10 @@ def test_chars
7879 json = '"\u%04x"' % i
7980 i = i . chr
8081 assert_equal i , parse ( json ) [ 0 ]
81- if i == ?\b
82+ if i == " \b "
8283 generated = generate ( i )
83- assert '"\b"' == generated || '"\10"' == generated
84- elsif [ ?\n , ?\r , ?\t , ?\f ] . include? ( i )
84+ assert [ '"\b"' , '"\10"' ] . include? ( generated )
85+ elsif [ " \n " , " \r " , " \t " , " \f " ] . include? ( i )
8586 assert_equal i . dump , generate ( i )
8687 elsif i . chr < 0x20 . chr
8788 assert_equal json , generate ( i )
@@ -92,4 +93,179 @@ def test_chars
9293 end
9394 assert_equal "\302 \200 " , parse ( '"\u0080"' )
9495 end
96+
# Round-trips an array and a hash that are each nested +nesting_level+ levels
# deep through generate and parse, and checks that the structure comes back
# unchanged.
def test_deeply_nested_structures
  # NOTE(review): 100 presumably mirrors the library's default nesting limit,
  # so this exercises the deepest structure accepted without extra options --
  # confirm against the generator/parser defaults.
  nesting_level = 100

  # Build from the innermost container outwards: wrapping an empty array
  # (nesting_level - 1) times gives nesting_level arrays in total.
  deeply_nested = (nesting_level - 1).times.inject([]) { |inner, _| [inner] }
  assert_equal deeply_nested, parse(generate(deeply_nested))

  # Same shape for hashes: "key0" is the outermost key and the highest-numbered
  # key maps to an empty hash.
  deeply_nested_hash = (nesting_level - 2).downto(0).inject({}) do |inner, i|
    { "key#{i}" => inner }
  end
  assert_equal deeply_nested_hash, parse(generate(deeply_nested_hash))
end
123+
# Round-trips a 10,000-element array and a 10,000-entry hash through generate
# and parse.
#
# The whole collection is compared, not just its size and end points, so a
# corrupted or reordered element anywhere in the document fails the test.
def test_very_large_json_strings
  element_count = 10_000

  large_array = Array.new(element_count) { |i| "item#{i}" }
  parsed_array = parse(generate(large_array))
  # Size first: a cheap check with a short failure message.
  assert_equal large_array.size, parsed_array.size
  assert_equal large_array, parsed_array

  large_hash = Array.new(element_count) { |i| ["key#{i}", "value#{i}"] }.to_h
  parsed_hash = parse(generate(large_hash))
  assert_equal large_hash.size, parsed_hash.size
  assert_equal large_hash, parsed_hash
end
146+
# Generating JSON from a string whose bytes are not valid UTF-8 must raise a
# generator error whose message identifies the malformed input.
def test_invalid_utf8_sequences
  # 0xFF can never occur in well-formed UTF-8.
  invalid_utf8 = "\xFF\xFF"

  # assert_raise fails the test by itself when nothing is raised and hands back
  # the exception otherwise, so no sentinel raise or rescue block is needed.
  error = assert_raise(JSON::GeneratorError) { generate(invalid_utf8) }
  assert_match(%r{source sequence is illegal/malformed utf-8}, error.message)
end
162+
# Covers UTF-16 surrogate pairs written as \u escapes: decoding valid pairs,
# rejecting an unpaired high surrogate, and emitting a pair when the output is
# restricted to ASCII.
def test_surrogate_pair_handling
  # U+10000 and U+10FFFF are the lowest and highest code points that need a
  # surrogate pair.
  assert_equal "\u{10000}", parse('"\ud800\udc00"')
  assert_equal "\u{10FFFF}", parse('"\udbff\udfff"')

  # A high surrogate with no low surrogate after it is expected to be rejected.
  # NOTE(review): confirm the parser under test raises here; whether a lone
  # surrogate is an error is implementation-specific.
  assert_raise(JSON::ParserError) { parse('"\ud800"') }

  # U+10437 encodes as the pair D801/DC37 and must be escaped as such when only
  # ASCII output is allowed.
  generated = generate("\u{10437}", ascii_only: true)
  assert_equal '"\ud801\udc37"', generated
end
176+
# Checks that the two-character escapes for slash, backslash and double quote
# decode to the bare character, alone and back to back, and that a string made
# of every short-escape character survives a generate/parse round trip.
def test_json_escaping_edge_cases
  # An escaped forward slash decodes to a plain slash.
  assert_equal "/", parse('"\/"')

  # An escaped backslash decodes to a single backslash.
  assert_equal "\\", parse('"\\\\"')

  # An escaped quote decodes to a bare double quote.
  assert_equal '"', parse('"\\""')

  # All three escapes in sequence. The JSON text is "\\\"\/" and must decode to
  # exactly backslash, quote, slash -- the same unescaping the single-character
  # assertions above already require.
  assert_equal '\\"/', parse('"\\\\\\"\\/"')

  # Backspace, form feed, newline, carriage return, tab, quote and backslash.
  special_chars = "\b\f\n\r\t\"\\"
  assert_equal special_chars, parse(generate(special_chars))
end
198+
# Parses the empty object and the empty array from the source literal as-is
# and re-encoded as UTF-16/UTF-32 in both byte orders, then checks that the
# empty containers generate back to the same two-character documents.
def test_empty_objects_and_arrays
  wide_encodings = [
    Encoding::UTF_16BE, Encoding::UTF_16LE,
    Encoding::UTF_32BE, Encoding::UTF_32LE
  ]

  { '{}' => {}, '[]' => [] }.each do |document, expected|
    assert_equal(expected, parse(document))

    wide_encodings.each do |encoding|
      # The message names the encoding so a failure inside the loop can be
      # attributed without re-running.
      assert_equal(expected, parse(document.encode(encoding)), "#{document} encoded as #{encoding}")
    end

    assert_equal document, generate(expected)
  end
end
218+
# Checks that U+0000 is decoded from its \u0000 escape, is generated as that
# escape, and survives a round trip when embedded in the middle of a string.
def test_null_character_handling
  null_char = "\u0000"

  # Parsing the escape yields the one-character NUL string.
  assert_equal null_char, parse('"\u0000"')

  # Generating it yields the six-character escape, never a raw NUL byte.
  assert_equal '"\u0000"', generate(null_char)

  # A NUL surrounded by ordinary text is escaped in place and restored on parse.
  mixed_string = "before\u0000after"
  generated = generate(mixed_string)
  assert_equal '"before\u0000after"', generated
  assert_equal mixed_string, parse(generated)
end
233+
# Checks that spaces, tabs, carriage returns and newlines between JSON tokens
# are ignored by parse, and that whitespace escapes inside string keys and
# values are decoded rather than dropped.
def test_whitespace_handling
  # Source document => expected result. All whitespace here sits between
  # tokens, never inside a quoted string.
  padded_documents = {
    ' { } ' => {},
    "{\r\n}" => {},
    " [ \n] " => [],
    " [ \n\"a\",\r\n\"b\"\n] " => ["a", "b"],
    " { \n\"a\"\r\n: \t\"b\"\n} " => { "a" => "b" }
  }
  padded_documents.each do |document, expected|
    assert_equal(expected, parse(document), "while parsing #{document.inspect}")
  end

  # Long runs of mixed whitespace before and after the document.
  padding = " \n\r\t" * 10
  assert_equal({}, parse("#{padding}{}#{padding}"))

  # Single-quoted, so the document holds the two-character escapes \n and
  # \r\n; the parsed key and value must hold the real control characters.
  mixed_json = '{"a \n b":"c \r\n d"}'
  assert_equal({ "a \n b" => "c \r\n d" }, parse(mixed_json))
end
250+
# Checks every control character from U+0000 to U+001F that has no short
# escape: its \u00XX form must parse to the character, and the character must
# generate back to exactly that escape.
def test_control_character_handling
  # Backspace, tab, newline, form feed and carriage return have their own
  # two-character escapes and are skipped here.
  short_escapes = [0x08, 0x09, 0x0A, 0x0C, 0x0D]

  (0..0x1F).each do |code|
    next if short_escapes.include?(code)

    control_char = code.chr('UTF-8')
    # Single-quoted so \u stays a literal backslash-u in the JSON text.
    escaped_json = format('"\u%04x"', code)

    assert_equal control_char, parse(escaped_json)
    # Compare against the exact escape: a pattern match would also accept the
    # escape of a different control character.
    assert_equal escaped_json, generate(control_char)
  end

  # Several control characters in a row keep their order and individual escapes.
  control_str = "\u0001\u0002\u0003\u0004"
  generated = generate(control_str)
  assert_equal '"\u0001\u0002\u0003\u0004"', generated
  assert_equal control_str, parse(generated)
end
95271end
0 commit comments