@@ -41,15 +41,15 @@ def construct_compiled_grammar():
4141
4242def test_get_serialization_version ():
4343 """Test the version of the serialized JSON string."""
44- assert xgr .get_serialization_version () == "v4 "
44+ assert xgr .get_serialization_version () == "v5 "
4545
4646
4747def test_serialize_grammar ():
4848 """Test Grammar serialization produces expected JSON string."""
4949 grammar = construct_grammar ()
5050 serialized = grammar .serialize_json ()
5151 expected_json = {
52- "rules" : [["rule1" , 4 , 9 ], ["root_rule" , 8 , - 1 ]],
52+ "rules" : [["rule1" , 4 , 9 , True ], ["root_rule" , 8 , - 1 , False ]],
5353 "grammar_expr_data" : [0 , 2 , 7 , 10 , 14 , 18 , 21 , 24 , 28 , 31 ],
5454 "grammar_expr_indptr" : [
5555 # fmt: off
@@ -60,8 +60,7 @@ def test_serialize_grammar():
6060 "complete_fsm" : None ,
6161 "per_rule_fsms" : [],
6262 "allow_empty_rule_ids" : [],
63- "exact_lookahead" : [],
64- "__VERSION__" : "v4" ,
63+ "__VERSION__" : "v5" ,
6564 }
6665 # The fsms are the same one, but the start state and end states are different.
6766 assert json .loads (serialized ) == expected_json
@@ -70,7 +69,7 @@ def test_serialize_grammar():
7069def test_serialize_grammar_exception ():
7170 """Test Grammar deserialization raises errors on a version mismatch or a missing required field."""
7271 expected_json = {
73- "rules" : [["rule1" , 4 , 9 ], ["root_rule" , 8 , - 1 ]],
72+ "rules" : [["rule1" , 4 , 9 , True ], ["root_rule" , 8 , - 1 , False ]],
7473 "grammar_expr_data" : [0 , 2 , 7 , 10 , 14 , 18 , 21 , 24 , 28 , 31 ],
7574 "grammar_expr_indptr" : [
7675 # fmt: off
@@ -81,15 +80,14 @@ def test_serialize_grammar_exception():
8180 "allow_empty_rule_ids" : [],
8281 "complete_fsm" : None ,
8382 "per_rule_fsms" : [],
84- "exact_lookahead" : [],
85- "__VERSION__" : "v4" ,
83+ "__VERSION__" : "v5" ,
8684 }
8785
8886 expected_json ["__VERSION__" ] = "v1" # Change version to trigger error
8987 with pytest .raises (xgr .DeserializeVersionError ):
9088 xgr .Grammar .deserialize_json (json .dumps (expected_json ))
9189
92- expected_json ["__VERSION__" ] = "v4 "
90+ expected_json ["__VERSION__" ] = "v5 "
9391 expected_json .pop ("rules" ) # Remove required field to trigger error
9492 with pytest .raises (xgr .DeserializeFormatError ):
9593 xgr .Grammar .deserialize_json (json .dumps (expected_json ))
@@ -141,7 +139,7 @@ def test_serialize_tokenizer_info():
141139 '"decoded_vocab":["1","212","a","A","b","\\ u00e4\\ u00b8\\ u0080","-","aBc","abc"],'
142140 '"sorted_decoded_vocab":[[6,"-"],[3,"A"],[2,"a"],[7,"aBc"],[8,"abc"],[4,"b"],[5,"\\ u00e4\\ u00b8\\ u0080"]],'
143141 '"trie_subtree_nodes_range":[1,2,5,4,5,6,7],'
144- '"__VERSION__":"v4 "}'
142+ '"__VERSION__":"v5 "}'
145143 )
146144 assert json .loads (serialized ) == json .loads (expected_json )
147145
@@ -195,7 +193,7 @@ def test_serialize_compiled_grammar():
195193
196194 expected_json = {
197195 "grammar" : {
198- "rules" : [["rule1" , 4 , 6 ], ["root_rule" , 10 , - 1 ]],
196+ "rules" : [["rule1" , 4 , 6 , True ], ["root_rule" , 10 , - 1 , False ]],
199197 "grammar_expr_data" : [0 , 2 , 7 , 10 , 14 , 18 , 21 , 24 , 27 , 30 , 34 ],
200198 "grammar_expr_indptr" : [
201199 # fmt: off
@@ -215,15 +213,14 @@ def test_serialize_compiled_grammar():
215213 [{'data_' : [[0 , 47 , 3 ], [58 , 127 , 3 ], [192 , 223 , 1 ], [224 , 239 , 4 ], [240 , 247 , 5 ], [128 , 191 , 3 ], [- 2 , 0 , 2 ], [128 , 191 , 1 ], [128 , 191 , 4 ], [- 2 , 0 , 8 ], [97 , 97 , 6 ]],
216214 'indptr_' : [0 , 5 , 6 , 6 , 7 , 8 , 9 , 9 , 10 , 11 ]}, 7 , [6 ], False ]],
217215 # fmt: on
218- "exact_lookahead" : [],
219216 },
220217 "tokenizer_metadata" : {
221218 "vocab_type" : 1 ,
222219 "vocab_size" : 10 ,
223220 "add_prefix_space" : True ,
224221 "stop_token_ids" : [0 , 1 ],
225222 },
226- "__VERSION__" : "v4 " ,
223+ "__VERSION__" : "v5 " ,
227224 }
228225
229226 class AdaptiveTokenMask (BaseModel ):
0 commit comments