@@ -1540,5 +1540,120 @@ def test_structural_tag_error(stag_format: Dict[str, Any]):
15401540 xgr .Grammar .from_structural_tag (structural_tag )
15411541
15421542
def _build_utf8_stag_cases():
    """Return the ``(stag_format, instance, is_accepted)`` cases for the UTF-8 test.

    Every helper below creates a brand-new dict on each call, so no two cases
    share a mutable format object.
    """

    def const(text: str) -> Dict[str, Any]:
        # Literal-string format.
        return {"type": "const_string", "value": text}

    def any_text() -> Dict[str, Any]:
        return {"type": "any_text"}

    def wrapped_json_string() -> Dict[str, Any]:
        # Non-ASCII literal, then a JSON-schema string, then another literal.
        return {
            "type": "sequence",
            "elements": [
                const("开始"),
                {"type": "json_schema", "json_schema": {"type": "string"}},
                const("结束"),
            ],
        }

    def text_tag() -> Dict[str, Any]:
        return {
            "type": "tag",
            "begin": "标签开始",
            "content": any_text(),
            "end": "标签结束",
        }

    def either_option() -> Dict[str, Any]:
        return {"type": "or", "elements": [const("选项一"), const("选项二")]}

    def separated_items() -> Dict[str, Any]:
        # Entries of "tags" deliberately carry no "type" key.
        return {
            "type": "tags_with_separator",
            "tags": [{"begin": "项开始", "content": any_text(), "end": "项结束"}],
            "separator": "分隔符",
        }

    def single_string_object(key: str) -> Dict[str, Any]:
        # Object schema with exactly one required string property named `key`.
        return {
            "type": "object",
            "properties": {key: {"type": "string"}},
            "required": [key],
            "additionalProperties": False,
        }

    return [
        # const_string
        (const("你好"), "你好", True),
        (const("你好"), "hello", False),
        # any_text
        (any_text(), "😊", True),
        # sequence
        (wrapped_json_string(), '开始"中间"结束', True),
        (wrapped_json_string(), "开始中间内容", False),
        # tag
        (text_tag(), "标签开始一些内容标签结束", True),
        (text_tag(), "标签开始一些内容", False),
        # or
        (either_option(), "选项一", True),
        (either_option(), "选项三", False),
        # tags_with_separator
        (separated_items(), "项开始内容1项结束分隔符项开始内容2项结束", True),
        (separated_items(), "项开始内容1项结束项开始内容2项结束", False),
        # json_schema with a non-ASCII property name
        (
            {"type": "json_schema", "json_schema": single_string_object("字段")},
            '{"字段": "值"}',
            True,
        ),
        # qwen_xml_parameter with a non-ASCII parameter name
        (
            {"type": "qwen_xml_parameter", "json_schema": single_string_object("参数")},
            "<parameter=参数>值</parameter>",
            True,
        ),
    ]


utf8_stag_format_and_instance_accepted = _build_utf8_stag_cases()
1648+
1649+
@pytest.mark.parametrize(
    ("stag_format", "instance", "is_accepted"),
    utf8_stag_format_and_instance_accepted,
)
def test_basic_structural_tag_utf8(
    stag_format: Dict[str, Any], instance: str, is_accepted: bool
):
    """Run one non-ASCII (UTF-8) structural tag case.

    The format, the candidate instance and the expected accept/reject flag are
    passed unchanged to ``check_stag_with_instance``.
    """
    check_stag_with_instance(stag_format, instance, is_accepted)
1656+
1657+
if __name__ == "__main__":
    # Executed as a script: pass the process arguments straight to pytest.
    _argv = sys.argv
    pytest.main(_argv)
0 commit comments