Skip to content

Commit c94179c

Browse files
mokeyish authored and michaelmior committed
Add support for EMOJI and CJK unicode
1 parent d9a52eb commit c94179c

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

jsonpath_ng/lexer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ def tokenize(self, string):
6464
t_ignore = ' \t'
6565

6666
def t_ID(self, t):
67-
r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'
67+
# CJK: [\u4E00-\u9FA5]
68+
# EMOJI: [\U0001F600-\U0001F64F]
69+
r'([a-zA-Z_@]|[\u4E00-\u9FA5]|[\U0001F600-\U0001F64F])([a-zA-Z0-9_@\-]|[\u4E00-\u9FA5]|[\U0001F600-\U0001F64F])*'
6870
t.type = self.reserved_words.get(t.value, 'ID')
6971
return t
7072

tests/test_parser.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
# Atomic
1111
# ------
1212
#
13+
("😀", Fields("😀")),
14+
("你好", Fields("你好")),
1315
("foo", Fields("foo")),
1416
("*", Fields("*")),
1517
("1", Fields("1")),

0 commit comments

Comments
 (0)