Skip to content

Commit 0014f00

Browse files
committed
Add the main lexer logic
1 parent b4e2f44 commit 0014f00

File tree

2 files changed

+184
-3
lines changed

2 files changed

+184
-3
lines changed

jsonschema_lexer/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
"""
2-
Fill me in!
3-
"""
1+
from .lexer import JSONSchemaLexer # type: ignore

jsonschema_lexer/lexer.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
from pygments.lexer import include, RegexLexer
2+
from pygments.token import Token
3+
4+
def _get_regex_from_options(options: list[str]) -> str:
5+
"""
6+
Constructs a regular expression pattern allowing any string from the options list.
7+
8+
Args:
9+
options (list[str]): List of options to be included in the regex pattern.
10+
11+
Returns:
12+
str: Regular expression pattern constructed from the options.
13+
"""
14+
options = ['"' + option + '"' for option in options]
15+
regex_str = "(" + "|".join(options) + ")"
16+
return regex_str
17+
18+
class JSONSchemaLexer(RegexLexer):
    """
    Lexer for JSON Schema syntax highlighting.

    The token table follows the shape of a JSON document (value, object,
    array) and additionally recognises quoted JSON Schema keywords when they
    appear as object attribute names, and quoted data type names when they
    appear as values.
    """

    name = "JSON Schema Lexer"

    # Names of the JSON Schema primitive types; highlighted when they occur
    # as quoted string values (e.g. ``"type": "string"``).
    data_types = ["object", "integer", "string", "number", "array", "boolean", "null"]

    # Keyword lists below are inserted verbatim into regexes, so entries that
    # contain regex metacharacters (the leading ``$``) are pre-escaped.
    core_keywords = [
        r"\$schema",
        r"\$id",
        r"\$ref",
        r"\$defs",
        r"\$comment",
        r"\$dynamicAnchor",
        r"\$dynamicRef",
        r"\$anchor",
        r"\$vocabulary",
    ]
    applicator_keywords = [
        "oneOf",
        "allOf",
        "anyOf",
        "if",
        "then",
        "else",
        "not",
        "properties",
        "patternProperties",
        "additionalProperties",
        "dependentSchemas",
        "propertyNames",
        # Fixed: the applicator keyword is "prefixItems"; "prefixNames" is
        # not a JSON Schema keyword.
        "prefixItems",
        "contains",
        "items",
    ]
    meta_data_keywords = [
        "title",
        "description",
        "default",
        "deprecated",
        "examples",
        "readOnly",
        "writeOnly",
    ]
    validation_keywords = [
        "type",
        "enum",
        "const",
        "minLength",
        "maxLength",
        "pattern",
        "maximum",
        "exclusiveMinimum",
        "multipleOf",
        "exclusiveMaximum",
        "minimum",
        "dependentRequired",
        "minProperties",
        "maxProperties",
        "required",
        "minItems",
        "maxItems",
        "minContains",
        "maxContains",
        "uniqueItems",
    ]
    # NOTE(review): apart from "format", these look like vocabulary names
    # rather than concrete keywords (e.g. "unevaluatedItems",
    # "contentMediaType") -- confirm the intended list.
    other_keywords = ["format", "unevaluated", "content", "format_assertion"]

    tokens = {
        "whitespace": [
            (r"\s+", Token.Whitespace),
        ],
        "data_types": [
            # Used Literal type here to differentiate the highlighted color of
            # data types from other keywords
            (_get_regex_from_options(data_types), Token.Literal),
        ],
        # Each keyword rule pushes "objectattribute" so the lexer goes on to
        # read the ``: value`` part that follows the attribute name.
        "core_keywords": [
            (
                _get_regex_from_options(core_keywords),
                Token.Keyword.Reserved,
                "objectattribute",
            ),
        ],
        "applicator_keywords": [
            (
                _get_regex_from_options(applicator_keywords),
                Token.Keyword.Reserved,
                "objectattribute",
            ),
        ],
        "validation_keywords": [
            (
                _get_regex_from_options(validation_keywords),
                Token.Keyword.Reserved,
                "objectattribute",
            ),
        ],
        "meta_data_keywords": [
            (
                _get_regex_from_options(meta_data_keywords),
                Token.Keyword.Reserved,
                "objectattribute",
            ),
        ],
        "other_keywords": [
            (
                _get_regex_from_options(other_keywords),
                Token.Keyword.Reserved,
                "objectattribute",
            ),
        ],
        "keywords": [
            include("core_keywords"),
            include("applicator_keywords"),
            include("validation_keywords"),
            include("meta_data_keywords"),
            include("other_keywords"),
        ],

        # represents a simple terminal value
        "simplevalue": [
            # must precede the generic string rule so quoted type names win
            include("data_types"),
            (r"(true|false)", Token.Number),
            # the bare JSON ``null`` literal; without this rule it would be
            # emitted as error tokens
            (r"null", Token.Keyword.Constant),
            (r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", Token.Number.Integer),
            # Fixed: raw string so that ``\\`` and ``\"`` escapes inside a
            # JSON string are consumed instead of ending the string early
            # (same pattern as the attribute-name rule in "objectvalue").
            (r'"(\\\\|\\"|[^"])*"', Token.String.Double),
        ],

        # the right hand side of an object, after the attribute name
        "objectattribute": [
            include("value"),
            (r":", Token.Punctuation),
            # comma terminates the attribute but expects more
            (r",", Token.Punctuation, "#pop"),
            # a closing brace terminates the entire object, so pop twice
            (r"\}", Token.Punctuation, ("#pop", "#pop")),
        ],

        # a json object - { attr, attr, ... }
        "objectvalue": [
            include("whitespace"),
            # keywords are tried first; any other quoted name is a plain tag
            include("keywords"),
            (r'"(\\\\|\\"|[^"])*"', Token.Name.Tag, "objectattribute"),
            (r"\}", Token.Punctuation, "#pop"),
        ],

        # json array - [ value, value, ... ]
        "arrayvalue": [
            include("whitespace"),
            include("value"),
            (r",", Token.Punctuation),
            (r"\]", Token.Punctuation, "#pop"),
        ],

        # a json value - either a simple value or a complex value (object or array)
        "value": [
            include("whitespace"),
            include("simplevalue"),
            (r"\{", Token.Punctuation, "objectvalue"),
            (r"\[", Token.Punctuation, "arrayvalue"),
        ],

        # the root of a json document should be a value
        "root": [
            include("value"),
        ],
    }

0 commit comments

Comments
 (0)