2
2
Contains the main functionality of the JSONSchemaLexer.
3
3
"""
4
4
5
+ from importlib .resources import files
6
+ from pathlib import Path
5
7
from typing import Any , ClassVar
8
+ import json
6
9
7
10
from pygments .lexers .data import ( # type: ignore[reportMissingTypeStubs]
8
11
JsonLexer ,
9
12
)
10
13
from pygments .token import Token
11
- import jsonschema
12
14
13
15
14
16
class JSONSchemaLexer (JsonLexer ):
@@ -33,11 +35,13 @@ class JSONSchemaLexer(JsonLexer):
33
35
'"boolean"' ,
34
36
'"null"' ,
35
37
]
36
-
38
+ keywords : ClassVar [dict [str | None , list [str ]]] = {}
39
+ identifier : ClassVar [dict [str | None , str ]] = {}
37
40
default_dialect = None
38
41
39
42
def __init__ (self , default_dialect : str | None = None ):
40
43
super ().__init__ () # type: ignore[reportUnknownMemberType]
44
+ self ._populate_keywords_and_identifiers ()
41
45
if default_dialect and default_dialect [0 ] != '"' :
42
46
default_dialect = '"' + default_dialect
43
47
@@ -46,61 +50,25 @@ def __init__(self, default_dialect: str | None = None):
46
50
47
51
self .default_dialect = default_dialect
48
52
49
- def get_dialect_keywords (self , dialect_url : str | None ) -> list [str ]:
50
- match dialect_url :
51
- case '"https://json-schema.org/draft/2020-12/schema"' :
52
- return list (
53
- jsonschema .Draft202012Validator .VALIDATORS .keys (),
54
- ) + [
55
- "$schema" ,
56
- "$id" ,
57
- ]
58
- case '"https://json-schema.org/draft/2019-09/schema"' :
59
- return list (
60
- jsonschema .Draft201909Validator .VALIDATORS .keys (),
61
- ) + [
62
- "$schema" ,
63
- "$id" ,
64
- ]
65
- case '"http://json-schema.org/draft-07/schema#"' :
66
- return list (jsonschema .Draft7Validator .VALIDATORS .keys ()) + [
67
- "$schema" ,
68
- "$id" ,
69
- ]
70
- case '"http://json-schema.org/draft-06/schema#"' :
71
- return list (jsonschema .Draft6Validator .VALIDATORS .keys ()) + [
72
- "$schema" ,
73
- "$id" ,
74
- ]
75
- case '"http://json-schema.org/draft-04/schema#"' :
76
- return list (jsonschema .Draft4Validator .VALIDATORS .keys ()) + [
77
- "$schema" ,
78
- "id" ,
79
- ]
80
- case '"http://json-schema.org/draft-03/schema#"' :
81
- return list (jsonschema .Draft3Validator .VALIDATORS .keys ()) + [
82
- "$schema" ,
83
- "id" ,
84
- ]
85
- case _:
86
- return []
87
-
88
- def get_dialect_identifier (self , dialect : str | None ):
89
- match dialect :
90
- case '"https://json-schema.org/draft/2020-12/schema"' :
91
- return '"$id"'
92
- case '"https://json-schema.org/draft/2019-09/schema"' :
93
- return '"$id"'
94
- case '"http://json-schema.org/draft-07/schema#"' :
95
- return '"$id"'
96
- case '"http://json-schema.org/draft-06/schema#"' :
97
- return '"$id"'
98
- case '"http://json-schema.org/draft-04/schema#"' :
99
- return '"id"'
100
- case '"https://json-schema.org/draft-03/schema"' :
101
- return '"id"'
102
- case _:
103
- return None
53
def _populate_keywords_and_identifiers(self):
    """
    Fill the ``keywords`` and ``identifier`` tables from the data files.

    Every file under ``data/keywords`` is parsed as JSON and must provide
    the keys ``"dialect"``, ``"keywords"`` and ``"identifier"``. Both
    tables are keyed by the dialect wrapped in double quotes; the
    identifier value is stored double-quoted as well.

    Raises whatever the underlying file access or ``json.load`` raises,
    and ``KeyError`` when a file lacks one of the three keys.
    """
    dialect_files = files("jsonschema_lexer") / "data" / "keywords"
    if not dialect_files.is_dir():
        # The packaged resource directory is missing; fall back to a
        # location relative to this source file.
        dialect_files = Path(__file__).parent.parent / "data" / "keywords"

    for dialect_file in dialect_files.iterdir():
        # Skip sub-directories: they cannot be opened as files and would
        # abort the whole load.
        if not dialect_file.is_file():
            continue
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's default text encoding.
        with dialect_file.open(encoding="utf-8") as file:
            json_content = json.load(file)

        dialect_name = self._make_string_double_quoted(
            json_content["dialect"],
        )
        self.keywords[dialect_name] = json_content["keywords"]
        self.identifier[dialect_name] = self._make_string_double_quoted(
            json_content["identifier"],
        )
69
+
70
+ def _make_string_double_quoted (self , string : str ):
71
+ return '"' + string + '"'
104
72
105
73
def _find_rightmost_token_index (
106
74
self ,
@@ -140,11 +108,9 @@ def _get_nearest_valid_dialect(
140
108
tokens ,
141
109
nearest_schema_index ,
142
110
)
143
- identifier = self .get_dialect_identifier (dialect )
144
- is_dialect_valid = (
145
- True
146
- if identifier or syntax_stack [nearest_schema_index ][0 ] == 0
147
- else False
111
+ identifier = self .identifier .get (dialect , None )
112
+ is_dialect_valid = bool (
113
+ identifier or syntax_stack [nearest_schema_index ][0 ] == 0 ,
148
114
)
149
115
nearest_identifier_index = self ._find_rightmost_token_index (
150
116
syntax_stack [: index + 1 ],
@@ -209,12 +175,14 @@ def map_tokens_by_schema(self, tokens: list[tuple[int, Any, str]]):
209
175
dialect = self ._get_nearest_valid_dialect (tokens , syntax_stack )
210
176
yield self ._parse_token_tuple (
211
177
(start , token , value ),
212
- self .get_dialect_keywords (dialect ),
178
+ self .keywords . get (dialect , [] ),
213
179
)
214
180
215
181
def get_tokens_unprocessed(self, text: str):  # type: ignore[reportUnknownParameterType]
    """
    Tokenize ``text`` as JSON, then re-tag the tokens for JSON Schema.

    The parent lexer's token stream is collected into a list first and
    handed to ``map_tokens_by_schema`` in one piece; the tuples that
    method produces are yielded unchanged.
    """
    parent_stream = super().get_tokens_unprocessed(text)  # type: ignore[reportUnknownParameterType]
    plain_json_tokens: list[tuple[int, Any, str]] = list(parent_stream)
    yield from self.map_tokens_by_schema(plain_json_tokens)
0 commit comments