Skip to content

Commit f99b76f

Browse files
authored
Allow Kinds to be registered by packages outside JuliaSyntax (#461)
Extensible kinds are quite tricky. We want:
* To use a small number of bits for them.
* To have the string representation in the source, but have the compiler able to fully inline the integer representation.
* To allow modules with different kinds to cooperate together on the same integer representation.
* Not to trigger invalidation when new kinds are added.
* Different `Kind` modules not to require cooperation.

This is a very hard set of constraints to satisfy. The last one is already impossible in a single flat namespace, so in this design we've given up on it and require cooperation between all kind extension modules, including module authors allocating non-colliding ids for their modules, in addition to non-colliding kind names.
1 parent a63e8bb commit f99b76f

File tree

3 files changed

+250
-129
lines changed

3 files changed

+250
-129
lines changed

src/kinds.jl

Lines changed: 190 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,194 @@
11
# Definition of Kind type - mapping from token string identifiers to
22
# enumeration values as used in @K_str
3-
const _kind_names =
4-
[
3+
4+
"""
5+
K"name"
6+
Kind(namestr)
7+
8+
`Kind` is a type tag for specifying the type of tokens and interior nodes of
9+
a syntax tree. Abstractly, this tag is used to define our own *sum types* for
10+
syntax tree nodes. We do this explicitly outside the Julia type system because
11+
(a) Julia doesn't have sum types and (b) we want concrete data structures which
12+
are unityped from the Julia compiler's point of view, for efficiency.
13+
14+
Naming rules:
15+
* Kinds which correspond to exactly one textual form are represented with that
16+
text. This includes keywords like K"for" and operators like K"*".
17+
* Kinds which represent many textual forms have UpperCamelCase names. This
18+
includes kinds like K"Identifier" and K"Comment".
19+
* Kinds which exist merely as delimiters are all uppercase
20+
"""
21+
primitive type Kind 16 end
22+
23+
# The implementation of Kind here is basically similar to @enum. However we use
24+
# the K_str macro to self-name these kinds with their literal representation,
25+
# rather than needing to invent a new name for each.
26+
27+
# Lookup tables between kind names and their 16-bit integer codes. Both start
# out empty and are filled in by `register_kinds!`.
const _kind_str_to_int = Dict{String,UInt16}()
const _kind_int_to_str = Dict{UInt16,String}()

# Owner of each kind module id. An entry holds the module's name as a `Symbol`
# until that module registers its kinds, after which it holds the `Module`.
const _kind_modules = Dict{Int,Union{Symbol,Module}}(
    0 => :JuliaSyntax,
    1 => :JuliaLowering,
    2 => :JuliaSyntaxFormatter,
)

# Number of bits reserved for kind id's belonging to a single module
const _kind_nbits = 10
# Largest module id which fits in the bits remaining above `_kind_nbits`
const _kind_module_id_max = typemax(UInt16) >> _kind_nbits
37+
38+
function Kind(x::Integer)
    # Only values representable in the 16 bits of a `Kind` are accepted
    0 <= x <= typemax(UInt16) || throw(ArgumentError("Kind out of range: $x"))
    return Base.bitcast(Kind, convert(UInt16, x))
end
44+
45+
function Base.convert(::Type{String}, k::Kind)
    # Look the name up by the kind's raw 16-bit pattern; the table lookup
    # throws if no name is stored for that pattern.
    code = reinterpret(UInt16, k)
    return _kind_int_to_str[code]
end
48+
49+
function Base.convert(::Type{Kind}, s::AbstractString)
    # Map a kind name to its registered integer code, failing loudly for
    # names which are not present in the name table.
    code = get(_kind_str_to_int, s, nothing)
    if isnothing(code)
        error("unknown Kind name $(repr(s))")
    end
    return Kind(code)
end
55+
56+
# Plain-text form of a kind is its registered name
function Base.string(x::Kind)
    return convert(String, x)
end

function Base.print(io::IO, x::Kind)
    return print(io, convert(String, x))
end

# Kinds are ordered by their underlying integer code
function Base.isless(x::Kind, y::Kind)
    return reinterpret(UInt16, x) < reinterpret(UInt16, y)
end
60+
61+
function Base.show(io::IO, k::Kind)
    # Display in the same K"name" form that is used to write kinds in source
    name = convert(String, k)
    print(io, "K\"", name, "\"")
end
64+
65+
# Save the string representation rather than the bit pattern so that kinds
# can be serialized and deserialized across different JuliaSyntax versions.
#
# The encoding is a single length byte followed by the bytes of the kind name.
# The length is measured in bytes rather than characters so that it agrees
# with `read(io, Kind)`, which reads back exactly that many bytes; the two
# measures differ for any name containing non-ASCII characters.
#
# Returns the total number of bytes written.
function Base.write(io::IO, k::Kind)
    str = convert(String, k)
    # `UInt8(...)` throws an `InexactError` for names longer than 255 bytes
    # rather than silently writing a truncated length.
    return write(io, UInt8(sizeof(str))) + write(io, str)
end
71+
function Base.read(io::IO, ::Type{Kind})
    # Counterpart of `write(io, ::Kind)`: one length byte, then that many
    # bytes holding the kind's name.
    nbytes = read(io, UInt8)
    name = String(read(io, nbytes))
    return convert(Kind, name)
end
76+
77+
function Base.parentmodule(k::Kind)
    # The owning module's id is stored in the bits above the low
    # `_kind_nbits` bits of the kind's integer code.
    code = reinterpret(UInt16, k)
    owner = _kind_modules[code >> _kind_nbits]
    # The table may still hold a `Symbol` for this id; the type assertion
    # throws in that case.
    return owner::Module
end
81+
82+
# Worker for `register_kinds!` which operates on explicitly passed tables.
#
# Arguments:
# * `kind_modules`    - table from module id to the owning module (or to its
#                       name as a `Symbol` if it hasn't registered yet)
# * `int_to_kindstr`  - table from kind integer to kind name
# * `kind_str_to_int` - table from kind name to kind integer
# * `mod`, `module_id`, `names` - as for `register_kinds!`
#
# Throws an `ErrorException` if `module_id` is out of range, if there are too
# many `names`, if `module_id` is already claimed by a different module, or if
# `mod` registers again with names which are unknown, out of order, or owned by
# another module. Registering again with a consistent list does nothing.
function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, module_id, names)
    # Negative ids are rejected up front: they would otherwise claim an entry
    # in `kind_modules` and only fail later, when the kind integer is
    # converted to `UInt16`.
    if module_id < 0 || module_id > _kind_module_id_max
        error("Kind module id $module_id is out of range")
    elseif length(names) >= 1 << _kind_nbits
        error("Too many kind names")
    elseif !haskey(kind_modules, module_id)
        kind_modules[module_id] = mod
    else
        m = kind_modules[module_id]
        if m == nameof(mod)
            # Ok: known kind module, but not loaded until now
            kind_modules[module_id] = mod
        elseif m == mod
            # `mod` has registered before; check that `names` matches what is
            # already in the tables.
            existing_kinds = [(i = get(kind_str_to_int, n, nothing);
                               isnothing(i) ? nothing : Kind(i)) for n in names]
            if any(isnothing, existing_kinds) ||
                    !issorted(existing_kinds) ||
                    any(k->parentmodule(k) != mod, existing_kinds)
                error("Error registering kinds for module $mod (register_kinds() called more than once inconsistently, or conflict with existing module kinds?)")
            else
                # Assume we're re-registering kinds as in top level vs `__init__`
                return
            end
        else
            error("Kind module ID $module_id already claimed by module $m")
        end
    end
    # Process names to conflate category BEGIN/END markers with the first/last
    # in the category.
    i = 0   # id which the next normal (non-marker) kind will receive
    for name in names
        normal_kind = false
        if startswith(name, "BEGIN_")
            # Alias of the next normal kind
            j = i
        elseif startswith(name, "END_")
            # Alias of the previous normal kind
            j = i - 1
        else
            normal_kind = true
            j = i
            i += 1
        end
        # Module id in the high bits, per-module kind id in the low bits
        kind_int = (module_id << _kind_nbits) | j
        push!(kind_str_to_int, name=>kind_int)
        if normal_kind
            # Only normal kinds get a reverse mapping, so a kind integer is
            # never mapped back to a marker name.
            push!(int_to_kindstr, kind_int=>name)
        end
    end
    return nothing
end
130+
131+
"""
132+
register_kinds!(mod, module_id, names)
133+
134+
Register custom `Kind`s with the given `names`, belonging to a module `mod`.
135+
`names` is an array of arbitrary strings.
136+
137+
In order for kinds to be represented by a small number of bits, some nontrivial
138+
cooperation is required between modules using custom kinds:
139+
* The integer `module_id` is globally unique for each `mod` which will be used
140+
together, and not larger than $_kind_module_id_max.
141+
* No two modules register the same `name`. The semantics of a given `kind` name
142+
should be defined by the module which owns it.
143+
144+
To allow ranges of kinds to be delimited and quickly tested for, some special
145+
names are allowed: `BEGIN_section` and `END_section` pairs are detected, and
146+
alias the next and previous kind id's respectively so that kinds in `section`
147+
can be tested with `BEGIN_section <= k <= END_section`.
148+
"""
149+
function register_kinds!(mod, module_id, names)
    # Delegate to the worker, supplying the global registry tables
    return _register_kinds!(
        _kind_modules, _kind_int_to_str, _kind_str_to_int,
        mod, module_id, names,
    )
end
152+
153+
#-------------------------------------------------------------------------------
154+
155+
"""
156+
K"s"
157+
158+
The kind of a token or AST internal node with string "s".
159+
160+
For example
161+
* K")" is the kind of the right parenthesis token
162+
* K"block" is the kind of a block of code (eg, statements within a begin-end).
163+
"""
164+
macro K_str(s)
    # The name is resolved while the macro is expanded, so the expansion is
    # the `Kind` value itself rather than a lookup expression.
    return convert(Kind, s)
end
167+
168+
"""
169+
A set of kinds which can be used with the `in` operator. For example
170+
171+
k in KSet"+ - *"
172+
"""
173+
macro KSet_str(str)
    # Each whitespace-separated word names one kind; resolve them all during
    # macro expansion and expand to a tuple of the resulting `Kind` values.
    kinds = map(s -> convert(Kind, s), split(str))
    return :(($(kinds...),))
end
180+
181+
"""
182+
kind(x)
183+
184+
Return the `Kind` of `x`.
185+
"""
186+
function kind(k::Kind)
    return k
end
187+
188+
189+
#-------------------------------------------------------------------------------
190+
# Kinds used by JuliaSyntax
191+
register_kinds!(JuliaSyntax, 0, [
5192
"None" # Placeholder; never emitted by lexer
6193
"EndMarker" # EOF
7194
"Comment"
@@ -918,133 +1105,7 @@ const _kind_names =
9181105
# Container for a single statement/atom plus any trivia and errors
9191106
"wrapper"
9201107
"END_SYNTAX_KINDS"
921-
]
922-
923-
"""
924-
K"name"
925-
Kind(id)
926-
927-
`Kind` is a type tag for specifying the type of tokens and interior nodes of
928-
a syntax tree. Abstractly, this tag is used to define our own *sum types* for
929-
syntax tree nodes. We do this explicitly outside the Julia type system because
930-
(a) Julia doesn't have sum types and (b) we want concrete data structures which
931-
are unityped from the Julia compiler's point of view, for efficiency.
932-
933-
Naming rules:
934-
* Kinds which correspond to exactly one textural form are represented with that
935-
text. This includes keywords like K"for" and operators like K"*".
936-
* Kinds which represent many textural forms have UpperCamelCase names. This
937-
includes kinds like K"Identifier" and K"Comment".
938-
* Kinds which exist merely as delimiters are all uppercase
939-
"""
940-
primitive type Kind 16 end
941-
942-
# The implementation of Kind here is basically similar to @enum. However we use
943-
# the K_str macro to self-name these kinds with their literal representation,
944-
# rather than needing to invent a new name for each.
945-
946-
let kind_int_type = :UInt16
947-
# Preprocess _kind_names to conflate category markers with the first/last
948-
# in the category.
949-
kindstr_to_int = Dict{String,UInt16}()
950-
i = 1
951-
while i <= length(_kind_names)
952-
kn = _kind_names[i]
953-
kind_int = i-1
954-
if startswith(kn, "BEGIN_")
955-
deleteat!(_kind_names, i)
956-
elseif startswith(kn, "END_")
957-
kind_int = i-2
958-
deleteat!(_kind_names, i)
959-
else
960-
i += 1
961-
end
962-
push!(kindstr_to_int, kn=>kind_int)
963-
end
964-
965-
max_kind_int = length(_kind_names)-1
966-
967-
@eval begin
968-
function Kind(x::Integer)
969-
if x < 0 || x > $max_kind_int
970-
throw(ArgumentError("Kind out of range: $x"))
971-
end
972-
return Base.bitcast(Kind, convert($kind_int_type, x))
973-
end
974-
975-
Base.convert(::Type{String}, k::Kind) = _kind_names[1 + reinterpret($kind_int_type, k)]
976-
977-
let kindstr_to_int=$kindstr_to_int
978-
function Base.convert(::Type{Kind}, s::AbstractString)
979-
i = get(kindstr_to_int, s) do
980-
error("unknown Kind name $(repr(s))")
981-
end
982-
Kind(i)
983-
end
984-
end
985-
986-
Base.string(x::Kind) = convert(String, x)
987-
Base.print(io::IO, x::Kind) = print(io, convert(String, x))
988-
989-
Base.typemin(::Type{Kind}) = Kind(0)
990-
Base.typemax(::Type{Kind}) = Kind($max_kind_int)
991-
992-
Base.:<(x::Kind, y::Kind) = reinterpret($kind_int_type, x) < reinterpret($kind_int_type, y)
993-
994-
Base.instances(::Type{Kind}) = (Kind(i) for i in reinterpret($kind_int_type, typemin(Kind)):reinterpret($kind_int_type, typemax(Kind)))
995-
end
996-
end
997-
998-
function Base.show(io::IO, k::Kind)
999-
print(io, "K\"$(convert(String, k))\"")
1000-
end
1001-
1002-
# Save the string representation rather than the bit pattern so that kinds
1003-
# can be serialized and deserialized across different JuliaSyntax versions.
1004-
function Base.write(io::IO, k::Kind)
1005-
str = convert(String, k)
1006-
write(io, UInt8(length(str))) + write(io, str)
1007-
end
1008-
function Base.read(io::IO, ::Type{Kind})
1009-
len = read(io, UInt8)
1010-
str = String(read(io, len))
1011-
convert(Kind, str)
1012-
end
1013-
1014-
#-------------------------------------------------------------------------------
1015-
1016-
"""
1017-
K"s"
1018-
1019-
The kind of a token or AST internal node with string "s".
1020-
1021-
For example
1022-
* K")" is the kind of the right parenthesis token
1023-
* K"block" is the kind of a block of code (eg, statements within a begin-end).
1024-
"""
1025-
macro K_str(s)
1026-
convert(Kind, s)
1027-
end
1028-
1029-
"""
1030-
A set of kinds which can be used with the `in` operator. For example
1031-
1032-
k in KSet"+ - *"
1033-
"""
1034-
macro KSet_str(str)
1035-
kinds = [convert(Kind, s) for s in split(str)]
1036-
1037-
quote
1038-
($(kinds...),)
1039-
end
1040-
end
1041-
1042-
"""
1043-
kind(x)
1044-
1045-
Return the `Kind` of `x`.
1046-
"""
1047-
kind(k::Kind) = k
1108+
])
10481109

10491110
#-------------------------------------------------------------------------------
10501111
const _nonunique_kind_names = Set([

test/kinds.jl

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Only test this once per session, as kind modules must be unique (ugh)
if !isdefined(@__MODULE__, :FooKinds)
@eval module FooKinds

using JuliaSyntax

# Register the test kinds of this module under kind module id 42. The
# BEGIN_/END_ names are category markers: FOOBAR is nested inside FOO.
function _init_kinds()
    JuliaSyntax.register_kinds!(@__MODULE__, 42, [
        "BEGIN_FOO"
        "foo_1"
        "foo_2"
        "BEGIN_FOOBAR"
        "foobar_1"
        "foobar_2"
        "END_FOOBAR"
        "END_FOO"
    ])
end

# Register before the K"foo_1" literal below, which needs the name to exist
_init_kinds()

k_before_init = K"foo_1"

# Register a second time with the same names, exercising the re-registration
# path of `register_kinds!`.
function __init__()
    _init_kinds()
end

end

# A second module, used by the tests for the registration error cases
@eval module BarKinds
    # Intentionally empty
end

end
35+
36+
@testset "Kinds" begin
    # Distinct names map to distinct kinds
    @test K"foo_1" != K"foo_2"

    # The kind captured inside FooKinds (before its `__init__` ran) is the
    # same as the one resolved here
    @test FooKinds.k_before_init == K"foo_1"

    # BEGIN_ markers alias the next kind and END_ markers the previous kind
    @test K"BEGIN_FOO" == K"foo_1"
    @test K"foo_2" < K"BEGIN_FOOBAR"
    @test K"BEGIN_FOOBAR" == K"foobar_1"
    @test K"END_FOOBAR" == K"foobar_2"
    @test K"END_FOO" == K"foobar_2"

    @test parentmodule(K"foo_1") == FooKinds
    @test sprint(show, K"foo_1") == "K\"foo_1\""

    # Too many kind modules
    @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 64, ["hoo?"])
    # Too many kind names per module
    @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, string.(1:1024))
    # Re-registering or registering new kinds is not supported
    @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_2", "foo_1"])
    @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_3"])
    # Module ID already taken by FooKinds
    @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, ["hii?"])
end

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ include("test_utils_tests.jl")
1212
include("fuzz_test.jl")
1313

1414
include("utils.jl")
15+
include("kinds.jl")
1516

1617
@testset "Tokenize" begin
1718
include("tokenize.jl")

0 commit comments

Comments
 (0)