|
1 | 1 | # Definition of Kind type - mapping from token string identifiers to
|
2 | 2 | # enumeration values as used in @K_str
|
3 |
| -const _kind_names = |
4 |
| -[ |
| 3 | + |
"""
    K"name"
    Kind(namestr)

A `Kind` is a type tag which identifies the type of a token or of an interior
node of a syntax tree. Abstractly, the tag lets us define our own *sum types*
for syntax tree nodes. This is done explicitly, outside the Julia type system,
for two reasons: (a) Julia doesn't have sum types, and (b) for efficiency we
want concrete data structures which are unityped from the point of view of the
Julia compiler.

Naming rules:
* A kind which corresponds to exactly one textual form is named by that text.
  Keywords such as K"for" and operators such as K"*" fall into this group.
* A kind which represents many textual forms has an UpperCamelCase name.
  Examples are K"Identifier" and K"Comment".
* A kind which exists merely as a delimiter has an all-uppercase name.
"""
primitive type Kind 16 end
| 22 | + |
# Kind is implemented much like an @enum. The difference is that the K_str
# macro lets each kind name itself by its literal representation, so no
# separate identifier has to be invented per kind.

# Bit layout of a kind id: the low `_kind_nbits` bits are reserved for the kind
# id's belonging to a single module, and the remaining high bits of the UInt16
# hold the module id.
const _kind_nbits = 10
const _kind_module_id_max = typemax(UInt16) >> _kind_nbits

# Lookup tables between kind names and their integer representation.
const _kind_str_to_int = Dict{String,UInt16}()
const _kind_int_to_str = Dict{UInt16,String}()

# Module id => owning module. An entry is a `Symbol` (the module's name) until
# the actual `Module` is registered.
const _kind_modules = Dict{Int,Union{Symbol,Module}}(
    0 => :JuliaSyntax,
    1 => :JuliaLowering,
    2 => :JuliaSyntaxFormatter,
)
| 37 | + |
# Construct a `Kind` directly from its integer representation.
# Throws an `ArgumentError` when `x` does not fit into a `UInt16`.
function Kind(x::Integer)
    0 <= x <= typemax(UInt16) || throw(ArgumentError("Kind out of range: $x"))
    return reinterpret(Kind, convert(UInt16, x))
end
| 44 | + |
# Look up the registered name of kind `k`; throws a `KeyError` when the bit
# pattern of `k` has no entry in `_kind_int_to_str`.
function Base.convert(::Type{String}, k::Kind)
    id = reinterpret(UInt16, k)
    return _kind_int_to_str[id]
end
| 48 | + |
# Look up the kind registered under the name `s`; raises an error for names
# which are not present in `_kind_str_to_int`.
function Base.convert(::Type{Kind}, s::AbstractString)
    id = get(_kind_str_to_int, s, nothing)
    if id === nothing
        error("unknown Kind name $(repr(s))")
    end
    return Kind(id)
end
| 55 | + |
# `string` and `print` both produce the bare kind name (without the K"..."
# wrapper which `show` adds).
function Base.string(x::Kind)
    return convert(String, x)
end

function Base.print(io::IO, x::Kind)
    return print(io, convert(String, x))
end

# Kinds are ordered by their underlying integer representation.
function Base.isless(x::Kind, y::Kind)
    return reinterpret(UInt16, x) < reinterpret(UInt16, y)
end
| 60 | + |
# Display a kind in the same form in which it is written in source: K"name".
function Base.show(io::IO, k::Kind)
    name = convert(String, k)
    return print(io, "K\"", name, "\"")
end
| 64 | + |
# Save the string representation rather than the bit pattern so that kinds
# can be serialized and deserialized across different JuliaSyntax versions.
#
# Wire format: one length byte followed by the bytes of the kind name. The
# prefix must be the size of the name in bytes (`sizeof`) rather than its
# number of characters (`length`), because the matching `read` method consumes
# exactly that many bytes; with a character count, any name containing
# multi-byte (non-ASCII) characters would be truncated when read back.
# A name longer than 255 bytes cannot be encoded and raises an `InexactError`.
# Returns the total number of bytes written.
function Base.write(io::IO, k::Kind)
    str = convert(String, k)
    return write(io, UInt8(sizeof(str))) + write(io, str)
end
# Deserialize a kind: the first byte is a length prefix, after which that many
# bytes are read and interpreted as the name of the kind.
function Base.read(io::IO, ::Type{Kind})
    nbytes = read(io, UInt8)
    name = String(read(io, nbytes))
    return convert(Kind, name)
end
| 76 | + |
# Return the module which owns kind `k`. The bits above the low `_kind_nbits`
# bits of a kind hold the id of its module. The type assertion fails if the
# entry in `_kind_modules` is still a `Symbol` rather than a `Module`.
function Base.parentmodule(k::Kind)
    owner_id = reinterpret(UInt16, k) >> _kind_nbits
    owner = _kind_modules[owner_id]
    return owner::Module
end
| 81 | + |
"""
    _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, module_id, names)

Implementation of `register_kinds!` which operates on explicitly passed lookup
tables.

Claim `module_id` for `mod` in `kind_modules`, then give every entry of `names`
the integer `(module_id << _kind_nbits) | index` and record it in
`kind_str_to_int`. Ordinary names are recorded in `int_to_kindstr` as well.
Names starting with `BEGIN_` or `END_` are aliases: they share the integer of
the next or previous ordinary name respectively and are only recorded in
`kind_str_to_int`.

An error is raised if `module_id` is out of range, if there are too many
names, if `module_id` is already claimed by a different module, or if `mod`
registers again with names which are inconsistent with what is already
registered. Registering again with consistent names does nothing.
"""
function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, module_id, names)
    # Negative ids are rejected here as well. They would otherwise claim a slot
    # in `kind_modules` and, with the `UInt16`-valued tables which
    # `register_kinds!` passes in, only fail later when the negative kind
    # integer is stored.
    if module_id < 0 || module_id > _kind_module_id_max
        error("Kind module id $module_id is out of range")
    elseif length(names) >= 1 << _kind_nbits
        error("Too many kind names")
    elseif !haskey(kind_modules, module_id)
        kind_modules[module_id] = mod
    else
        m = kind_modules[module_id]
        if m == nameof(mod)
            # Ok: known kind module, but not loaded until now
            kind_modules[module_id] = mod
        elseif m == mod
            # `id` is deliberately a different name from the counter `i` used
            # further down, so that this comprehension does not assign to (and
            # capture) that function-wide local.
            existing_kinds = [(id = get(kind_str_to_int, n, nothing);
                               isnothing(id) ? nothing : Kind(id)) for n in names]
            if any(isnothing, existing_kinds) ||
                    !issorted(existing_kinds) ||
                    any(k->parentmodule(k) != mod, existing_kinds)
                error("Error registering kinds for module $mod (register_kinds() called more than once inconsistently, or conflict with existing module kinds?)")
            else
                # Assume we're re-registering kinds as in top level vs `__init__`
                return
            end
        else
            error("Kind module ID $module_id already claimed by module $m")
        end
    end
    # Process names to conflate category BEGIN/END markers with the first/last
    # in the category.
    i = 0   # index which the next ordinary (non-marker) name will receive
    for name in names
        normal_kind = false
        if startswith(name, "BEGIN_")
            # Alias of the next ordinary kind
            j = i
        elseif startswith(name, "END_")
            # Alias of the previous ordinary kind
            j = i - 1
        else
            normal_kind = true
            j = i
            i += 1
        end
        kind_int = (module_id << _kind_nbits) | j
        push!(kind_str_to_int, name=>kind_int)
        if normal_kind
            # Only ordinary names take part in the reverse (integer => name)
            # mapping; markers merely alias an existing integer.
            push!(int_to_kindstr, kind_int=>name)
        end
    end
    return nothing
end
| 130 | + |
"""
    register_kinds!(mod, module_id, names)

Register the custom `Kind`s listed in `names` (an array of arbitrary strings)
as belonging to the module `mod`.

Kinds are represented by a small number of bits, which requires some nontrivial
cooperation between the modules which define custom kinds:
* Each `mod` which will be used together with others must be given its own
  globally unique integer `module_id`, not larger than $_kind_module_id_max.
* A given `name` must not be registered by two modules. The semantics of a
  `kind` name should be defined by the module which owns it.

Some special names make it possible to delimit ranges of kinds and to test for
them quickly: `BEGIN_section` and `END_section` pairs are detected and alias the
next and previous kind id's respectively, so that the kinds in `section` can be
tested with `BEGIN_section <= k <= END_section`.
"""
function register_kinds!(mod, module_id, names)
    return _register_kinds!(_kind_modules, _kind_int_to_str, _kind_str_to_int,
                            mod, module_id, names)
end
| 152 | + |
| 153 | +#------------------------------------------------------------------------------- |
| 154 | + |
"""
    K"s"

The kind of a token or AST internal node with string "s".

Examples:
* K")" is the kind of the right parenthesis token
* K"block" is the kind of a block of code (eg, the statements within a
  begin-end).
"""
macro K_str(s)
    # The name is resolved while the macro is expanded, so the expansion is the
    # `Kind` value itself.
    return convert(Kind, s)
end
| 167 | + |
"""
A set of kinds for use with the `in` operator. The kind names are separated by
whitespace. For example

    k in KSet"+ - *"
"""
macro KSet_str(str)
    members = map(name -> convert(Kind, name), split(str))
    # Expand to a tuple of the `Kind` values.
    return Expr(:tuple, members...)
end
| 180 | + |
"""
    kind(x)

Return the `Kind` of `x`. A `Kind` is its own kind.
"""
function kind(k::Kind)
    return k
end
| 187 | + |
| 188 | + |
| 189 | +#------------------------------------------------------------------------------- |
| 190 | +# Kinds used by JuliaSyntax |
| 191 | +register_kinds!(JuliaSyntax, 0, [ |
5 | 192 | "None" # Placeholder; never emitted by lexer
|
6 | 193 | "EndMarker" # EOF
|
7 | 194 | "Comment"
|
@@ -918,133 +1105,7 @@ const _kind_names =
|
918 | 1105 | # Container for a single statement/atom plus any trivia and errors
|
919 | 1106 | "wrapper"
|
920 | 1107 | "END_SYNTAX_KINDS"
|
921 |
| -] |
922 |
| - |
923 |
| -""" |
924 |
| - K"name" |
925 |
| - Kind(id) |
926 |
| -
|
927 |
| -`Kind` is a type tag for specifying the type of tokens and interior nodes of |
928 |
| -a syntax tree. Abstractly, this tag is used to define our own *sum types* for |
929 |
| -syntax tree nodes. We do this explicitly outside the Julia type system because |
930 |
| -(a) Julia doesn't have sum types and (b) we want concrete data structures which |
931 |
| -are unityped from the Julia compiler's point of view, for efficiency. |
932 |
| -
|
933 |
| -Naming rules: |
934 |
| -* Kinds which correspond to exactly one textural form are represented with that |
935 |
| - text. This includes keywords like K"for" and operators like K"*". |
936 |
| -* Kinds which represent many textural forms have UpperCamelCase names. This |
937 |
| - includes kinds like K"Identifier" and K"Comment". |
938 |
| -* Kinds which exist merely as delimiters are all uppercase |
939 |
| -""" |
940 |
| -primitive type Kind 16 end |
941 |
| - |
942 |
| -# The implementation of Kind here is basically similar to @enum. However we use |
943 |
| -# the K_str macro to self-name these kinds with their literal representation, |
944 |
| -# rather than needing to invent a new name for each. |
945 |
| - |
946 |
| -let kind_int_type = :UInt16 |
947 |
| - # Preprocess _kind_names to conflate category markers with the first/last |
948 |
| - # in the category. |
949 |
| - kindstr_to_int = Dict{String,UInt16}() |
950 |
| - i = 1 |
951 |
| - while i <= length(_kind_names) |
952 |
| - kn = _kind_names[i] |
953 |
| - kind_int = i-1 |
954 |
| - if startswith(kn, "BEGIN_") |
955 |
| - deleteat!(_kind_names, i) |
956 |
| - elseif startswith(kn, "END_") |
957 |
| - kind_int = i-2 |
958 |
| - deleteat!(_kind_names, i) |
959 |
| - else |
960 |
| - i += 1 |
961 |
| - end |
962 |
| - push!(kindstr_to_int, kn=>kind_int) |
963 |
| - end |
964 |
| - |
965 |
| - max_kind_int = length(_kind_names)-1 |
966 |
| - |
967 |
| - @eval begin |
968 |
| - function Kind(x::Integer) |
969 |
| - if x < 0 || x > $max_kind_int |
970 |
| - throw(ArgumentError("Kind out of range: $x")) |
971 |
| - end |
972 |
| - return Base.bitcast(Kind, convert($kind_int_type, x)) |
973 |
| - end |
974 |
| - |
975 |
| - Base.convert(::Type{String}, k::Kind) = _kind_names[1 + reinterpret($kind_int_type, k)] |
976 |
| - |
977 |
| - let kindstr_to_int=$kindstr_to_int |
978 |
| - function Base.convert(::Type{Kind}, s::AbstractString) |
979 |
| - i = get(kindstr_to_int, s) do |
980 |
| - error("unknown Kind name $(repr(s))") |
981 |
| - end |
982 |
| - Kind(i) |
983 |
| - end |
984 |
| - end |
985 |
| - |
986 |
| - Base.string(x::Kind) = convert(String, x) |
987 |
| - Base.print(io::IO, x::Kind) = print(io, convert(String, x)) |
988 |
| - |
989 |
| - Base.typemin(::Type{Kind}) = Kind(0) |
990 |
| - Base.typemax(::Type{Kind}) = Kind($max_kind_int) |
991 |
| - |
992 |
| - Base.:<(x::Kind, y::Kind) = reinterpret($kind_int_type, x) < reinterpret($kind_int_type, y) |
993 |
| - |
994 |
| - Base.instances(::Type{Kind}) = (Kind(i) for i in reinterpret($kind_int_type, typemin(Kind)):reinterpret($kind_int_type, typemax(Kind))) |
995 |
| - end |
996 |
| -end |
997 |
| - |
998 |
| -function Base.show(io::IO, k::Kind) |
999 |
| - print(io, "K\"$(convert(String, k))\"") |
1000 |
| -end |
1001 |
| - |
1002 |
| -# Save the string representation rather than the bit pattern so that kinds |
1003 |
| -# can be serialized and deserialized across different JuliaSyntax versions. |
1004 |
| -function Base.write(io::IO, k::Kind) |
1005 |
| - str = convert(String, k) |
1006 |
| - write(io, UInt8(length(str))) + write(io, str) |
1007 |
| -end |
1008 |
| -function Base.read(io::IO, ::Type{Kind}) |
1009 |
| - len = read(io, UInt8) |
1010 |
| - str = String(read(io, len)) |
1011 |
| - convert(Kind, str) |
1012 |
| -end |
1013 |
| - |
1014 |
| -#------------------------------------------------------------------------------- |
1015 |
| - |
1016 |
| -""" |
1017 |
| - K"s" |
1018 |
| -
|
1019 |
| -The kind of a token or AST internal node with string "s". |
1020 |
| -
|
1021 |
| -For example |
1022 |
| -* K")" is the kind of the right parenthesis token |
1023 |
| -* K"block" is the kind of a block of code (eg, statements within a begin-end). |
1024 |
| -""" |
1025 |
| -macro K_str(s) |
1026 |
| - convert(Kind, s) |
1027 |
| -end |
1028 |
| - |
1029 |
| -""" |
1030 |
| -A set of kinds which can be used with the `in` operator. For example |
1031 |
| -
|
1032 |
| - k in KSet"+ - *" |
1033 |
| -""" |
1034 |
| -macro KSet_str(str) |
1035 |
| - kinds = [convert(Kind, s) for s in split(str)] |
1036 |
| - |
1037 |
| - quote |
1038 |
| - ($(kinds...),) |
1039 |
| - end |
1040 |
| -end |
1041 |
| - |
1042 |
| -""" |
1043 |
| - kind(x) |
1044 |
| -
|
1045 |
| -Return the `Kind` of `x`. |
1046 |
| -""" |
1047 |
| -kind(k::Kind) = k |
| 1108 | +]) |
1048 | 1109 |
|
1049 | 1110 | #-------------------------------------------------------------------------------
|
1050 | 1111 | const _nonunique_kind_names = Set([
|
|
0 commit comments