|
1 |
| -#= |
2 |
| -The code in here has been extracted from the JuliaParser.jl package |
3 |
| -with license: |
4 |
| -
|
5 |
| -The JuliaParser.jl package is licensed under the MIT "Expat" License: |
6 |
| -
|
7 |
| -> Copyright (c) 2014: Jake Bolewski. |
8 |
| -> |
9 |
| -> Permission is hereby granted, free of charge, to any person obtaining |
10 |
| -> a copy of this software and associated documentation files (the |
11 |
| -> "Software"), to deal in the Software without restriction, including |
12 |
| -> without limitation the rights to use, copy, modify, merge, publish, |
13 |
| -> distribute, sublicense, and/or sell copies of the Software, and to |
14 |
| -> permit persons to whom the Software is furnished to do so, subject to |
15 |
| -> the following conditions: |
16 |
| -> |
17 |
| -> The above copyright notice and this permission notice shall be |
18 |
| -> included in all copies or substantial portions of the Software. |
19 |
| -> |
20 |
| -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
21 |
| -> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
22 |
| -> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
23 |
| -> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
24 |
| -> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
25 |
| -> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
26 |
| -> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
27 |
| -=# |
28 |
| - |
29 | 1 | import Base.Unicode
|
30 | 2 |
|
31 |
| - |
32 |
| -@inline function utf8_trailing(i) |
33 |
| - if i < 193 |
34 |
| - return 0 |
35 |
| - elseif i < 225 |
36 |
| - return 1 |
37 |
| - elseif i < 241 |
38 |
| - return 2 |
39 |
| - elseif i < 249 |
40 |
| - return 3 |
41 |
| - elseif i < 253 |
42 |
| - return 4 |
43 |
| - else |
44 |
| - return 5 |
45 |
| - end |
46 |
| -end |
47 |
| - |
48 |
| -const utf8_offset = [0x00000000 |
49 |
| - 0x00003080 |
50 |
| - 0x000e2080 |
51 |
| - 0x03c82080 |
52 |
| - 0xfa082080 |
53 |
| - 0x82082080] |
54 |
| -# const EOF_CHAR = convert(Char,typemax(UInt32)) |
55 | 3 | const EOF_CHAR = typemax(Char)
|
56 | 4 |
|
57 |
| - |
58 |
| -function is_cat_id_start(ch::Char, cat::Integer) |
59 |
| - c = UInt32(ch) |
60 |
| - return (cat == Unicode.UTF8PROC_CATEGORY_LU || cat == Unicode.UTF8PROC_CATEGORY_LL || |
61 |
| - cat == Unicode.UTF8PROC_CATEGORY_LT || cat == Unicode.UTF8PROC_CATEGORY_LM || |
62 |
| - cat == Unicode.UTF8PROC_CATEGORY_LO || cat == Unicode.UTF8PROC_CATEGORY_NL || |
63 |
| - cat == Unicode.UTF8PROC_CATEGORY_SC || # allow currency symbols |
64 |
| - cat == Unicode.UTF8PROC_CATEGORY_SO || # other symbols |
65 |
| - |
66 |
| - # math symbol (category Sm) whitelist |
67 |
| - (c >= 0x2140 && c <= 0x2a1c && |
68 |
| - ((c >= 0x2140 && c <= 0x2144) || # ⅀, ⅁, ⅂, ⅃, ⅄ |
69 |
| - c == 0x223f || c == 0x22be || c == 0x22bf || # ∿, ⊾, ⊿ |
70 |
| - c == 0x22a4 || c == 0x22a5 || # ⊤ ⊥ |
71 |
| - |
72 |
| - (c >= 0x2202 && c <= 0x2233 && |
73 |
| - (c == 0x2202 || c == 0x2205 || c == 0x2206 || # ∂, ∅, ∆ |
74 |
| - c == 0x2207 || c == 0x220e || c == 0x220f || # ∇, ∎, ∏ |
75 |
| - c == 0x2210 || c == 0x2211 || # ∐, ∑ |
76 |
| - c == 0x221e || c == 0x221f || # ∞, ∟ |
77 |
| - c >= 0x222b)) || # ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳ |
78 |
| - |
79 |
| - (c >= 0x22c0 && c <= 0x22c3) || # N-ary big ops: ⋀, ⋁, ⋂, ⋃ |
80 |
| - (c >= 0x25F8 && c <= 0x25ff) || # ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿ |
81 |
| - |
82 |
| - (c >= 0x266f && |
83 |
| - (c == 0x266f || c == 0x27d8 || c == 0x27d9 || # ♯, ⟘, ⟙ |
84 |
| - (c >= 0x27c0 && c <= 0x27c1) || # ⟀, ⟁ |
85 |
| - (c >= 0x29b0 && c <= 0x29b4) || # ⦰, ⦱, ⦲, ⦳, ⦴ |
86 |
| - (c >= 0x2a00 && c <= 0x2a06) || # ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆ |
87 |
| - (c >= 0x2a09 && c <= 0x2a16) || # ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, |
88 |
| - # ⨓, ⨔, ⨕, ⨖ |
89 |
| - c == 0x2a1b || c == 0x2a1c)))) || # ⨛, ⨜ |
90 |
| - |
91 |
| - (c >= 0x1d6c1 && # variants of \nabla and \partial |
92 |
| - (c == 0x1d6c1 || c == 0x1d6db || |
93 |
| - c == 0x1d6fb || c == 0x1d715 || |
94 |
| - c == 0x1d735 || c == 0x1d74f || |
95 |
| - c == 0x1d76f || c == 0x1d789 || |
96 |
| - c == 0x1d7a9 || c == 0x1d7c3)) || |
97 |
| - |
98 |
| - # super- and subscript +-=() |
99 |
| - (c >= 0x207a && c <= 0x207e) || |
100 |
| - (c >= 0x208a && c <= 0x208e) || |
101 |
| - |
102 |
| - # angle symbols |
103 |
| - (c >= 0x2220 && c <= 0x2222) || # ∠, ∡, ∢ |
104 |
| - (c >= 0x299b && c <= 0x29af) || # ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, |
105 |
| - # ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯ |
106 |
| - # Other_ID_Start |
107 |
| - c == 0x2118 || c == 0x212E || # ℘, ℮ |
108 |
| - (c >= 0x309B && c <= 0x309C)) # katakana-hiragana sound marks |
109 |
| -end |
110 |
| - |
111 | 5 | function is_identifier_char(c::Char)
|
112 | 6 | c == EOF_CHAR && return false
|
113 | 7 | return Base.is_id_char(c)
|
@@ -145,55 +39,6 @@ function is_never_id_char(ch::Char)
|
145 | 39 | )
|
146 | 40 | end
|
147 | 41 |
|
148 |
| -function peekchar(io::Base.GenericIOBuffer) |
149 |
| - if !io.readable || io.ptr > io.size |
150 |
| - return EOF_CHAR |
151 |
| - end |
152 |
| - ch, _ = readutf(io) |
153 |
| - return ch |
154 |
| -end |
155 |
| - |
156 |
| -function readutf(io, offset = 0) |
157 |
| - ch = convert(UInt8, io.data[io.ptr + offset]) |
158 |
| - if ch < 0x80 |
159 |
| - return convert(Char, ch), 0 |
160 |
| - end |
161 |
| - trailing = utf8_trailing(ch + 1) |
162 |
| - c::UInt32 = 0 |
163 |
| - for j = 1:trailing |
164 |
| - c += ch |
165 |
| - c <<= 6 |
166 |
| - ch = convert(UInt8, io.data[io.ptr + j + offset]) |
167 |
| - end |
168 |
| - c += ch |
169 |
| - c -= utf8_offset[trailing + 1] |
170 |
| - return convert(Char, c), trailing |
171 |
| -end |
172 |
| - |
173 |
| -function dpeekchar(io::IOBuffer) |
174 |
| - if !io.readable || io.ptr > io.size |
175 |
| - return EOF_CHAR, EOF_CHAR |
176 |
| - end |
177 |
| - ch1, trailing = readutf(io) |
178 |
| - offset = trailing + 1 |
179 |
| - |
180 |
| - if io.ptr + offset > io.size |
181 |
| - return ch1, EOF_CHAR |
182 |
| - end |
183 |
| - ch2, _ = readutf(io, offset) |
184 |
| - |
185 |
| - return ch1, ch2 |
186 |
| -end |
187 |
| - |
188 |
| -# this implementation is copied from Base |
189 |
| -peekchar(s::IOStream) = begin |
190 |
| - _CHTMP = Ref{Char}() |
191 |
| - if ccall(:ios_peekutf8, Int32, (Ptr{Nothing}, Ptr{Char}), s, _CHTMP) < 0 |
192 |
| - return EOF_CHAR |
193 |
| - end |
194 |
| - return _CHTMP[] |
195 |
| -end |
196 |
| - |
197 | 42 | eof(io::IO) = Base.eof(io)
|
198 | 43 | eof(c::Char) = c === EOF_CHAR
|
199 | 44 |
|
|
0 commit comments