Skip to content

Commit 77b4044

Browse files
authored
get rid of some unused code in tokenize (#15)
1 parent f7c0dd6 commit 77b4044

File tree

2 files changed

+1
-158
lines changed

2 files changed

+1
-158
lines changed

Tokenize/src/_precompile.jl

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,6 @@ function _precompile_()
99
precompile(Tokenize.Tokens.Token, ())
1010
precompile(Tokenize.Tokens.kind, (Tokenize.Tokens.Token,))
1111

12-
precompile(Tokenize.Lexers.is_cat_id_start, (Char, Int32,))
13-
precompile(Tokenize.Lexers.is_identifier_char, (Char,))
1412
precompile(Tokenize.Lexers.is_identifier_start_char, (Char,))
1513
precompile(Tokenize.Lexers.readchar, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
1614
precompile(Tokenize.Lexers.next_token, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}},))
@@ -68,5 +66,5 @@ function _precompile_()
6866
precompile(Tokenize.Lexers.accept_batch, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, typeof(Tokenize.Lexers.isdigit),))
6967
precompile(Tokenize.Lexers.accept, (Tokenize.Lexers.Lexer{GenericIOBuffer{Array{UInt8, 1}}}, Char,))
7068

71-
precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},))
69+
precompile(Tokenize.Lexers.readchar, (GenericIOBuffer{Array{UInt8, 1}},))
7270
end

Tokenize/src/utilities.jl

Lines changed: 0 additions & 155 deletions
Original file line number | Diff line number | Diff line change
@@ -1,113 +1,7 @@
1-
#=
2-
The code in here has been extracted from the JuliaParser.jl package
3-
with license:
4-
5-
The JuliaParser.jl package is licensed under the MIT "Expat" License:
6-
7-
> Copyright (c) 2014: Jake Bolewski.
8-
>
9-
> Permission is hereby granted, free of charge, to any person obtaining
10-
> a copy of this software and associated documentation files (the
11-
> "Software"), to deal in the Software without restriction, including
12-
> without limitation the rights to use, copy, modify, merge, publish,
13-
> distribute, sublicense, and/or sell copies of the Software, and to
14-
> permit persons to whom the Software is furnished to do so, subject to
15-
> the following conditions:
16-
>
17-
> The above copyright notice and this permission notice shall be
18-
> included in all copies or substantial portions of the Software.
19-
>
20-
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21-
> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22-
> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23-
> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24-
> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25-
> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26-
> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27-
=#
28-
291
import Base.Unicode
302

31-
32-
@inline function utf8_trailing(i)
33-
if i < 193
34-
return 0
35-
elseif i < 225
36-
return 1
37-
elseif i < 241
38-
return 2
39-
elseif i < 249
40-
return 3
41-
elseif i < 253
42-
return 4
43-
else
44-
return 5
45-
end
46-
end
47-
48-
const utf8_offset = [0x00000000
49-
0x00003080
50-
0x000e2080
51-
0x03c82080
52-
0xfa082080
53-
0x82082080]
54-
# const EOF_CHAR = convert(Char,typemax(UInt32))
553
const EOF_CHAR = typemax(Char)
564

57-
58-
function is_cat_id_start(ch::Char, cat::Integer)
59-
c = UInt32(ch)
60-
return (cat == Unicode.UTF8PROC_CATEGORY_LU || cat == Unicode.UTF8PROC_CATEGORY_LL ||
61-
cat == Unicode.UTF8PROC_CATEGORY_LT || cat == Unicode.UTF8PROC_CATEGORY_LM ||
62-
cat == Unicode.UTF8PROC_CATEGORY_LO || cat == Unicode.UTF8PROC_CATEGORY_NL ||
63-
cat == Unicode.UTF8PROC_CATEGORY_SC || # allow currency symbols
64-
cat == Unicode.UTF8PROC_CATEGORY_SO || # other symbols
65-
66-
# math symbol (category Sm) whitelist
67-
(c >= 0x2140 && c <= 0x2a1c &&
68-
((c >= 0x2140 && c <= 0x2144) || # ⅀, ⅁, ⅂, ⅃, ⅄
69-
c == 0x223f || c == 0x22be || c == 0x22bf || # ∿, ⊾, ⊿
70-
c == 0x22a4 || c == 0x22a5 || # ⊤ ⊥
71-
72-
(c >= 0x2202 && c <= 0x2233 &&
73-
(c == 0x2202 || c == 0x2205 || c == 0x2206 || # ∂, ∅, ∆
74-
c == 0x2207 || c == 0x220e || c == 0x220f || # ∇, ∎, ∏
75-
c == 0x2210 || c == 0x2211 || # ∐, ∑
76-
c == 0x221e || c == 0x221f || # ∞, ∟
77-
c >= 0x222b)) || # ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳
78-
79-
(c >= 0x22c0 && c <= 0x22c3) || # N-ary big ops: ⋀, ⋁, ⋂, ⋃
80-
(c >= 0x25F8 && c <= 0x25ff) || # ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿
81-
82-
(c >= 0x266f &&
83-
(c == 0x266f || c == 0x27d8 || c == 0x27d9 || # ♯, ⟘, ⟙
84-
(c >= 0x27c0 && c <= 0x27c1) || # ⟀, ⟁
85-
(c >= 0x29b0 && c <= 0x29b4) || # ⦰, ⦱, ⦲, ⦳, ⦴
86-
(c >= 0x2a00 && c <= 0x2a06) || # ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆
87-
(c >= 0x2a09 && c <= 0x2a16) || # ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒,
88-
# ⨓, ⨔, ⨕, ⨖
89-
c == 0x2a1b || c == 0x2a1c)))) || # ⨛, ⨜
90-
91-
(c >= 0x1d6c1 && # variants of \nabla and \partial
92-
(c == 0x1d6c1 || c == 0x1d6db ||
93-
c == 0x1d6fb || c == 0x1d715 ||
94-
c == 0x1d735 || c == 0x1d74f ||
95-
c == 0x1d76f || c == 0x1d789 ||
96-
c == 0x1d7a9 || c == 0x1d7c3)) ||
97-
98-
# super- and subscript +-=()
99-
(c >= 0x207a && c <= 0x207e) ||
100-
(c >= 0x208a && c <= 0x208e) ||
101-
102-
# angle symbols
103-
(c >= 0x2220 && c <= 0x2222) || # ∠, ∡, ∢
104-
(c >= 0x299b && c <= 0x29af) || # ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥,
105-
# ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯
106-
# Other_ID_Start
107-
c == 0x2118 || c == 0x212E || # ℘, ℮
108-
(c >= 0x309B && c <= 0x309C)) # katakana-hiragana sound marks
109-
end
110-
1115
function is_identifier_char(c::Char)
1126
c == EOF_CHAR && return false
1137
return Base.is_id_char(c)
@@ -145,55 +39,6 @@ function is_never_id_char(ch::Char)
14539
)
14640
end
14741

148-
function peekchar(io::Base.GenericIOBuffer)
149-
if !io.readable || io.ptr > io.size
150-
return EOF_CHAR
151-
end
152-
ch, _ = readutf(io)
153-
return ch
154-
end
155-
156-
function readutf(io, offset = 0)
157-
ch = convert(UInt8, io.data[io.ptr + offset])
158-
if ch < 0x80
159-
return convert(Char, ch), 0
160-
end
161-
trailing = utf8_trailing(ch + 1)
162-
c::UInt32 = 0
163-
for j = 1:trailing
164-
c += ch
165-
c <<= 6
166-
ch = convert(UInt8, io.data[io.ptr + j + offset])
167-
end
168-
c += ch
169-
c -= utf8_offset[trailing + 1]
170-
return convert(Char, c), trailing
171-
end
172-
173-
function dpeekchar(io::IOBuffer)
174-
if !io.readable || io.ptr > io.size
175-
return EOF_CHAR, EOF_CHAR
176-
end
177-
ch1, trailing = readutf(io)
178-
offset = trailing + 1
179-
180-
if io.ptr + offset > io.size
181-
return ch1, EOF_CHAR
182-
end
183-
ch2, _ = readutf(io, offset)
184-
185-
return ch1, ch2
186-
end
187-
188-
# this implementation is copied from Base
189-
peekchar(s::IOStream) = begin
190-
_CHTMP = Ref{Char}()
191-
if ccall(:ios_peekutf8, Int32, (Ptr{Nothing}, Ptr{Char}), s, _CHTMP) < 0
192-
return EOF_CHAR
193-
end
194-
return _CHTMP[]
195-
end
196-
19742
eof(io::IO) = Base.eof(io)
19843
eof(c::Char) = c === EOF_CHAR
19944

0 commit comments

Comments
 (0)