Skip to content

Commit abf099e

Browse files
authored
Implement char_range() to respect string indices (#486)
Also widen `byte_range()` signatures to accept other integer types
1 parent b444f5a commit abf099e

File tree

5 files changed

+30
-5
lines changed

5 files changed

+30
-5
lines changed

docs/src/api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ JuliaSyntax.last_byte
5252
JuliaSyntax.filename
5353
JuliaSyntax.source_line
5454
JuliaSyntax.source_location
55+
JuliaSyntax.char_range
5556
JuliaSyntax.sourcetext
5657
JuliaSyntax.highlight
5758
```

src/JuliaSyntax.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export parsestmt, parseall, parseatom
99
# Tokenization
1010
export tokenize, Token, untokenize
1111
# Source file handling. See also
12-
# highlight() sourcetext() source_line() source_location()
12+
# highlight() sourcetext() source_line() source_location() char_range()
1313
export SourceFile
1414
# Expression heads/kinds. See also
1515
# flags() and related predicates.

src/source_files.jl

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,24 @@ end
1515
"""
1616
byte_range(x)
1717
18-
Return the range of bytes which `x` covers in the source text.
18+
Return the range of bytes which `x` covers in the source text. See also
19+
[`char_range`](@ref).
1920
"""
2021
function byte_range
2122
end
2223

24+
"""
25+
char_range(x)
26+
27+
Compute the range of valid *string indices* (the byte offsets at which characters start) over the source text for syntax object
28+
`x`. If you want to index the source string you need this, rather than
29+
[`byte_range`](@ref).
30+
"""
31+
function char_range(x)
32+
br = byte_range(x)
33+
first(br):thisind(sourcefile(x), last(br))
34+
end
35+
2336
"""
2437
first_byte(x)
2538
@@ -232,11 +245,11 @@ function Base.view(source::SourceFile, rng::AbstractUnitRange)
232245
SubString(source.code, i, j)
233246
end
234247

235-
function Base.getindex(source::SourceFile, i::Int)
248+
function Base.getindex(source::SourceFile, i::Integer)
236249
source.code[i - source.byte_offset]
237250
end
238251

239-
function Base.thisind(source::SourceFile, i::Int)
252+
function Base.thisind(source::SourceFile, i::Integer)
240253
thisind(source.code, i - source.byte_offset) + source.byte_offset
241254
end
242255

test/syntax_tree.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@
5555
@test t.position == 13
5656
@test child(t,1).position == 19
5757
@test child(t,1).val == :b
58+
59+
# Unicode character ranges
60+
src = "ab + αβ"
61+
t = parsestmt(SyntaxNode, src)
62+
@test char_range(t[1]) == 1:2
63+
@test char_range(t[2]) == 4:4
64+
@test char_range(t[3]) == 6:8
65+
# By contrast, the final character β occupies two bytes, so byte_range(t[3]) ends one byte later than char_range(t[3])
66+
@test byte_range(t[3]) == 6:9
5867
end
5968

6069
@testset "SyntaxNode pretty printing" begin

test/test_utils.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ using .JuliaSyntax:
3636
highlight,
3737
tokenize,
3838
untokenize,
39-
filename
39+
filename,
40+
byte_range,
41+
char_range
4042

4143
if VERSION < v"1.6"
4244
# Compat stuff which might not be in Base for older versions

0 commit comments

Comments
 (0)