@@ -18,7 +18,7 @@ function Base.endswith(s::DenseStringViewAndSub, r::Regex)
18
18
return PCRE. exec_r (r. regex, s, 0 , r. match_options | PCRE. ENDANCHORED)
19
19
end
20
20
21
- function Base. match (re:: Regex , str:: DenseStringViewAndSub , idx:: Integer , add_opts:: UInt32 = UInt32 (0 ))
21
+ function Base. match (re:: Regex , str:: T , idx:: Integer , add_opts:: UInt32 = UInt32 (0 )) where {T <: DenseStringViewAndSub }
22
22
Base. compile (re)
23
23
opts = re. match_options | add_opts
24
24
matched, data = PCRE. exec_r_data (re. regex, str, idx- 1 , opts)
@@ -29,11 +29,11 @@ function Base.match(re::Regex, str::DenseStringViewAndSub, idx::Integer, add_opt
29
29
n = div (PCRE. ovec_length (data), 2 ) - 1
30
30
p = PCRE. ovec_ptr (data)
31
31
mat = SubString (str, unsafe_load (p, 1 )+ 1 , prevind (str, unsafe_load (p, 2 )+ 1 ))
32
- cap = Union{Nothing,SubString{String }}[unsafe_load (p,2 i+ 1 ) == PCRE. UNSET ? nothing :
32
+ cap = Union{Nothing,SubString{T }}[unsafe_load (p,2 i+ 1 ) == PCRE. UNSET ? nothing :
33
33
SubString (str, unsafe_load (p,2 i+ 1 )+ 1 ,
34
34
prevind (str, unsafe_load (p,2 i+ 2 )+ 1 )) for i= 1 : n]
35
35
off = Int[ unsafe_load (p,2 i+ 1 )+ 1 for i= 1 : n ]
36
- result = RegexMatch (mat, cap, unsafe_load (p,1 )+ 1 , off, re)
36
+ result = SVRegexMatch (mat, cap, unsafe_load (p,1 )+ 1 , off, re)
37
37
PCRE. free_match_data (data)
38
38
return result
39
39
end
@@ -70,7 +70,8 @@ struct RegexMatchIterator{T<:DenseStringViewAndSub}
70
70
overlap:: Bool
71
71
end
72
72
Base. compile (itr:: RegexMatchIterator ) = (compile (itr. regex); itr)
73
- Base. eltype (:: Type{<:RegexMatchIterator} ) = RegexMatch
73
+ Base. eltype (:: Type{RegexMatchIterator{T}} ) where {T<: DenseStringView } = SVRegexMatch{T}
74
+ Base. eltype (:: Type{RegexMatchIterator{SubString{T}}} ) where {T<: DenseStringView } = SVRegexMatch{T}
74
75
Base. IteratorSize (:: Type{<:RegexMatchIterator} ) = Base. SizeUnknown ()
75
76
76
77
function Base. iterate (itr:: RegexMatchIterator , (offset,prevempty)= (1 ,false ))
@@ -115,3 +116,80 @@ function PCRE.exec(re, subject::DenseStringViewAndSub, offset, options, match_da
115
116
rc < - 2 && error (" PCRE.exec error: $(PCRE. err_message (rc)) " )
116
117
return rc >= 0
117
118
end
119
+
120
#####################################################################
# need to duplicate this code from Base because of julia#48617:
"""
    SVRegexMatch <: AbstractMatch

This type is identical to `RegexMatch` (in Julia `Base`) except that the
`match` and the `captures` are `SubString`s of a `StringView` instead of
a `String`.

A type representing a single match to a `Regex` found in a string.
Typically created from the [`match`](@ref) function.

* The `match` field stores the substring of the entire matched string.
* The `captures` field stores the substrings for each capture group, indexed by number.
  To index by capture group name, the entire match object should be indexed instead,
  e.g. `m["name"]` or `m[:name]`.
* The location of the start of the match is stored in the `offset` field.
* The `offsets` field stores the locations of the start of each capture group,
  with 0 denoting a group that was not captured.
* The `regex` field stores the `Regex` that produced the match.

This type can be used as an iterator over the capture groups of the `Regex`,
yielding the substrings captured in each group.
Because of this, the captures of a match can be destructured.
If a group was not captured, `nothing` will be yielded instead of a substring.
"""
struct SVRegexMatch{T<:DenseStringView} <: AbstractMatch
    match::SubString{T}
    captures::Vector{Union{Nothing,SubString{T}}}
    offset::Int
    offsets::Vector{Int}
    regex::Regex
end

# Outer constructor: infer `T` from the type of `match` and let the remaining
# arguments be converted to the field types by `SVRegexMatch{T}`.
# The bound on `T` matches the bound of the struct itself, so this method only
# accepts arguments for which `SVRegexMatch{T}` can actually be instantiated.
SVRegexMatch(match::SubString{T}, captures, offset, offsets, regex) where {T<:DenseStringView} =
    SVRegexMatch{T}(match, captures, offset, offsets, regex)
153
+
154
# Return one key per capture group of `m`: the name of the group when the
# regex names it, and the numeric index of the group otherwise.
function Base.keys(m::SVRegexMatch)
    names = PCRE.capture_names(m.regex.regex)
    # `get` falls back to the index itself for groups that have no name
    return map(i -> get(names, i, i), eachindex(m.captures))
end
161
+
162
# Print `m` as `SVRegexMatch(<match>, <key>=<capture>, ...)`, where each key is
# the capture group's name if it has one and its index otherwise.
function Base.show(io::IO, m::SVRegexMatch)
    print(io, "SVRegexMatch(")
    show(io, m.match)
    for (pos, key) in enumerate(keys(m))
        # Each capture follows either the match or the previous capture,
        # so a separator always goes in front of it.
        print(io, ", ")
        print(io, key, "=")
        show(io, m.captures[pos])
    end
    print(io, ")")
    return nothing
end
178
+
179
# Capture group extraction

# Look up a capture group by its number.
function Base.getindex(m::SVRegexMatch, idx::Integer)
    return m.captures[idx]
end

# Look up a capture group by its name; raises an error when the regex has
# no group with that name.
function Base.getindex(m::SVRegexMatch, name::Union{AbstractString,Symbol})
    group = PCRE.substring_number_from_name(m.regex.regex, name)
    if group <= 0
        error("no capture group named $name found in regex")
    end
    return m[group]
end
186
+
187
# `true` when `idx` is a valid index into the captures of `m`.
function Base.haskey(m::SVRegexMatch, idx::Integer)
    return idx ∈ eachindex(m.captures)
end

# `true` when the regex of `m` has a capture group called `name`.
Base.haskey(m::SVRegexMatch, name::Union{AbstractString,Symbol}) =
    PCRE.substring_number_from_name(m.regex.regex, name) > 0
192
+
193
# Iteration interface: a match iterates over its capture groups, so all three
# methods simply forward to the `captures` vector.
function Base.iterate(m::SVRegexMatch, args...)
    return iterate(m.captures, args...)
end

function Base.length(m::SVRegexMatch)
    return length(m.captures)
end

function Base.eltype(m::SVRegexMatch)
    return eltype(m.captures)
end
0 commit comments