@@ -22,9 +22,6 @@ mutable struct Regex
2222 compile_options:: UInt32
2323 match_options:: UInt32
2424 regex:: Ptr{Cvoid}
25- extra:: Ptr{Cvoid}
26- ovec:: Vector{Csize_t}
27- match_data:: Ptr{Cvoid}
2825
2926 function Regex (pattern:: AbstractString , compile_options:: Integer ,
3027 match_options:: Integer )
@@ -37,11 +34,9 @@ mutable struct Regex
3734 if (match_options & ~ PCRE. EXECUTE_MASK) != 0
3835 throw (ArgumentError (" invalid regex match options: $match_options " ))
3936 end
40- re = compile (new (pattern, compile_options, match_options, C_NULL ,
41- C_NULL , Csize_t[], C_NULL ))
37+ re = compile (new (pattern, compile_options, match_options, C_NULL ))
4238 finalizer (re) do re
4339 re. regex == C_NULL || PCRE. free_re (re. regex)
44- re. match_data == C_NULL || PCRE. free_match_data (re. match_data)
4540 end
4641 re
4742 end
@@ -68,8 +63,6 @@ function compile(regex::Regex)
6863 if regex. regex == C_NULL
6964 regex. regex = PCRE. compile (regex. pattern, regex. compile_options)
7065 PCRE. jit_compile (regex. regex)
71- regex. match_data = PCRE. create_match_data (regex. regex)
72- regex. ovec = PCRE. get_ovec (regex. match_data)
7366 end
7467 regex
7568end
@@ -164,14 +157,12 @@ getindex(m::RegexMatch, name::AbstractString) = m[Symbol(name)]
164157
165158function occursin (r:: Regex , s:: AbstractString ; offset:: Integer = 0 )
166159 compile (r)
167- return PCRE. exec (r. regex, String (s), offset, r. match_options,
168- r. match_data)
160+ return PCRE. exec_r (r. regex, String (s), offset, r. match_options)
169161end
170162
171163function occursin (r:: Regex , s:: SubString ; offset:: Integer = 0 )
172164 compile (r)
173- return PCRE. exec (r. regex, s, offset, r. match_options,
174- r. match_data)
165+ return PCRE. exec_r (r. regex, s, offset, r. match_options)
175166end
176167
177168"""
@@ -198,14 +189,12 @@ true
198189"""
199190function startswith (s:: AbstractString , r:: Regex )
200191 compile (r)
201- return PCRE. exec (r. regex, String (s), 0 , r. match_options | PCRE. ANCHORED,
202- r. match_data)
192+ return PCRE. exec_r (r. regex, String (s), 0 , r. match_options | PCRE. ANCHORED)
203193end
204194
205195function startswith (s:: SubString , r:: Regex )
206196 compile (r)
207- return PCRE. exec (r. regex, s, 0 , r. match_options | PCRE. ANCHORED,
208- r. match_data)
197+ return PCRE. exec_r (r. regex, s, 0 , r. match_options | PCRE. ANCHORED)
209198end
210199
211200"""
@@ -232,14 +221,12 @@ true
232221"""
233222function endswith (s:: AbstractString , r:: Regex )
234223 compile (r)
235- return PCRE. exec (r. regex, String (s), 0 , r. match_options | PCRE. ENDANCHORED,
236- r. match_data)
224+ return PCRE. exec_r (r. regex, String (s), 0 , r. match_options | PCRE. ENDANCHORED)
237225end
238226
239227function endswith (s:: SubString , r:: Regex )
240228 compile (r)
241- return PCRE. exec (r. regex, s, 0 , r. match_options | PCRE. ENDANCHORED,
242- r. match_data)
229+ return PCRE. exec_r (r. regex, s, 0 , r. match_options | PCRE. ENDANCHORED)
243230end
244231
245232"""
@@ -274,36 +261,52 @@ function match end
274261function match (re:: Regex , str:: Union{SubString{String}, String} , idx:: Integer , add_opts:: UInt32 = UInt32 (0 ))
275262 compile (re)
276263 opts = re. match_options | add_opts
277- if ! PCRE. exec (re. regex, str, idx- 1 , opts, re. match_data)
264+ matched, data = PCRE. exec_r_data (re. regex, str, idx- 1 , opts)
265+ if ! matched
266+ PCRE. free_match_data (data)
278267 return nothing
279268 end
280- ovec = re. ovec
281- n = div (length (ovec),2 ) - 1
282- mat = SubString (str, ovec[1 ]+ 1 , prevind (str, ovec[2 ]+ 1 ))
283- cap = Union{Nothing,SubString{String}}[ovec[2 i+ 1 ] == PCRE. UNSET ? nothing :
284- SubString (str, ovec[2 i+ 1 ]+ 1 ,
285- prevind (str, ovec[2 i+ 2 ]+ 1 )) for i= 1 : n]
286- off = Int[ ovec[2 i+ 1 ]+ 1 for i= 1 : n ]
287- RegexMatch (mat, cap, ovec[1 ]+ 1 , off, re)
269+ n = div (PCRE. ovec_length (data), 2 ) - 1
270+ p = PCRE. ovec_ptr (data)
271+ mat = SubString (str, unsafe_load (p, 1 )+ 1 , prevind (str, unsafe_load (p, 2 )+ 1 ))
272+ cap = Union{Nothing,SubString{String}}[unsafe_load (p,2 i+ 1 ) == PCRE. UNSET ? nothing :
273+ SubString (str, unsafe_load (p,2 i+ 1 )+ 1 ,
274+ prevind (str, unsafe_load (p,2 i+ 2 )+ 1 )) for i= 1 : n]
275+ off = Int[ unsafe_load (p,2 i+ 1 )+ 1 for i= 1 : n ]
276+ result = RegexMatch (mat, cap, unsafe_load (p,1 )+ 1 , off, re)
277+ PCRE. free_match_data (data)
278+ return result
288279end
289280
290281match (r:: Regex , s:: AbstractString ) = match (r, s, firstindex (s))
291282match (r:: Regex , s:: AbstractString , i:: Integer ) = throw (ArgumentError (
292283 " regex matching is only available for the String type; use String(s) to convert"
293284))
294285
286+ findnext (re:: Regex , str:: Union{String,SubString} , idx:: Integer ) = _findnext_re (re, str, idx, C_NULL )
287+
295288# TODO : return only start index and update deprecation
296- function findnext (re:: Regex , str:: Union{String,SubString} , idx:: Integer )
289+ function _findnext_re (re:: Regex , str:: Union{String,SubString} , idx:: Integer , match_data :: Ptr{Cvoid} )
297290 if idx > nextind (str,lastindex (str))
298291 throw (BoundsError ())
299292 end
300293 opts = re. match_options
301294 compile (re)
302- if PCRE. exec (re. regex, str, idx- 1 , opts, re. match_data)
303- (Int (re. ovec[1 ])+ 1 ): prevind (str,Int (re. ovec[2 ])+ 1 )
295+ alloc = match_data == C_NULL
296+ if alloc
297+ matched, data = PCRE. exec_r_data (re. regex, str, idx- 1 , opts)
298+ else
299+ matched = PCRE. exec (re. regex, str, idx- 1 , opts, match_data)
300+ data = match_data
301+ end
302+ if matched
303+ p = PCRE. ovec_ptr (data)
304+ ans = (Int (unsafe_load (p,1 ))+ 1 ): prevind (str,Int (unsafe_load (p,2 ))+ 1 )
304305 else
305- nothing
306+ ans = nothing
306307 end
308+ alloc && PCRE. free_match_data (data)
309+ return ans
307310end
308311findnext (r:: Regex , s:: AbstractString , idx:: Integer ) = throw (ArgumentError (
309312 " regex search is only available for the String type; use String(s) to convert"
@@ -384,9 +387,23 @@ julia> replace(msg, r"#(.+)# from (?<from>\\w+)" => s"FROM: \\g<from>; MESSAGE:
384387"""
385388macro s_str (string) SubstitutionString (string) end
386389
390+ # replacement
391+
392+ struct RegexAndMatchData
393+ re:: Regex
394+ match_data:: Ptr{Cvoid}
395+ RegexAndMatchData (re:: Regex ) = (compile (re); new (re, PCRE. create_match_data (re. regex)))
396+ end
397+
398+ findnext (pat:: RegexAndMatchData , str, i) = _findnext_re (pat. re, str, i, pat. match_data)
399+
400+ _pat_replacer (r:: Regex ) = RegexAndMatchData (r)
401+
402+ _free_pat_replacer (r:: RegexAndMatchData ) = PCRE. free_match_data (r. match_data)
403+
387404replace_err (repl) = error (" Bad replacement string: $repl " )
388405
389- function _write_capture (io, re, group)
406+ function _write_capture (io, re:: RegexAndMatchData , group)
390407 len = PCRE. substring_length_bynumber (re. match_data, group)
391408 ensureroom (io, len+ 1 )
392409 PCRE. substring_copy_bynumber (re. match_data, group,
@@ -395,7 +412,7 @@ function _write_capture(io, re, group)
395412 io. size = max (io. size, io. ptr - 1 )
396413end
397414
398- function _replace (io, repl_s:: SubstitutionString , str, r, re)
415+ function _replace (io, repl_s:: SubstitutionString , str, r, re:: RegexAndMatchData )
399416 SUB_CHAR = ' \\ '
400417 GROUP_CHAR = ' g'
401418 LBRACKET = ' <'
@@ -439,8 +456,8 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
439456 if all (isdigit, groupname)
440457 _write_capture (io, re, parse (Int, groupname))
441458 else
442- group = PCRE. substring_number_from_name (re. regex, groupname)
443- group < 0 && replace_err (" Group $groupname not found in regex $re " )
459+ group = PCRE. substring_number_from_name (re. re . regex, groupname)
460+ group < 0 && replace_err (" Group $groupname not found in regex $(re . re) " )
444461 _write_capture (io, re, group)
445462 end
446463 i = nextind (repl, i)
0 commit comments