11"""
22 PatternFreeString{T<:String} <: AbstractPointProcess{T}
33
4- Container with fields `alphabet` and `pattern` used to generate a string made of characters from `alphabet` avoiding the prescribed `pattern`.
4+ Struct with fields
5+ - `alphabet::Vector{Char}`,
6+ - `pattern::Regex`,
7+ used to generate strings made of characters from `alphabet` avoiding the prescribed `pattern`.
58"""
69struct PatternFreeString{T<: String } <: AbstractPointProcess{T}
7- alphabet:: Vector{T }
8- pattern:: T
10+ alphabet:: Vector{Char }
11+ pattern:: Regex
912end
1013
1114function Base. show (io:: IO , pp:: PatternFreeString{T} ) where {T}
12- print (io, " PatternFreeString{$T }\n - alphabet = $(pp. alphabet) \n - pattern = $(pp. pattern) " )
15+ print (io, " PatternFreeString{$T }\n - alphabet = $(pp. alphabet) \n - pattern = $(pp. pattern. pattern ) " )
1316end
1417
1518"""
@@ -19,22 +22,22 @@ Construct a [`PRS.PatternFreeString`](@ref).
1922
2023```jldoctest; output = true
2124using PartialRejectionSampling
22- PRS.PatternFreeString(["A", "C", "G", "T" ], "ATGTA")
25+ PRS.PatternFreeString(['A', 'C', 'G', 'T' ], "ATGTA")
2326
2427# output
2528
2629PatternFreeString{String}
27- - alphabet = ["A", "C", "G", "T" ]
30+ - alphabet = ['A', 'C', 'G', 'T' ]
2831- pattern = ATGTA
2932```
3033"""
31- function PatternFreeString (alphabet:: Vector{String } , pattern:: String )
34+ function PatternFreeString (alphabet:: Vector{Char } , pattern:: String )
3235 @assert ! isempty (alphabet)
3336 @assert ! isempty (pattern)
34- for p in split ( pattern, " " )
35- p ∉ alphabet && throw (DomainError (p , " pattern is not made of characters from alphabet" ))
37+ if ! issubset ( Vector {Char} ( pattern), alphabet )
38+ throw (DomainError (pattern , " pattern $(pattern) is not fully made of characters from alphabet $(alphabet) " ))
3639 end
37- return PatternFreeString {String} (alphabet, pattern)
40+ return PatternFreeString {String} (alphabet, Regex ( pattern) )
3841end
3942
4043"""
6770 size::Int
6871 )::T where {T<:String}
6972
70- Generate a string uniformly at random among all strings made of characters from `pp.alphabet` with no occurrence of the pattern `pp.pattern`, using a tailored version of Partial Rejection Sampling (PRS) derived by [GiAmWe18](@cite).
73+ Generate a string uniformly at random among all strings made of characters from `pp.alphabet` with no occurrence of the pattern `pp.pattern`, using a tailored version of Partial Rejection Sampling (PRS).
74+
75+ **See also**
76+ - Technical report of [GiAmWe18](@cite)
7177
7278```@example
7379using PartialRejectionSampling
74- pp = PRS.PatternFreeString(["A", "C", "G", "T" ], "ATGTA")
80+ pp = PRS.PatternFreeString(['A', 'C', 'G', 'T' ], "ATGTA")
7581PRS.generate_sample_prs(pp, 20)
7682```
7783"""
@@ -81,162 +87,175 @@ function generate_sample_prs(
8187 size:: Int
8288):: T where {T<: String }
8389 @assert size > 0
84- return _generate_sample_pattern_free_string_prs (rng, pp. alphabet, pp. pattern, size)
90+ @assert ! isempty (pp. alphabet)
91+ @assert ! isempty (pp. pattern. pattern)
92+ return _pattern_free_string_prs (rng, pp. alphabet, pp. pattern, size)
8593end
8694
87- # function generate_sample_prs(
88- # pp::PatternFreeString,
89- # size::Int
90- # )
91- # return generate_sample(Random.default_rng(), pp, size)
92- # end
93-
9495"""
9596 _pattern_free_string_prs(
9697 rng::Random.AbstractRNG,
97- alphabet::Vector{T },
98- pattern::T ,
98+ alphabet::Vector{Char },
99+ pattern::Regex ,
99100 size::Int
100- )::T where {T<:AbstractString}
101+ )::String
101102
102103Generate a string uniformly at random among all strings made of characters from `alphabet` with no occurrence of the pattern `pattern`, using a tailored version of Partial Rejection Sampling (PRS) derived by [GiAmWe18](@cite).
103104"""
104- function _generate_sample_pattern_free_string_prs (
105+ function _pattern_free_string_prs (
105106 rng:: Random.AbstractRNG ,
106- alphabet:: Vector{T } ,
107- pattern :: T ,
107+ alphabet:: Vector{Char } ,
108+ regex :: Regex ,
108109 size:: Int
109- ):: T where {T<: AbstractString }
110-
111- @assert size > 0
112- @assert ! isempty (alphabet)
113- @assert ! isempty (pattern)
114-
115- pref_suff = find_prefix_suffix (pattern)
116-
117- pp = fill (" " , size)
118- resample_indices = Set (1 : size)
119-
120- while ! isempty (resample_indices)
121- generate_sample! (rng, pp, resample_indices, alphabet)
122- resample_indices = find_characters_to_resample (pp, pattern, pref_suff)
110+ ):: String
111+ pref_suff = find_prefix_suffix (regex. pattern)
112+ if isempty (pref_suff)
113+ return _pattern_free_string_prs_extremal (rng, alphabet, regex, size)
114+ else
115+ return _pattern_free_string_prs_general (rng, alphabet, regex, size, pref_suff)
123116 end
124- return join (pp)
125117end
126118
127- find_prefix_suffix (s:: String ) = [i for i in 1 : div (length (s), 2 ) if s[1 : i] == s[end - i+ 1 : end ]]
128-
129- """
130- generate_sample!(
131- [rng::Random.AbstractRNG,]
132- string_vec::Vector{T},
133- indices,
134- alphabet::Vector{T}
135- ) where {T<:AbstractString}
119+ # # Extremal PRS
136120
137- Generate a character uniformly at random from `alphabet` at positions prescribed by `indices` in `string_vec`.
138- """
139- function generate_sample! (
140- rng:: Random.AbstractRNG ,
141- string_vec:: Vector{T} ,
142- indices,
143- alphabet:: Vector{T}
144- ) where {T<: AbstractString }
145- for i in indices
146- string_vec[i] = rand (rng, alphabet)
121+ function _pattern_free_string_prs_extremal (rng, alphabet, regex, size)
122+ str_vec = rand (rng, alphabet, size)
123+ while true
124+ str = join (str_vec)
125+ bad_ranges = eachmatch_ranges (regex, str)
126+ isempty (bad_ranges) && return str
127+ for b in bad_ranges
128+ str_vec[b] .= rand (rng, alphabet, length (b))
129+ end
147130 end
148131end
149132
150- """
151- find_bad_ranges(
152- pattern::T,
153- string::T
154- )::Vector{UnitRange} where {T<:AbstractString}
133+ # # General PRS
155134
156- Identify where `pattern` occur in `string` and return the corresponding ranges of indices.
157- """
158- function find_bad_ranges (
159- pattern:: T ,
160- string:: T
161- ):: Vector{UnitRange} where {T<: AbstractString }
162-
163- bad_ranges = UnitRange{Int64}[]
164-
165- matches = eachmatch (Regex (pattern), string, overlap= true )
166- isempty (matches) && return bad_ranges
167-
168- p = length (pattern)
169- m, _ = iterate (matches)
170- x1, y1 = m. offset, m. offset + p - 1
171-
172- for m in Iterators. drop (matches, 1 )
173- x2, y2 = m. offset, m. offset + p - 1
174- if x2 <= y1 + 1
175- y1 = y2
176- else
177- push! (bad_ranges, x1: y1)
178- x1, y1 = x2, y2
179- end
180- end
181- push! (bad_ranges, x1: y1)
182- return bad_ranges
183- end
135+ function _pattern_free_string_prs_general (rng, alphabet, regex, size, pref_suff)
136+ throw (DomainError (regex. pattern, " Generalized PRS is not yet implemented for pattern free strings" ))
137+ # pp = fill("", size)
138+ # resample_indices = Set(1:size)
184139
185- """
186- find_characters_to_resample(
187- string_vec::Vector{T},
188- pattern::T,
189- pref_suff::Vector{U}
190- )::Vector{U} where {T<:String, U<:Int}
140+ # while !isempty(resample_indices)
141+ # generate_sample!(rng, pp, resample_indices, alphabet)
142+ # resample_indices = find_characters_to_resample(pp, regex, pref_suff)
143+ # end
144+ # return join(pp)
145+ end
191146
192- Identify the set of events to be resampled as constructed by Algorithm 5 in [GuJeLi19](@cite) as part of the Partial Rejection Sampling (PRS) method.
193- Return the indices of the variables involved in the corresponding events.
147+ # """
148+ # generate_sample!(
149+ # rng::Random.AbstractRNG,
150+ # string_vec::Vector{T},
151+ # indices,
152+ # alphabet::Vector{T}
153+ # ) where {T<:AbstractString}
154+
155+ # Generate a character uniformly at random from `alphabet` at positions prescribed by `indices` in `string_vec`.
156+ # """
157+ # function generate_sample!(
158+ # rng::Random.AbstractRNG,
159+ # string_vec::Vector{T},
160+ # indices,
161+ # alphabet::Vector{T}
162+ # ) where {T<:AbstractString}
163+ # for i in indices
164+ # string_vec[i] = rand(rng, alphabet)
165+ # end
166+ # end
194167
195- **See also**
168+ # """
169+ # find_bad_ranges(
170+ # pattern::T,
171+ # string::T
172+ # )::Vector{UnitRange} where {T<:AbstractString}
173+
174+ # Identify where `pattern` occur in `string` and return the corresponding ranges of indices.
175+ # """
176+ # function find_bad_ranges(
177+ # pattern::T,
178+ # string::T
179+ # )::Vector{UnitRange} where {T<:AbstractString}
180+
181+ # bad_ranges = UnitRange{Int64}[]
182+
183+ # matches = eachmatch(Regex(pattern), string, overlap=true)
184+ # isempty(matches) && return bad_ranges
185+
186+ # p = length(pattern)
187+ # m, _ = iterate(matches)
188+
189+ # x1, y1 = m.offset, m.offset + p - 1
190+ # for m in Iterators.drop(matches, 1)
191+ # x2, y2 = m.offset, m.offset + p - 1
192+ # if x2 <= y1 + 1
193+ # y1 = y2
194+ # else
195+ # push!(bad_ranges, x1:y1)
196+ # x1, y1 = x2, y2
197+ # end
198+ # end
199+ # push!(bad_ranges, x1:y1)
200+
201+ # return bad_ranges
202+ # end
196203
197- - [`PRS.find_bad_ranges`](@ref)
198- """
199- function find_characters_to_resample (
200- string_vec:: Vector{T} ,
201- pattern:: T ,
202- pref_suff:: Vector{U}
203- ):: Vector{U} where {T<: String , U<: Int }
204-
205- # Extremal case
206- isempty (pref_suff) && return vcat (findall (pattern, join (string_vec), overlap= false )... )
207-
208- # General case
209- bad_ranges = find_bad_ranges (pattern, join (string_vec))
210- isempty (bad_ranges) && return vcat (bad_ranges... )
211-
212- p, s = length (pattern), length (string_vec)
213- tmp = fill (" " , s)
214-
215- for bad_range in bad_ranges
216- tmp[bad_range] = string_vec[bad_range]
217- start, stop = bad_range. start, bad_range. stop
218- for ps in pref_suff
219- flag_left = flag_right = false
220- if ! flag_left
221- I = (start - p + ps): (start - 1 )
222- if I. start >= 1
223- flag_left = startswith (pattern, join (tmp[I]))
224- if flag_left
225- tmp[I] = string_vec[I]
226- end
227- end
228- end
229- if ! flag_right
230- J = (stop + 1 ): (stop + p - ps)
231- if J. stop <= s
232- flag_right = endswith (pattern, join (tmp[J]))
233- if flag_right
234- tmp[J] = string_vec[J]
235- end
236- end
237- end
238- flag_left && flag_right && break
239- end
240- end
241- return findall (! isempty, tmp)
242- end
204+ # """
205+ # find_characters_to_resample(
206+ # string_vec::Vector{T},
207+ # pattern::T,
208+ # pref_suff::Vector{U}
209+ # )::Vector{U} where {T<:String, U<:Int}
210+
211+ # Identify the set of events to be resampled as constructed by Algorithm 5 in [GuJeLi19](@cite) as part of the Partial Rejection Sampling (PRS) method.
212+ # Return the indices of the variables involved in the corresponding events.
213+
214+ # **See also**
215+
216+ # - [`PRS.find_bad_ranges`](@ref)
217+ # """
218+ # function find_characters_to_resample(
219+ # string_vec::Vector{T},
220+ # pattern::T,
221+ # pref_suff::Vector{U}
222+ # )::Vector{U} where {T<:String, U<:Int}
223+
224+ # # Extremal case
225+ # isempty(pref_suff) && return vcat(findall(pattern, join(string_vec), overlap=false)...)
226+
227+ # # General case
228+ # bad_ranges = find_bad_ranges(pattern, join(string_vec))
229+ # isempty(bad_ranges) && return vcat(bad_ranges...)
230+
231+ # p, n = length(pattern), length(string_vec)
232+ # tmp = fill("", n)
233+
234+ # for bad_range in bad_ranges
235+ # tmp[bad_range] = string_vec[bad_range]
236+ # start, stop = bad_range.start, bad_range.stop
237+ # for ps in pref_suff
238+ # flag_left = flag_right = false
239+ # if !flag_left
240+ # I = (start - p + ps):(start - 1)
241+ # if I.start >= 1
242+ # flag_left = startswith(pattern, join(tmp[I]))
243+ # if flag_left
244+ # tmp[I] = string_vec[I]
245+ # end
246+ # end
247+ # end
248+ # if !flag_right
249+ # J = (stop + 1):(stop + p - ps)
250+ # if J.stop <= n
251+ # flag_right = endswith(pattern, join(tmp[J]))
252+ # if flag_right
253+ # tmp[J] = string_vec[J]
254+ # end
255+ # end
256+ # end
257+ # flag_left && flag_right && break
258+ # end
259+ # end
260+ # return findall(!isempty, tmp)
261+ # end
0 commit comments