@@ -111,11 +111,12 @@ makeNestedPrisms ''RegBnf
111111{category-test} = (\\p\{)\q{category}\}|(\\P\{)(\q{category}(\|\q{category})*)\}
112112{char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped}
113113{char-any} = \[\^\]
114- {char-class} = \q{fail}|\q{char-any}|\q{one-of}|(\[\^)\q{char}+(\q{category-test}?\]) |\q{category-test}
114+ {char-class} = \q{fail}|\q{char-any}|\q{one-of}|\q{not-one-of} |\q{category-test}
115115{char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC
116116{char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control}
117117{expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom}
118118{fail} = \[\]
119+ {not-one-of} = (\[\^)\q{char}+(\q{category-test}?\])
119120{one-of} = \[\q{char}+\]
120121{regex} = \q{alternate}
121122{sequence} = \q{char}*|\q{expression}*
@@ -190,10 +191,7 @@ regexGrammar = _RegString >~ ruleRec "regex" altG
190191 [ _Fail >? failG
191192 , _Pass >? anyG
192193 , _OneOf >? oneOfG
193- , _NotOneOf >?
194- terminal " [^" >* several1 noSep charG
195- >*< optionP (NotAsIn Set. empty) catTestG
196- *< terminal " ]"
194+ , _NotOneOf >? notOneOfG
197195 , _NotOneOf >? pure Set. empty >*< catTestG
198196 ]
199197
@@ -203,6 +201,11 @@ regexGrammar = _RegString >~ ruleRec "regex" altG
203201
204202 oneOfG = rule " one-of" $ terminal " [" >* several1 noSep charG *< terminal " ]"
205203
204+ notOneOfG = rule " not-one-of" $
205+ terminal " [^" >* several1 noSep charG
206+ >*< optionP (NotAsIn Set. empty) catTestG
207+ *< terminal " ]"
208+
206209charG :: Grammar Char Char
207210charG = rule " char" $
208211 tokenClass (notOneOf charsReserved >&&< notAsIn Control )
@@ -214,30 +217,94 @@ charG = rule "char" $
214217 charsReserved = " ()*+?[\\ ]^{|}"
215218
216219 charControlG = rule " char-control" $ choiceP
217- [ terminal abbrev >* pure charControl
218- | (abbrev, charControl) <- charsControl
219- ]
220-
221- charsControl =
222- [ (" NUL" , ' \NUL ' ), (" SOH" , ' \SOH ' ), (" STX" , ' \STX ' ), (" ETX" , ' \ETX ' )
223- , (" EOT" , ' \EOT ' ), (" ENQ" , ' \ENQ ' ), (" ACK" , ' \ACK ' ), (" BEL" , ' \BEL ' )
224- , (" BS" , ' \BS ' ), (" HT" , ' \HT ' ), (" LF" , ' \LF ' ), (" VT" , ' \VT ' )
225- , (" FF" , ' \FF ' ), (" CR" , ' \CR ' ), (" SO" , ' \SO ' ), (" SI" , ' \SI ' )
226- , (" DLE" , ' \DLE ' ), (" DC1" , ' \DC1 ' ), (" DC2" , ' \DC2 ' ), (" DC3" , ' \DC3 ' )
227- , (" DC4" , ' \DC4 ' ), (" NAK" , ' \NAK ' ), (" SYN" , ' \SYN ' ), (" ETB" , ' \ETB ' )
228- , (" CAN" , ' \CAN ' ), (" EM" , ' \EM ' ), (" SUB" , ' \SUB ' ), (" ESC" , ' \ESC ' )
229- , (" FS" , ' \FS ' ), (" GS" , ' \GS ' ), (" RS" , ' \RS ' ), (" US" , ' \US ' )
230- , (" DEL" , ' \DEL ' )
231- , (" PAD" , ' \x80 ' ), (" HOP" , ' \x81 ' ), (" BPH" , ' \x82 ' ), (" NBH" , ' \x83 ' )
232- , (" IND" , ' \x84 ' ), (" NEL" , ' \x85 ' ), (" SSA" , ' \x86 ' ), (" ESA" , ' \x87 ' )
233- , (" HTS" , ' \x88 ' ), (" HTJ" , ' \x89 ' ), (" VTS" , ' \x8A ' ), (" PLD" , ' \x8B ' )
234- , (" PLU" , ' \x8C ' ), (" RI" , ' \x8D ' ), (" SS2" , ' \x8E ' ), (" SS3" , ' \x8F ' )
235- , (" DCS" , ' \x90 ' ), (" PU1" , ' \x91 ' ), (" PU2" , ' \x92 ' ), (" STS" , ' \x93 ' )
236- , (" CCH" , ' \x94 ' ), (" MW" , ' \x95 ' ), (" SPA" , ' \x96 ' ), (" EPA" , ' \x97 ' )
237- , (" SOS" , ' \x98 ' ), (" SGCI" ,' \x99 ' ), (" SCI" , ' \x9A ' ), (" CSI" , ' \x9B ' )
238- , (" ST" , ' \x9C ' ), (" OSC" , ' \x9D ' ), (" PM" , ' \x9E ' ), (" APC" , ' \x9F ' )
220+ [ only ' \NUL ' >? terminal " NUL"
221+ , only ' \SOH ' >? terminal " SOH"
222+ , only ' \STX ' >? terminal " STX"
223+ , only ' \ETX ' >? terminal " ETX"
224+ , only ' \EOT ' >? terminal " EOT"
225+ , only ' \ENQ ' >? terminal " ENQ"
226+ , only ' \ACK ' >? terminal " ACK"
227+ , only ' \BEL ' >? terminal " BEL"
228+ , only ' \BS ' >? terminal " BS"
229+ , only ' \HT ' >? terminal " HT"
230+ , only ' \LF ' >? terminal " LF"
231+ , only ' \VT ' >? terminal " VT"
232+ , only ' \FF ' >? terminal " FF"
233+ , only ' \CR ' >? terminal " CR"
234+ , only ' \SO ' >? terminal " SO"
235+ , only ' \SI ' >? terminal " SI"
236+ , only ' \DLE ' >? terminal " DLE"
237+ , only ' \DC1 ' >? terminal " DC1"
238+ , only ' \DC2 ' >? terminal " DC2"
239+ , only ' \DC3 ' >? terminal " DC3"
240+ , only ' \DC4 ' >? terminal " DC4"
241+ , only ' \NAK ' >? terminal " NAK"
242+ , only ' \SYN ' >? terminal " SYN"
243+ , only ' \ETB ' >? terminal " ETB"
244+ , only ' \CAN ' >? terminal " CAN"
245+ , only ' \EM ' >? terminal " EM"
246+ , only ' \SUB ' >? terminal " SUB"
247+ , only ' \ESC ' >? terminal " ESC"
248+ , only ' \FS ' >? terminal " FS"
249+ , only ' \GS ' >? terminal " GS"
250+ , only ' \RS ' >? terminal " RS"
251+ , only ' \US ' >? terminal " US"
252+ , only ' \DEL ' >? terminal " DEL"
253+ , only ' \x80 ' >? terminal " PAD"
254+ , only ' \x81 ' >? terminal " HOP"
255+ , only ' \x82 ' >? terminal " BPH"
256+ , only ' \x83 ' >? terminal " NBH"
257+ , only ' \x84 ' >? terminal " IND"
258+ , only ' \x85 ' >? terminal " NEL"
259+ , only ' \x86 ' >? terminal " SSA"
260+ , only ' \x87 ' >? terminal " ESA"
261+ , only ' \x88 ' >? terminal " HTS"
262+ , only ' \x89 ' >? terminal " HTJ"
263+ , only ' \x8A ' >? terminal " VTS"
264+ , only ' \x8B ' >? terminal " PLD"
265+ , only ' \x8C ' >? terminal " PLU"
266+ , only ' \x8D ' >? terminal " RI"
267+ , only ' \x8E ' >? terminal " SS2"
268+ , only ' \x8F ' >? terminal " SS3"
269+ , only ' \x90 ' >? terminal " DCS"
270+ , only ' \x91 ' >? terminal " PU1"
271+ , only ' \x92 ' >? terminal " PU2"
272+ , only ' \x93 ' >? terminal " STS"
273+ , only ' \x94 ' >? terminal " CCH"
274+ , only ' \x95 ' >? terminal " MW"
275+ , only ' \x96 ' >? terminal " SPA"
276+ , only ' \x97 ' >? terminal " EPA"
277+ , only ' \x98 ' >? terminal " SOS"
278+ , only ' \x99 ' >? terminal " SGCI"
279+ , only ' \x9A ' >? terminal " SCI"
280+ , only ' \x9B ' >? terminal " CSI"
281+ , only ' \x9C ' >? terminal " ST"
282+ , only ' \x9D ' >? terminal " OSC"
283+ , only ' \x9E ' >? terminal " PM"
284+ , only ' \x9F ' >? terminal " APC"
239285 ]
240286
287+ {- |
288+ >>> putStringLn (regbnfG regbnfGrammar)
289+ {start} = \q{regbnf}
290+ {alternate} = \q{sequence}(\|\q{sequence})*
291+ {atom} = (\\q\{)\q{char}*\}|\q{char}|\q{char-class}|\(\q{regex}\)
292+ {category} = Ll|Lu|Lt|Lm|Lo|Mn|Mc|Me|Nd|Nl|No|Pc|Pd|Ps|Pe|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp|Cc|Cf|Cs|Co|Cn
293+ {category-test} = (\\p\{)\q{category}\}|(\\P\{)(\q{category}(\|\q{category})*)\}
294+ {char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped}
295+ {char-any} = \[\^\]
296+ {char-class} = \q{fail}|\q{char-any}|\q{one-of}|\q{not-one-of}|\q{category-test}
297+ {char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC
298+ {char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control}
299+ {expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom}
300+ {fail} = \[\]
301+ {not-one-of} = (\[\^)\q{char}+(\q{category-test}?\])
302+ {one-of} = \[\q{char}+\]
303+ {regbnf} = (\{start\} = )\q{regex}(\LF\q{rule})*
304+ {regex} = \q{alternate}
305+ {rule} = \{\q{char}*(\} = )\q{regex}
306+ {sequence} = \q{char}*|\q{expression}*
307+ -}
241308regbnfGrammar :: Grammar Char RegBnf
242309regbnfGrammar = rule " regbnf" $ _RegBnf . _Bnf >~
243310 terminal " {start} = " >* regexGrammar
0 commit comments