Skip to content

Commit 12e3cad

Browse files
committed
Update Grammar.hs
1 parent cdc4121 commit 12e3cad

File tree

1 file changed

+94
-27
lines changed

1 file changed

+94
-27
lines changed

src/Control/Lens/Grammar.hs

Lines changed: 94 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,12 @@ makeNestedPrisms ''RegBnf
111111
{category-test} = (\\p\{)\q{category}\}|(\\P\{)(\q{category}(\|\q{category})*)\}
112112
{char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped}
113113
{char-any} = \[\^\]
114-
{char-class} = \q{fail}|\q{char-any}|\q{one-of}|(\[\^)\q{char}+(\q{category-test}?\])|\q{category-test}
114+
{char-class} = \q{fail}|\q{char-any}|\q{one-of}|\q{not-one-of}|\q{category-test}
115115
{char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC
116116
{char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control}
117117
{expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom}
118118
{fail} = \[\]
119+
{not-one-of} = (\[\^)\q{char}+(\q{category-test}?\])
119120
{one-of} = \[\q{char}+\]
120121
{regex} = \q{alternate}
121122
{sequence} = \q{char}*|\q{expression}*
@@ -190,10 +191,7 @@ regexGrammar = _RegString >~ ruleRec "regex" altG
190191
[ _Fail >? failG
191192
, _Pass >? anyG
192193
, _OneOf >? oneOfG
193-
, _NotOneOf >?
194-
terminal "[^" >* several1 noSep charG
195-
>*< optionP (NotAsIn Set.empty) catTestG
196-
*< terminal "]"
194+
, _NotOneOf >? notOneOfG
197195
, _NotOneOf >? pure Set.empty >*< catTestG
198196
]
199197

@@ -203,6 +201,11 @@ regexGrammar = _RegString >~ ruleRec "regex" altG
203201

204202
-- Rule "one-of": a literal "[", then one or more charG tokens with no
-- separator, then a literal "]". The printed grammar shows it as
-- {one-of} = \[\q{char}+\].
oneOfG =
  rule "one-of"
    (terminal "[" >* several1 noSep charG *< terminal "]")
205203

204+
-- Rule "not-one-of": the literal "[^", one or more charG tokens with no
-- separator, an optional category test (catTestG), and a closing "]".
-- The printed grammar shows it as
-- {not-one-of} = (\[\^)\q{char}+(\q{category-test}?\]).
-- NOTE(review): presumably `NotAsIn Set.empty` is the value optionP supplies
-- when no category test is present — confirm against optionP's definition.
notOneOfG = rule "not-one-of" $
205+
terminal "[^" >* several1 noSep charG
206+
>*< optionP (NotAsIn Set.empty) catTestG
207+
*< terminal "]"
208+
206209
charG :: Grammar Char Char
207210
charG = rule "char" $
208211
tokenClass (notOneOf charsReserved >&&< notAsIn Control)
@@ -214,30 +217,94 @@ charG = rule "char" $
214217
-- The twelve regex metacharacters: ( ) * + ? [ \ ] ^ { | }.
-- charG's token class excludes them via `notOneOf charsReserved`, and the same
-- characters form the bracketed set of {char-escaped} in the printed grammar.
charsReserved = "()*+?[\\]^{|}"
215218

216219
-- Rule "char-control": a choiceP over the abbreviations of the control
-- characters (NUL..US, DEL, and '\x80'..'\x9F'), each paired with its Char.
-- In this diff the removed lines build the alternatives from the charsControl
-- table as `terminal abbrev >* pure charControl`; the added lines write every
-- alternative out explicitly as `only c >? terminal abbrev`.
-- NOTE(review): some abbreviations are prefixes of later alternatives
-- (HT before HTS/HTJ, SO before SOS, VT before VTS) — confirm that choiceP
-- still reaches the longer alternative when parsing.
charControlG = rule "char-control" $ choiceP
217-
[ terminal abbrev >* pure charControl
218-
| (abbrev, charControl) <- charsControl
219-
]
220-
221-
charsControl =
222-
[ ("NUL", '\NUL'), ("SOH", '\SOH'), ("STX", '\STX'), ("ETX", '\ETX')
223-
, ("EOT", '\EOT'), ("ENQ", '\ENQ'), ("ACK", '\ACK'), ("BEL", '\BEL')
224-
, ("BS", '\BS'), ("HT", '\HT'), ("LF", '\LF'), ("VT", '\VT')
225-
, ("FF", '\FF'), ("CR", '\CR'), ("SO", '\SO'), ("SI", '\SI')
226-
, ("DLE", '\DLE'), ("DC1", '\DC1'), ("DC2", '\DC2'), ("DC3", '\DC3')
227-
, ("DC4", '\DC4'), ("NAK", '\NAK'), ("SYN", '\SYN'), ("ETB", '\ETB')
228-
, ("CAN", '\CAN'), ("EM", '\EM'), ("SUB", '\SUB'), ("ESC", '\ESC')
229-
, ("FS", '\FS'), ("GS", '\GS'), ("RS", '\RS'), ("US", '\US')
230-
, ("DEL", '\DEL')
231-
, ("PAD", '\x80'), ("HOP", '\x81'), ("BPH", '\x82'), ("NBH", '\x83')
232-
, ("IND", '\x84'), ("NEL", '\x85'), ("SSA", '\x86'), ("ESA", '\x87')
233-
, ("HTS", '\x88'), ("HTJ", '\x89'), ("VTS", '\x8A'), ("PLD", '\x8B')
234-
, ("PLU", '\x8C'), ("RI", '\x8D'), ("SS2", '\x8E'), ("SS3", '\x8F')
235-
, ("DCS", '\x90'), ("PU1", '\x91'), ("PU2", '\x92'), ("STS", '\x93')
236-
, ("CCH", '\x94'), ("MW", '\x95'), ("SPA", '\x96'), ("EPA", '\x97')
237-
, ("SOS", '\x98'), ("SGCI",'\x99'), ("SCI", '\x9A'), ("CSI", '\x9B')
238-
, ("ST", '\x9C'), ("OSC", '\x9D'), ("PM", '\x9E'), ("APC", '\x9F')
220+
[ only '\NUL' >? terminal "NUL"
221+
, only '\SOH' >? terminal "SOH"
222+
, only '\STX' >? terminal "STX"
223+
, only '\ETX' >? terminal "ETX"
224+
, only '\EOT' >? terminal "EOT"
225+
, only '\ENQ' >? terminal "ENQ"
226+
, only '\ACK' >? terminal "ACK"
227+
, only '\BEL' >? terminal "BEL"
228+
, only '\BS' >? terminal "BS"
229+
, only '\HT' >? terminal "HT"
230+
, only '\LF' >? terminal "LF"
231+
, only '\VT' >? terminal "VT"
232+
, only '\FF' >? terminal "FF"
233+
, only '\CR' >? terminal "CR"
234+
, only '\SO' >? terminal "SO"
235+
, only '\SI' >? terminal "SI"
236+
, only '\DLE' >? terminal "DLE"
237+
, only '\DC1' >? terminal "DC1"
238+
, only '\DC2' >? terminal "DC2"
239+
, only '\DC3' >? terminal "DC3"
240+
, only '\DC4' >? terminal "DC4"
241+
, only '\NAK' >? terminal "NAK"
242+
, only '\SYN' >? terminal "SYN"
243+
, only '\ETB' >? terminal "ETB"
244+
, only '\CAN' >? terminal "CAN"
245+
, only '\EM' >? terminal "EM"
246+
, only '\SUB' >? terminal "SUB"
247+
, only '\ESC' >? terminal "ESC"
248+
, only '\FS' >? terminal "FS"
249+
, only '\GS' >? terminal "GS"
250+
, only '\RS' >? terminal "RS"
251+
, only '\US' >? terminal "US"
252+
, only '\DEL' >? terminal "DEL"
253+
, only '\x80' >? terminal "PAD"
254+
, only '\x81' >? terminal "HOP"
255+
, only '\x82' >? terminal "BPH"
256+
, only '\x83' >? terminal "NBH"
257+
, only '\x84' >? terminal "IND"
258+
, only '\x85' >? terminal "NEL"
259+
, only '\x86' >? terminal "SSA"
260+
, only '\x87' >? terminal "ESA"
261+
, only '\x88' >? terminal "HTS"
262+
, only '\x89' >? terminal "HTJ"
263+
, only '\x8A' >? terminal "VTS"
264+
, only '\x8B' >? terminal "PLD"
265+
, only '\x8C' >? terminal "PLU"
266+
, only '\x8D' >? terminal "RI"
267+
, only '\x8E' >? terminal "SS2"
268+
, only '\x8F' >? terminal "SS3"
269+
, only '\x90' >? terminal "DCS"
270+
, only '\x91' >? terminal "PU1"
271+
, only '\x92' >? terminal "PU2"
272+
, only '\x93' >? terminal "STS"
273+
, only '\x94' >? terminal "CCH"
274+
, only '\x95' >? terminal "MW"
275+
, only '\x96' >? terminal "SPA"
276+
, only '\x97' >? terminal "EPA"
277+
, only '\x98' >? terminal "SOS"
278+
, only '\x99' >? terminal "SGCI"
279+
, only '\x9A' >? terminal "SCI"
280+
, only '\x9B' >? terminal "CSI"
281+
, only '\x9C' >? terminal "ST"
282+
, only '\x9D' >? terminal "OSC"
283+
, only '\x9E' >? terminal "PM"
284+
, only '\x9F' >? terminal "APC"
239285
]
240286

287+
{- |
288+
>>> putStringLn (regbnfG regbnfGrammar)
289+
{start} = \q{regbnf}
290+
{alternate} = \q{sequence}(\|\q{sequence})*
291+
{atom} = (\\q\{)\q{char}*\}|\q{char}|\q{char-class}|\(\q{regex}\)
292+
{category} = Ll|Lu|Lt|Lm|Lo|Mn|Mc|Me|Nd|Nl|No|Pc|Pd|Ps|Pe|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp|Cc|Cf|Cs|Co|Cn
293+
{category-test} = (\\p\{)\q{category}\}|(\\P\{)(\q{category}(\|\q{category})*)\}
294+
{char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped}
295+
{char-any} = \[\^\]
296+
{char-class} = \q{fail}|\q{char-any}|\q{one-of}|\q{not-one-of}|\q{category-test}
297+
{char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC
298+
{char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control}
299+
{expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom}
300+
{fail} = \[\]
301+
{not-one-of} = (\[\^)\q{char}+(\q{category-test}?\])
302+
{one-of} = \[\q{char}+\]
303+
{regbnf} = (\{start\} = )\q{regex}(\LF\q{rule})*
304+
{regex} = \q{alternate}
305+
{rule} = \{\q{char}*(\} = )\q{regex}
306+
{sequence} = \q{char}*|\q{expression}*
307+
-}
241308
regbnfGrammar :: Grammar Char RegBnf
242309
regbnfGrammar = rule "regbnf" $ _RegBnf . _Bnf >~
243310
terminal "{start} = " >* regexGrammar

0 commit comments

Comments
 (0)