@@ -130,11 +130,39 @@ module RFC3629
130
130
include RFC5234
131
131
include RFC3629
132
132
# Character classes from the IMAP formal syntax.  Every class below is a
# binary (/n) regexp so that it matches raw octets.
#
# NOTE(review): `CHAR` and the `Regexp - Regexp` operator used below are not
# defined in this excerpt; presumably CHAR comes from the included RFC5234
# module and `-` is character-class subtraction -- confirm against the rest
# of the file.

# CHAR8           = %x01-ff
#                     ; any OCTET except NUL, %x00
CHAR8             = /[\x01-\xff]/n

# list-wildcards  = "%" / "*"
LIST_WILDCARDS    = /[%*]/n
# quoted-specials = DQUOTE / "\"
QUOTED_SPECIALS   = /["\\]/n
# resp-specials   = "]"
RESP_SPECIALS     = /[\]]/n

# atomish         = 1*<any ATOM-CHAR except "[">
#                 ; We use "atomish" for msg-att and section, in order
#                 ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
#
# atom-specials   = "(" / ")" / "{" / SP / CTL / list-wildcards /
#                   quoted-specials / resp-specials
# ATOM-CHAR       = <any CHAR except atom-specials>
# atom            = 1*ATOM-CHAR
# ASTRING-CHAR    = ATOM-CHAR / resp-specials
# tag             = 1*<any ASTRING-CHAR except "+">

# The space after "{" is significant: it is the SP member of atom-specials.
ATOM_SPECIALS     = /[(){ \x00-\x1f\x7f%*"\\\]]/n
# Same set without "]", because ASTRING-CHAR also admits resp-specials.
ASTRING_SPECIALS  = /[(){ \x00-\x1f\x7f%*"\\]/n

ASTRING_CHAR      = CHAR - ASTRING_SPECIALS
ATOM_CHAR         = CHAR - ATOM_SPECIALS

ATOM              = /#{ATOM_CHAR}+/n
ASTRING_CHARS     = /#{ASTRING_CHAR}+/n
ATOMISH           = /#{ATOM_CHAR    - /[\[]/}+/
TAG               = /#{ASTRING_CHAR - /[+]/}+/

# TEXT-CHAR       = <any CHAR except CR and LF>
TEXT_CHAR         = CHAR - /[\r\n]/
140
168
@@ -167,6 +195,19 @@ module RFC3629
# One or more TEXT-CHARs.
TEXT_rev1 = /#{TEXT_CHAR}+/
# One or more of TEXT-CHAR or a multi-byte UTF-8 sequence.
# NOTE(review): UTF8_2, UTF8_3 and UTF8_4 are defined outside this excerpt
# (presumably by the included RFC3629 module) -- confirm.
TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
169
197
# RFC3501:
#   literal          = "{" number "}" CRLF *CHAR8
#                        ; Number represents the number of CHAR8s
# RFC9051:
#   literal          = "{" number64 ["+"] "}" CRLF *CHAR8
#                        ; <number64> represents the number of CHAR8s.
#                        ; A non-synchronizing literal is distinguished
#                        ; from a synchronizing literal by the presence of
#                        ; "+" before the closing "}".
#                        ; Non-synchronizing literals are not allowed when
#                        ; sent from server to the client.
#
# Matches only the literal *prefix* ("{" digits "}" CRLF); the single capture
# group is the octet count.  The "+" form is deliberately not accepted.
# The literal's payload is not matched here: the caller is expected to read
# that many octets following the match.
LITERAL = /\{(\d+)\}\r\n/n
210
+
170
211
module_function
171
212
172
213
def unescape_quoted! ( quoted )
@@ -185,30 +226,36 @@ def unescape_quoted(quoted)
185
226
# the default, used in most places
#
# Exactly one alternative matches at the current position (\G).  The capture
# group numbers in the (?# ...) comments are what the lexer dispatches on, so
# any group added or removed here must be mirrored there.
#
# Group 2 wraps a NIL / NUMBER / PLUS prefix (groups 3-5) plus an optional
# ATOMISH remainder (group 6).  When group 6 is set, the whole of group 2 is
# a longer atom such as "NIL123"; otherwise it is the bare prefix token.
#
# NOTE(review): the numbering assumes Patterns::QUOTED_rev2 contributes
# exactly one capture group (Patterns::LITERAL does) -- confirm.
#
# The continuation lines must stay at column 0: this regexp is not /x, so
# any leading whitespace would become part of the pattern.
BEG_REGEXP = /\G(?:\
(?# 1:  SPACE   )( )|\
(?# 2:  ATOM prefixed with a compatible subtype)\
((?:\
(?# 3:  NIL     )(NIL)|\
(?# 4:  NUMBER  )(\d+)|\
(?# 5:  PLUS    )(\+))\
(?# 6:  ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
(?# This enables greedy alternation without lookahead, in linear time.)\
)|\
(?# Also need to check for ATOM without a subtype prefix.)\
(?# 7:  ATOM    )(#{Patterns::ATOMISH})|\
(?# 8:  QUOTED  )#{Patterns::QUOTED_rev2}|\
(?# 9:  LPAR    )(\()|\
(?# 10: RPAR    )(\))|\
(?# 11: BSLASH  )(\\)|\
(?# 12: STAR    )(\*)|\
(?# 13: LBRA    )(\[)|\
(?# 14: RBRA    )(\])|\
(?# 15: LITERAL )#{Patterns::LITERAL}|\
(?# 16: PERCENT )(%)|\
(?# 17: CRLF    )(\r\n)|\
(?# 18: EOF     )(\z))/ni
204
251
# envelope, body(structure), namespaces
#
# A reduced token set anchored at the current position (\G).  The literal
# alternative reuses Patterns::LITERAL so the "{n}" CRLF prefix is defined in
# one place.
#
# NOTE(review): the group numbering assumes Patterns::QUOTED_rev2 contributes
# exactly one capture group -- confirm.
#
# The continuation lines must stay at column 0: this regexp is not /x, so
# any leading whitespace would become part of the pattern.
DATA_REGEXP = /\G(?:\
(?# 1:  SPACE   )( )|\
(?# 2:  NIL     )(NIL)|\
(?# 3:  NUMBER  )(\d+)|\
(?# 4:  QUOTED  )#{Patterns::QUOTED_rev2}|\
(?# 5:  LITERAL )#{Patterns::LITERAL}|\
(?# 6:  LPAR    )(\()|\
(?# 7:  RPAR    )(\)))/ni
214
261
@@ -1501,38 +1548,42 @@ def next_token
1501
1548
@pos = $~. end ( 0 )
1502
1549
if $1
1503
1550
return Token . new ( T_SPACE , $+)
1504
- elsif $2
1505
- return Token . new ( T_NIL , $+)
1551
+ elsif $2 && $6
1552
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1553
+ return Token . new ( T_ATOM , $2)
1506
1554
elsif $3
1507
- return Token . new ( T_NUMBER , $+)
1555
+ return Token . new ( T_NIL , $+)
1508
1556
elsif $4
1509
- return Token . new ( T_ATOM , $+)
1557
+ return Token . new ( T_NUMBER , $+)
1510
1558
elsif $5
1559
+ return Token . new ( T_PLUS , $+)
1560
+ elsif $7
1561
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1562
+ return Token . new ( T_ATOM , $+)
1563
+ elsif $8
1511
1564
return Token . new ( T_QUOTED , Patterns . unescape_quoted ( $+) )
1512
- elsif $6
1565
+ elsif $9
1513
1566
return Token . new ( T_LPAR , $+)
1514
- elsif $7
1567
+ elsif $10
1515
1568
return Token . new ( T_RPAR , $+)
1516
- elsif $8
1569
+ elsif $11
1517
1570
return Token . new ( T_BSLASH , $+)
1518
- elsif $9
1571
+ elsif $12
1519
1572
return Token . new ( T_STAR , $+)
1520
- elsif $10
1573
+ elsif $13
1521
1574
return Token . new ( T_LBRA , $+)
1522
- elsif $11
1575
+ elsif $14
1523
1576
return Token . new ( T_RBRA , $+)
1524
- elsif $12
1577
+ elsif $15
1525
1578
len = $+. to_i
1526
1579
val = @str [ @pos , len ]
1527
1580
@pos += len
1528
1581
return Token . new ( T_LITERAL , val )
1529
- elsif $13
1530
- return Token . new ( T_PLUS , $+)
1531
- elsif $14
1582
+ elsif $16
1532
1583
return Token . new ( T_PERCENT , $+)
1533
- elsif $15
1584
+ elsif $17
1534
1585
return Token . new ( T_CRLF , $+)
1535
- elsif $16
1586
+ elsif $18
1536
1587
return Token . new ( T_EOF , $+)
1537
1588
else
1538
1589
parse_error ( "[Net::IMAP BUG] BEG_REGEXP is invalid" )
0 commit comments