@@ -54,6 +54,7 @@ def parse(str)
54
54
T_STAR = :STAR # atom special; list wildcard
55
55
T_PERCENT = :PERCENT # atom special; list wildcard
56
56
T_LITERAL = :LITERAL # starts with atom special
57
+ T_LITERAL8 = :LITERAL8 # starts with atom char "~"; literal8 = "~{" number "}" CRLF *OCTET (RFC3516 BINARY)
57
58
T_CRLF = :CRLF # atom special; text special; quoted special
58
59
T_TEXT = :TEXT # any char except CRLF
59
60
T_EOF = :EOF # end of response string
@@ -279,6 +280,16 @@ module RFC3629
279
280
# ; sent from server to the client.
280
281
LITERAL = /\{ (\d +)\} \r \n /n
281
282
283
+ # RFC3516 (BINARY):
284
+ # literal8 = "~{" number "}" CRLF *OCTET
285
+ # ; <number> represents the number of OCTETs
286
+ # ; in the response string.
287
+ # RFC9051:
288
+ # literal8 = "~{" number64 "}" CRLF *OCTET
289
+ # ; <number64> represents the number of OCTETs
290
+ # ; in the response string.
# Only the "~{" number "}" CRLF header is matched by this pattern; the
# single capture group holds the octet count.  The OCTETs themselves are not
# part of the match: the lexer is expected to read that many bytes starting
# right after the CRLF.
291
+ LITERAL8 = /~\{ (\d +)\} \r \n /n
292
+
282
293
module_function
283
294
284
295
def unescape_quoted! ( quoted )
@@ -298,27 +309,28 @@ def unescape_quoted(quoted)
298
309
# the default, used in most places
# Each alternative is tagged with a (?# N: NAME) marker giving its capture
# group number.  next_token dispatches on $1, $2, ... using exactly this
# numbering, so the markers and the lexer branches must be kept in sync:
# inserting LITERAL8 as group 2 shifts every later group up by one.
# NOTE(review): LITERAL8 is presumably placed ahead of the ATOM alternatives
# because "~" is itself an atom char -- confirm before reordering.
299
310
BEG_REGEXP = /\G (?:\
300
311
(?# 1: SPACE )( )|\
301
- (?# 2: ATOM prefixed with a compatible subtype)\
312
+ (?# 2: LITERAL8)#{ Patterns ::LITERAL8 } |\
313
+ (?# 3: ATOM prefixed with a compatible subtype)\
302
314
((?:\
303
- (?# 3 : NIL )(NIL)|\
304
- (?# 4 : NUMBER )(\d +)|\
305
- (?# 5 : PLUS )(\+ ))\
306
- (?# 6 : ATOM remaining after prefix )(#{ Patterns ::ATOMISH } )?\
315
+ (?# 4 : NIL )(NIL)|\
316
+ (?# 5 : NUMBER )(\d +)|\
317
+ (?# 6 : PLUS )(\+ ))\
318
+ (?# 7 : ATOM remaining after prefix )(#{ Patterns ::ATOMISH } )?\
307
319
(?# This enables greedy alternation without lookahead, in linear time.)\
308
320
)|\
309
321
(?# Also need to check for ATOM without a subtype prefix.)\
310
- (?# 7 : ATOM )(#{ Patterns ::ATOMISH } )|\
311
- (?# 8 : QUOTED )#{ Patterns ::QUOTED_rev2 } |\
312
- (?# 9 : LPAR )(\( )|\
313
- (?# 10 : RPAR )(\) )|\
314
- (?# 11 : BSLASH )(\\ )|\
315
- (?# 12 : STAR )(\* )|\
316
- (?# 13 : LBRA )(\[ )|\
317
- (?# 14 : RBRA )(\] )|\
318
- (?# 15 : LITERAL )#{ Patterns ::LITERAL } |\
319
- (?# 16 : PERCENT )(%)|\
320
- (?# 17 : CRLF )(\r \n )|\
321
- (?# 18 : EOF )(\z ))/ni
322
+ (?# 8 : ATOM )(#{ Patterns ::ATOMISH } )|\
323
+ (?# 9 : QUOTED )#{ Patterns ::QUOTED_rev2 } |\
324
+ (?# 10 : LPAR )(\( )|\
325
+ (?# 11 : RPAR )(\) )|\
326
+ (?# 12 : BSLASH )(\\ )|\
327
+ (?# 13 : STAR )(\* )|\
328
+ (?# 14 : LBRA )(\[ )|\
329
+ (?# 15 : RBRA )(\] )|\
330
+ (?# 16 : LITERAL )#{ Patterns ::LITERAL } |\
331
+ (?# 17 : PERCENT )(%)|\
332
+ (?# 18 : CRLF )(\r \n )|\
333
+ (?# 19 : EOF )(\z ))/ni
322
334
323
335
# envelope, body(structure), namespaces
324
336
DATA_REGEXP = /\G (?:\
@@ -359,6 +371,9 @@ def unescape_quoted(quoted)
359
371
# string = quoted / literal
360
372
def_token_matchers :string , T_QUOTED , T_LITERAL
361
373
374
+ # string8 accepts any of T_QUOTED, T_LITERAL, or T_LITERAL8; used by nstring8 = nstring / literal8
375
+ def_token_matchers :string8 , T_QUOTED , T_LITERAL , T_LITERAL8
376
+
362
377
# use where string represents "LABEL" values
363
378
def_token_matchers :case_insensitive__string ,
364
379
T_QUOTED , T_LITERAL ,
@@ -460,6 +475,10 @@ def nstring
460
475
NIL? ? nil : string
461
476
end
462
477
478
+ def nstring8
479
+ NIL? ? nil : string8
480
+ end
481
+
463
482
# Yields nil when NIL? succeeds; otherwise hands off to quoted and returns
# whatever it produces.
def nquoted
464
483
quoted unless NIL?
465
484
end
@@ -740,6 +759,8 @@ def msg_att(n)
740
759
when "ENVELOPE" then envelope
741
760
when "INTERNALDATE" then date_time
742
761
when "RFC822.SIZE" then number64
762
+ when /\A BINARY\[ /ni then nstring8 # BINARY, IMAP4rev2
763
+ when /\A BINARY\. SIZE\[ /ni then number # BINARY, IMAP4rev2
743
764
when "RFC822" then nstring # not in rev2
744
765
when "RFC822.HEADER" then nstring # not in rev2
745
766
when "RFC822.TEXT" then nstring # not in rev2
@@ -762,11 +783,18 @@ def msg_att__label
762
783
lbra? and rbra
763
784
when "BODY"
764
785
peek_lbra? and name << section and
765
- peek_str? ( "<" ) and name << atom # partial
786
+ peek_str? ( "<" ) and name << gt__number__lt # partial
787
+ when "BINARY" , "BINARY.SIZE"
788
+ name << section_binary
789
+ # see https://www.rfc-editor.org/errata/eid7246 and the note above
790
+ peek_str? ( "<" ) and name << gt__number__lt # partial
766
791
end
767
792
name
768
793
end
769
794
795
+ # this represents the partial size for BODY or BINARY
# NOTE(review): despite the gt/lt order in the name, callers only invoke this
# after peek_str?("<") succeeds, so the text consumed begins with "<".
# Because it is a plain alias of atom, no numeric validation happens here;
# presumably the whole "<...>" run is lexed as a single atom -- confirm.
796
+ alias gt__number__lt atom
797
+
770
798
def envelope
771
799
@lex_state = EXPR_DATA
772
800
token = lookahead
@@ -1070,6 +1098,13 @@ def section
1070
1098
str << rbra
1071
1099
end
1072
1100
1101
+ # section-binary = "[" [section-part] "]"
1102
+ def section_binary
1103
+ str = +lbra
1104
+ str << section_part unless peek_rbra?
1105
+ str << rbra
1106
+ end
1107
+
1073
1108
# section-spec = section-msgtext / (section-part ["." section-text])
1074
1109
# section-msgtext = "HEADER" /
1075
1110
# "HEADER.FIELDS" [".NOT"] SP header-list /
@@ -1100,6 +1135,11 @@ def header_list
1100
1135
str << rpar
1101
1136
end
1102
1137
1138
+ # section-part = nz-number *("." nz-number)
1139
+ # ; body part reference.
1140
+ # ; Allows for accessing nested body parts.
# NOTE(review): implemented as a plain alias of atom, so the nz-number / "."
# structure is not validated here; presumably a reference such as 1.2.3
# arrives from the lexer as one atom -- confirm.
1141
+ alias section_part atom
1142
+
1103
1143
# RFC3501 & RFC9051:
1104
1144
# header-fld-name = astring
1105
1145
#
@@ -1789,42 +1829,47 @@ def next_token
1789
1829
@pos = $~. end ( 0 )
1790
1830
if $1
1791
1831
return Token . new ( T_SPACE , $+)
1792
- elsif $2 && $6
1832
+ elsif $2
1833
+ len = $+. to_i
1834
+ val = @str [ @pos , len ]
1835
+ @pos += len
1836
+ return Token . new ( T_LITERAL8 , val )
1837
+ elsif $3 && $7
1793
1838
# greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1794
- return Token . new ( T_ATOM , $2)
1795
- elsif $3
1796
- return Token . new ( T_NIL , $+)
1839
+ return Token . new ( T_ATOM , $3)
1797
1840
elsif $4
1798
- return Token . new ( T_NUMBER , $+)
1841
+ return Token . new ( T_NIL , $+)
1799
1842
elsif $5
1843
+ return Token . new ( T_NUMBER , $+)
1844
+ elsif $6
1800
1845
return Token . new ( T_PLUS , $+)
1801
- elsif $7
1846
+ elsif $8
1802
1847
# match ATOM, without a NUMBER, NIL, or PLUS prefix
1803
1848
return Token . new ( T_ATOM , $+)
1804
- elsif $8
1805
- return Token . new ( T_QUOTED , Patterns . unescape_quoted ( $+) )
1806
1849
elsif $9
1807
- return Token . new ( T_LPAR , $+ )
1850
+ return Token . new ( T_QUOTED , Patterns . unescape_quoted ( $+ ) )
1808
1851
elsif $10
1809
- return Token . new ( T_RPAR , $+)
1852
+ return Token . new ( T_LPAR , $+)
1810
1853
elsif $11
1811
- return Token . new ( T_BSLASH , $+)
1854
+ return Token . new ( T_RPAR , $+)
1812
1855
elsif $12
1813
- return Token . new ( T_STAR , $+)
1856
+ return Token . new ( T_BSLASH , $+)
1814
1857
elsif $13
1815
- return Token . new ( T_LBRA , $+)
1858
+ return Token . new ( T_STAR , $+)
1816
1859
elsif $14
1817
- return Token . new ( T_RBRA , $+)
1860
+ return Token . new ( T_LBRA , $+)
1818
1861
elsif $15
1862
+ return Token . new ( T_RBRA , $+)
1863
+ elsif $16
1819
1864
len = $+. to_i
1820
1865
val = @str [ @pos , len ]
1821
1866
@pos += len
1822
1867
return Token . new ( T_LITERAL , val )
1823
- elsif $16
1824
- return Token . new ( T_PERCENT , $+)
1825
1868
elsif $17
1826
- return Token . new ( T_CRLF , $+)
1869
+ return Token . new ( T_PERCENT , $+)
1827
1870
elsif $18
1871
+ return Token . new ( T_CRLF , $+)
1872
+ elsif $19
1828
1873
return Token . new ( T_EOF , $+)
1829
1874
else
1830
1875
parse_error ( "[Net::IMAP BUG] BEG_REGEXP is invalid" )
0 commit comments