Skip to content

Commit 3410e14

Browse files
committed
✨ Add BINARY FETCH support
Lex and parse LITERAL8, string8, nstring8. Add section_binary and section_part. The BINARY extension isn't _fully_ supported; that requires updates to the APPEND command. But this should be sufficient for IMAP4rev2, which only requires the FETCH part of the extension.
1 parent 5511dd6 commit 3410e14

File tree

6 files changed

+221
-43
lines changed

6 files changed

+221
-43
lines changed

lib/net/imap.rb

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -404,18 +404,18 @@ module Net
404404
#
405405
# Although IMAP4rev2[https://tools.ietf.org/html/rfc9051] is not supported
406406
# yet, Net::IMAP supports several extensions that have been folded into it:
407-
# +ENABLE+, +IDLE+, +MOVE+, +NAMESPACE+, +SASL-IR+, +UIDPLUS+, and +UNSELECT+.
407+
# +ENABLE+, +IDLE+, +MOVE+, +NAMESPACE+, +SASL-IR+, +UIDPLUS+, +UNSELECT+, and
408+
# the fetch side of +BINARY+.
408409
# Commands for these extensions are listed with the {Core IMAP
409410
# commands}[rdoc-ref:Net::IMAP@Core+IMAP+commands], above.
410411
#
411412
# >>>
412413
# <em>The following are folded into +IMAP4rev2+ but are currently
413414
# unsupported or incompletely supported by</em> Net::IMAP<em>: RFC4466
414-
# extensions, +ESEARCH+, +SEARCHRES+, +LIST-EXTENDED+,
415-
# +LIST-STATUS+, +LITERAL-+, +BINARY+ fetch, and +SPECIAL-USE+. The
416-
# following extensions are implicitly supported, but will be updated with
417-
# more direct support: RFC5530 response codes, <tt>STATUS=SIZE</tt>, and
418-
# <tt>STATUS=DELETED</tt>.</em>
415+
# extensions, +ESEARCH+, +SEARCHRES+, +LIST-EXTENDED+, +LIST-STATUS+,
416+
# +LITERAL-+, and +SPECIAL-USE+. The following extensions are implicitly
417+
# supported, but will be updated with more direct support: RFC5530 response
418+
# codes, <tt>STATUS=SIZE</tt>, and <tt>STATUS=DELETED</tt>.</em>
419419
#
420420
# ==== RFC2087: +QUOTA+
421421
# - #getquota: returns the resource usage and limits for a quota root
@@ -437,6 +437,15 @@ module Net
437437
# ==== RFC2971: +ID+
438438
# - #id: exchanges client and server implementation information.
439439
#
440+
# ==== RFC3516: +BINARY+
441+
# The fetch side of +BINARY+ has been folded into
442+
# IMAP4rev2[https://tools.ietf.org/html/rfc9051].
443+
# - Updates #fetch and #uid_fetch with the +BINARY+, +BINARY.PEEK+, and
444+
# +BINARY.SIZE+ items. See FetchData#binary and FetchData#binary_size.
445+
#
446+
# >>>
447+
# *NOTE:* The binary extension to the #append command is _not_ supported yet.
448+
#
440449
# ==== RFC3691: +UNSELECT+
441450
# Folded into IMAP4rev2[https://tools.ietf.org/html/rfc9051] and also included
442451
# above with {Core IMAP commands}[rdoc-ref:Net::IMAP@Core+IMAP+commands].
@@ -612,6 +621,10 @@ module Net
612621
# [ID[https://tools.ietf.org/html/rfc2971]]::
613622
# Showalter, T., "IMAP4 ID extension", RFC 2971, DOI 10.17487/RFC2971,
614623
# October 2000, <https://www.rfc-editor.org/info/rfc2971>.
624+
# [BINARY[https://tools.ietf.org/html/rfc3516]]::
625+
# Nerenberg, L., "IMAP4 Binary Content Extension", RFC 3516,
626+
# DOI 10.17487/RFC3516, April 2003,
627+
# <https://www.rfc-editor.org/info/rfc3516>.
615628
# [ACL[https://tools.ietf.org/html/rfc4314]]::
616629
# Melnikov, A., "IMAP4 Access Control List (ACL) Extension", RFC 4314,
617630
# DOI 10.17487/RFC4314, December 2005,

lib/net/imap/fetch_data.rb

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ class IMAP < Protocol
3333
# * <b><tt>"INTERNALDATE"</tt></b> --- See #internaldate.
3434
# * <b><tt>"RFC822.SIZE"</tt></b> --- See #rfc822_size.
3535
#
36+
# IMAP4rev2[https://www.rfc-editor.org/rfc/rfc9051.html] adds the
37+
# additional fetch items from the +BINARY+ extension
38+
# {[RFC3516]}[https://www.rfc-editor.org/rfc/rfc3516.html]:
39+
#
40+
# * <b><tt>"BINARY[#{part}]"</tt></b>,
41+
# <b><tt>"BINARY[#{part}]<#{offset}>"</tt></b> -- See #binary.
42+
# * <b><tt>"BINARY.SIZE[#{part}]"</tt></b> -- See #binary_size.
43+
#
3644
# Several static message attributes in
3745
# IMAP4rev1[https://www.rfc-editor.org/rfc/rfc3501.html] are obsolete and
3846
# have been removed from
@@ -47,8 +55,7 @@ class IMAP < Protocol
4755
#
4856
# [Note:]
4957
# >>>
50-
# Additional static fields are defined in \IMAP extensions and
51-
# IMAP4rev2[https://www.rfc-editor.org/rfc/rfc9051.html], but
58+
# Additional static fields are defined in other \IMAP extensions, but
5259
# Net::IMAP can't parse them yet.
5360
#
5461
# ==== Dynamic message attributes
@@ -389,6 +396,49 @@ def rfc822_text; attr["RFC822.TEXT"] end
389396
# This is the same as getting the value for <tt>"UID"</tt> from #attr.
390397
def uid; attr["UID"] end
391398

399+
# :call-seq:
400+
# binary(*part_nums, offset: nil) -> string or nil
401+
#
402+
# Returns the binary representation of a particular MIME part, which has
403+
# already been decoded according to its Content-Transfer-Encoding.
404+
#
405+
# See #part for a description of +part_nums+ and +offset+.
406+
#
407+
# This is the same as getting the value of
408+
# <tt>"BINARY[#{part_nums.join(".")}]"</tt> or
409+
# <tt>"BINARY[#{part_nums.join(".")}]<#{offset}>"</tt> from #attr.
410+
#
411+
# The server must support either
412+
# IMAP4rev2[https://www.rfc-editor.org/rfc/rfc9051.html]
413+
# or the +BINARY+ extension
414+
# {[RFC3516]}[https://www.rfc-editor.org/rfc/rfc3516.html].
415+
#
416+
# See also: #binary_size, #mime
417+
def binary(*part_nums, offset: nil)
418+
attr[section_attr("BINARY", part_nums, offset: offset)]
419+
end
420+
421+
# :call-seq:
422+
# binary_size(*part_nums) -> integer or nil
423+
#
424+
# Returns the decoded size of a particular MIME part (the size to expect
425+
# in response to a <tt>BINARY</tt> fetch request).
426+
#
427+
# See #part for a description of +part_nums+.
428+
#
429+
# This is the same as getting the value of
430+
# <tt>"BINARY.SIZE[#{part_nums.join(".")}]"</tt> from #attr.
431+
#
432+
# The server must support either
433+
# IMAP4rev2[https://www.rfc-editor.org/rfc/rfc9051.html]
434+
# or the +BINARY+ extension
435+
# {[RFC3516]}[https://www.rfc-editor.org/rfc/rfc3516.html].
436+
#
437+
# See also: #binary, #mime
438+
def binary_size(*part_nums)
439+
attr[section_attr("BINARY.SIZE", part_nums)]
440+
end
441+
392442
# :call-seq: modseq -> Integer
393443
#
394444
# The modification sequence number associated with this IMAP message.

lib/net/imap/response_parser.rb

Lines changed: 80 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def parse(str)
5454
T_STAR = :STAR # atom special; list wildcard
5555
T_PERCENT = :PERCENT # atom special; list wildcard
5656
T_LITERAL = :LITERAL # starts with atom special
57+
T_LITERAL8 = :LITERAL8 # starts with atom char "~"
5758
T_CRLF = :CRLF # atom special; text special; quoted special
5859
T_TEXT = :TEXT # any char except CRLF
5960
T_EOF = :EOF # end of response string
@@ -279,6 +280,16 @@ module RFC3629
279280
# ; sent from server to the client.
280281
LITERAL = /\{(\d+)\}\r\n/n
281282

283+
# RFC3516 (BINARY):
284+
# literal8 = "~{" number "}" CRLF *OCTET
285+
# ; <number> represents the number of OCTETs
286+
# ; in the response string.
287+
# RFC9051:
288+
# literal8 = "~{" number64 "}" CRLF *OCTET
289+
# ; <number64> represents the number of OCTETs
290+
# ; in the response string.
291+
LITERAL8 = /~\{(\d+)\}\r\n/n
292+
282293
module_function
283294

284295
def unescape_quoted!(quoted)
@@ -298,27 +309,28 @@ def unescape_quoted(quoted)
298309
# the default, used in most places
299310
BEG_REGEXP = /\G(?:\
300311
(?# 1: SPACE )( )|\
301-
(?# 2: ATOM prefixed with a compatible subtype)\
312+
(?# 2: LITERAL8)#{Patterns::LITERAL8}|\
313+
(?# 3: ATOM prefixed with a compatible subtype)\
302314
((?:\
303-
(?# 3: NIL )(NIL)|\
304-
(?# 4: NUMBER )(\d+)|\
305-
(?# 5: PLUS )(\+))\
306-
(?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
315+
(?# 4: NIL )(NIL)|\
316+
(?# 5: NUMBER )(\d+)|\
317+
(?# 6: PLUS )(\+))\
318+
(?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
307319
(?# This enables greedy alternation without lookahead, in linear time.)\
308320
)|\
309321
(?# Also need to check for ATOM without a subtype prefix.)\
310-
(?# 7: ATOM )(#{Patterns::ATOMISH})|\
311-
(?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
312-
(?# 9: LPAR )(\()|\
313-
(?# 10: RPAR )(\))|\
314-
(?# 11: BSLASH )(\\)|\
315-
(?# 12: STAR )(\*)|\
316-
(?# 13: LBRA )(\[)|\
317-
(?# 14: RBRA )(\])|\
318-
(?# 15: LITERAL )#{Patterns::LITERAL}|\
319-
(?# 16: PERCENT )(%)|\
320-
(?# 17: CRLF )(\r\n)|\
321-
(?# 18: EOF )(\z))/ni
322+
(?# 8: ATOM )(#{Patterns::ATOMISH})|\
323+
(?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
324+
(?# 10: LPAR )(\()|\
325+
(?# 11: RPAR )(\))|\
326+
(?# 12: BSLASH )(\\)|\
327+
(?# 13: STAR )(\*)|\
328+
(?# 14: LBRA )(\[)|\
329+
(?# 15: RBRA )(\])|\
330+
(?# 16: LITERAL )#{Patterns::LITERAL}|\
331+
(?# 17: PERCENT )(%)|\
332+
(?# 18: CRLF )(\r\n)|\
333+
(?# 19: EOF )(\z))/ni
322334

323335
# envelope, body(structure), namespaces
324336
DATA_REGEXP = /\G(?:\
@@ -359,6 +371,9 @@ def unescape_quoted(quoted)
359371
# string = quoted / literal
360372
def_token_matchers :string, T_QUOTED, T_LITERAL
361373

374+
# used by nstring8 = nstring / literal8
375+
def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
376+
362377
# use where string represents "LABEL" values
363378
def_token_matchers :case_insensitive__string,
364379
T_QUOTED, T_LITERAL,
@@ -460,6 +475,10 @@ def nstring
460475
NIL? ? nil : string
461476
end
462477

478+
def nstring8
479+
NIL? ? nil : string8
480+
end
481+
463482
def nquoted
464483
NIL? ? nil : quoted
465484
end
@@ -740,6 +759,8 @@ def msg_att(n)
740759
when "ENVELOPE" then envelope
741760
when "INTERNALDATE" then date_time
742761
when "RFC822.SIZE" then number64
762+
when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
763+
when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
743764
when "RFC822" then nstring # not in rev2
744765
when "RFC822.HEADER" then nstring # not in rev2
745766
when "RFC822.TEXT" then nstring # not in rev2
@@ -762,11 +783,18 @@ def msg_att__label
762783
lbra? and rbra
763784
when "BODY"
764785
peek_lbra? and name << section and
765-
peek_str?("<") and name << atom # partial
786+
peek_str?("<") and name << gt__number__lt # partial
787+
when "BINARY", "BINARY.SIZE"
788+
name << section_binary
789+
# see https://www.rfc-editor.org/errata/eid7246 and the note above
790+
peek_str?("<") and name << gt__number__lt # partial
766791
end
767792
name
768793
end
769794

795+
# this represents the partial origin offset ("<" number ">") for BODY or BINARY
796+
alias gt__number__lt atom
797+
770798
def envelope
771799
@lex_state = EXPR_DATA
772800
token = lookahead
@@ -1070,6 +1098,13 @@ def section
10701098
str << rbra
10711099
end
10721100

1101+
# section-binary = "[" [section-part] "]"
1102+
def section_binary
1103+
str = +lbra
1104+
str << section_part unless peek_rbra?
1105+
str << rbra
1106+
end
1107+
10731108
# section-spec = section-msgtext / (section-part ["." section-text])
10741109
# section-msgtext = "HEADER" /
10751110
# "HEADER.FIELDS" [".NOT"] SP header-list /
@@ -1100,6 +1135,11 @@ def header_list
11001135
str << rpar
11011136
end
11021137

1138+
# section-part = nz-number *("." nz-number)
1139+
# ; body part reference.
1140+
# ; Allows for accessing nested body parts.
1141+
alias section_part atom
1142+
11031143
# RFC3501 & RFC9051:
11041144
# header-fld-name = astring
11051145
#
@@ -1789,42 +1829,47 @@ def next_token
17891829
@pos = $~.end(0)
17901830
if $1
17911831
return Token.new(T_SPACE, $+)
1792-
elsif $2 && $6
1832+
elsif $2
1833+
len = $+.to_i
1834+
val = @str[@pos, len]
1835+
@pos += len
1836+
return Token.new(T_LITERAL8, val)
1837+
elsif $3 && $7
17931838
# greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1794-
return Token.new(T_ATOM, $2)
1795-
elsif $3
1796-
return Token.new(T_NIL, $+)
1839+
return Token.new(T_ATOM, $3)
17971840
elsif $4
1798-
return Token.new(T_NUMBER, $+)
1841+
return Token.new(T_NIL, $+)
17991842
elsif $5
1843+
return Token.new(T_NUMBER, $+)
1844+
elsif $6
18001845
return Token.new(T_PLUS, $+)
1801-
elsif $7
1846+
elsif $8
18021847
# match ATOM, without a NUMBER, NIL, or PLUS prefix
18031848
return Token.new(T_ATOM, $+)
1804-
elsif $8
1805-
return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
18061849
elsif $9
1807-
return Token.new(T_LPAR, $+)
1850+
return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
18081851
elsif $10
1809-
return Token.new(T_RPAR, $+)
1852+
return Token.new(T_LPAR, $+)
18101853
elsif $11
1811-
return Token.new(T_BSLASH, $+)
1854+
return Token.new(T_RPAR, $+)
18121855
elsif $12
1813-
return Token.new(T_STAR, $+)
1856+
return Token.new(T_BSLASH, $+)
18141857
elsif $13
1815-
return Token.new(T_LBRA, $+)
1858+
return Token.new(T_STAR, $+)
18161859
elsif $14
1817-
return Token.new(T_RBRA, $+)
1860+
return Token.new(T_LBRA, $+)
18181861
elsif $15
1862+
return Token.new(T_RBRA, $+)
1863+
elsif $16
18191864
len = $+.to_i
18201865
val = @str[@pos, len]
18211866
@pos += len
18221867
return Token.new(T_LITERAL, val)
1823-
elsif $16
1824-
return Token.new(T_PERCENT, $+)
18251868
elsif $17
1826-
return Token.new(T_CRLF, $+)
1869+
return Token.new(T_PERCENT, $+)
18271870
elsif $18
1871+
return Token.new(T_CRLF, $+)
1872+
elsif $19
18281873
return Token.new(T_EOF, $+)
18291874
else
18301875
parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
5.24 KB
Loading

test/net/imap/test_fetch_data.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,25 @@ class FetchDataTest < Test::Unit::TestCase
164164
assert_equal "partial mime", data.mime(1, 2, offset: 456)
165165
end
166166

167+
test "#binary(1, 2, 3, offset: 1) returns the BINARY[1.2.3]<1> attr" do
168+
data = FetchData.new(1, {
169+
"BINARY[]" => "binary\0whole".b,
170+
"BINARY[1.2.3]" => "binary\0part".b,
171+
"BINARY[1.2.3]<1>" => "inary\0pa".b,
172+
})
173+
assert_equal "binary\0whole".b, data.binary
174+
assert_equal "binary\0part".b, data.binary(1, 2, 3)
175+
assert_equal "inary\0pa".b, data.binary(1, 2, 3, offset: 1)
176+
end
177+
178+
test "#binary_size(1, 2, 3) returns the BINARY.SIZE[1.2.3] attr" do
179+
data = FetchData.new(1, {
180+
"BINARY.SIZE[]" => 987_654,
181+
"BINARY.SIZE[1.2.3]" => 123_456,
182+
})
183+
assert_equal 987_654, data.binary_size
184+
assert_equal 123_456, data.binary_size(1, 2, 3)
185+
assert_equal 123_456, data.binary_size([1, 2, 3])
186+
end
187+
167188
end

0 commit comments

Comments
 (0)