@@ -9,6 +9,7 @@ class IMAP < Protocol
9
9
# Parses an \IMAP server response.
10
10
class ResponseParser
11
11
include ParserUtils
12
+ extend ParserUtils ::Generator
12
13
13
14
# :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
14
15
def initialize
@@ -38,9 +39,6 @@ def parse(str)
38
39
39
40
EXPR_BEG = :EXPR_BEG # the default, used in most places
40
41
EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
41
- EXPR_TEXT = :EXPR_TEXT # text, after 'resp-text-code "]"'
42
- EXPR_RTEXT = :EXPR_RTEXT # resp-text, before "["
43
- EXPR_CTEXT = :EXPR_CTEXT # resp-text-code, after 'atom SP'
44
42
45
43
T_SPACE = :SPACE # atom special
46
44
T_ATOM = :ATOM # atom (subset of astring chars)
@@ -60,6 +58,60 @@ def parse(str)
60
58
T_TEXT = :TEXT # any char except CRLF
61
59
T_EOF = :EOF # end of response string
62
60
61
+ module Patterns
62
+
63
+ module CharClassSubtraction
64
+ refine Regexp do
65
+ def -( rhs ) ; /[#{ source } &&[^#{ rhs . source } ]]/n . freeze end
66
+ end
67
+ end
68
+ using CharClassSubtraction
69
+
70
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
71
+ # >>>
72
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
73
+ # CHAR = %x01-7F
74
+ # CRLF = CR LF
75
+ # ; Internet standard newline
76
+ # CTL = %x00-1F / %x7F
77
+ # ; controls
78
+ # DIGIT = %x30-39
79
+ # ; 0-9
80
+ # DQUOTE = %x22
81
+ # ; " (Double Quote)
82
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
83
+ # OCTET = %x00-FF
84
+ # SP = %x20
85
+ module RFC5234
86
+ ALPHA = /[A-Za-z]/n
87
+ CHAR = /[\x01 -\x7f ]/n
88
+ CRLF = /\r \n /n
89
+ CTL = /[\x00 -\x1F \x7F ]/n
90
+ DIGIT = /\d /n
91
+ DQUOTE = /"/n
92
+ HEXDIG = /\h /
93
+ OCTET = /[\x00 -\xFF ]/n # not using /./m for embedding purposes
94
+ SP = / /n
95
+ end
96
+
97
+ include RFC5234
98
+
99
+ # resp-specials = "]"
100
+ RESP_SPECIALS = /[\] ]/n
101
+
102
+ # TEXT-CHAR = <any CHAR except CR and LF>
103
+ TEXT_CHAR = CHAR - /[\r \n ]/
104
+
105
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
106
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
107
+ CODE_TEXT = /#{ CODE_TEXT_CHAR } +/n
108
+
109
+ # RFC3501:
110
+ # text = 1*TEXT-CHAR
111
+ TEXT_rev1 = /#{ TEXT_CHAR } +/
112
+
113
+ end
114
+
63
115
# the default, used in most places
64
116
BEG_REGEXP = /\G (?:\
65
117
(?# 1: SPACE )( +)|\
@@ -90,20 +142,18 @@ def parse(str)
90
142
(?# 7: RPAR )(\) ))/ni
91
143
92
144
# text, after 'resp-text-code "]"'
93
- TEXT_REGEXP = /\G (?:\
94
- (?# 1: TEXT )([^\x00 \r \n ]*))/ni
95
-
96
- # resp-text, before "["
97
- RTEXT_REGEXP = /\G (?:\
98
- (?# 1: LBRA )(\[ )|\
99
- (?# 2: TEXT )([^\x00 \r \n ]*))/ni
145
+ TEXT_REGEXP = /\G (#{ Patterns ::TEXT_rev1 } )/n
100
146
101
147
# resp-text-code, after 'atom SP'
102
- CTEXT_REGEXP = /\G (?:\
103
- (?# 1: TEXT )([^\x00 \r \n \] ]*))/ni
148
+ CTEXT_REGEXP = /\G (#{ Patterns ::CODE_TEXT } )/n
104
149
105
150
Token = Struct . new ( :symbol , :value )
106
151
152
+ def_char_matchers :SP , " " , :T_SPACE
153
+
154
+ def_char_matchers :lbra , "[" , :T_LBRA
155
+ def_char_matchers :rbra , "]" , :T_RBRA
156
+
107
157
# atom = 1*ATOM-CHAR
108
158
#
109
159
# TODO: match atom entirely by regexp (in the "lexer")
@@ -1143,20 +1193,27 @@ def namespace_response_extensions
1143
1193
# text = 1*TEXT-CHAR
1144
1194
# TEXT-CHAR = <any CHAR except CR and LF>
1145
1195
def text
1146
- match ( T_TEXT , lex_state : EXPR_TEXT ) . value
1196
+ match_re ( TEXT_REGEXP , "text" ) [ 0 ]
1147
1197
end
1148
1198
1149
- # resp-text = ["[" resp-text-code "]" SP] text
1199
+ # an "accept" version of #text
1200
+ def text?
1201
+ accept_re ( TEXT_REGEXP ) &.[]( 0 )
1202
+ end
1203
+
1204
+ # RFC3501:
1205
+ # resp-text = ["[" resp-text-code "]" SP] text
1206
+ # RFC9051:
1207
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1208
+ #
1209
+ # We leniently re-interpret this as
1210
+ # resp-text = "[" resp-text-code "]" [SP [text]] / [text]
1150
1211
def resp_text
1151
- token = match ( T_LBRA , T_TEXT , lex_state : EXPR_RTEXT )
1152
- case token . symbol
1153
- when T_LBRA
1154
- code = resp_text_code
1155
- match ( T_RBRA )
1156
- accept_space # violating RFC
1157
- ResponseText . new ( code , text )
1158
- when T_TEXT
1159
- ResponseText . new ( nil , token . value )
1212
+ if lbra?
1213
+ code = resp_text_code ; rbra
1214
+ ResponseText . new ( code , SP? && text? || "" )
1215
+ else
1216
+ ResponseText . new ( nil , text? || "" )
1160
1217
end
1161
1218
end
1162
1219
@@ -1198,15 +1255,19 @@ def resp_text_code
1198
1255
token = lookahead
1199
1256
if token . symbol == T_SPACE
1200
1257
shift_token
1201
- token = match ( T_TEXT , lex_state : EXPR_CTEXT )
1202
- result = ResponseCode . new ( name , token . value )
1258
+ result = ResponseCode . new ( name , text_chars_except_rbra )
1203
1259
else
1204
1260
result = ResponseCode . new ( name , nil )
1205
1261
end
1206
1262
end
1207
1263
return result
1208
1264
end
1209
1265
1266
+ # 1*<any TEXT-CHAR except "]">
1267
+ def text_chars_except_rbra
1268
+ match_re ( CTEXT_REGEXP , '1*<any TEXT-CHAR except "]">' ) [ 0 ]
1269
+ end
1270
+
1210
1271
def charset_list
1211
1272
result = [ ]
1212
1273
if accept ( T_SPACE )
@@ -1447,21 +1508,6 @@ def nil_atom
1447
1508
1448
1509
SPACES_REGEXP = /\G */n
1449
1510
1450
- # This advances @pos directly so it's safe before changing @lex_state.
1451
- def accept_space
1452
- if @token
1453
- if @token . symbol == T_SPACE
1454
- shift_token
1455
- " "
1456
- end
1457
- elsif @str [ @pos ] == " "
1458
- @pos += 1
1459
- " "
1460
- end
1461
- end
1462
-
1463
- alias SP? accept_space
1464
-
1465
1511
# The RFC is very strict about this and usually we should be too.
1466
1512
# But skipping spaces is usually a safe workaround for buggy servers.
1467
1513
#
@@ -1549,44 +1595,6 @@ def next_token
1549
1595
@str . index ( /\S */n , @pos )
1550
1596
parse_error ( "unknown token - %s" , $&. dump )
1551
1597
end
1552
- when EXPR_TEXT
1553
- if @str . index ( TEXT_REGEXP , @pos )
1554
- @pos = $~. end ( 0 )
1555
- if $1
1556
- return Token . new ( T_TEXT , $+)
1557
- else
1558
- parse_error ( "[Net::IMAP BUG] TEXT_REGEXP is invalid" )
1559
- end
1560
- else
1561
- @str . index ( /\S */n , @pos )
1562
- parse_error ( "unknown token - %s" , $&. dump )
1563
- end
1564
- when EXPR_RTEXT
1565
- if @str . index ( RTEXT_REGEXP , @pos )
1566
- @pos = $~. end ( 0 )
1567
- if $1
1568
- return Token . new ( T_LBRA , $+)
1569
- elsif $2
1570
- return Token . new ( T_TEXT , $+)
1571
- else
1572
- parse_error ( "[Net::IMAP BUG] RTEXT_REGEXP is invalid" )
1573
- end
1574
- else
1575
- @str . index ( /\S */n , @pos )
1576
- parse_error ( "unknown token - %s" , $&. dump )
1577
- end
1578
- when EXPR_CTEXT
1579
- if @str . index ( CTEXT_REGEXP , @pos )
1580
- @pos = $~. end ( 0 )
1581
- if $1
1582
- return Token . new ( T_TEXT , $+)
1583
- else
1584
- parse_error ( "[Net::IMAP BUG] CTEXT_REGEXP is invalid" )
1585
- end
1586
- else
1587
- @str . index ( /\S */n , @pos ) #/
1588
- parse_error ( "unknown token - %s" , $&. dump )
1589
- end
1590
1598
else
1591
1599
parse_error ( "invalid @lex_state - %s" , @lex_state . inspect )
1592
1600
end
0 commit comments