Skip to content

Commit 41d855b

Browse files
committed
✨ Add the generic "stringprep" algorithm
* Update `check_prohibited!`:
* Fix error reporting: sending profile with the exception.
* Update error reporting: `TABLE_REGEXPS.fetch` => `IndexError` with its "key not found: table" message is much more helpful than `nil.match?` => `NoMethodError`.
* Accept regexp in addition to table names.
* Add stringprep args for stored and unassigned: `stored: true` checks the `unassigned` table (with "A.1" as default).
* Add the RFC4422 mapping tables as hashes.
* Update rdoc for StringPrep tables
1 parent 05e9741 commit 41d855b

File tree

4 files changed

+182
-60
lines changed

4 files changed

+182
-60
lines changed

lib/net/imap/stringprep.rb

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,47 @@ def self.[](table)
5050

5151
module_function
5252

53+
# >>>
54+
# 1. Map -- For each character in the input, check if it has a mapping
55+
# and, if so, replace it with its mapping. This is described in
56+
# section 3.
57+
#
58+
# 2. Normalize -- Possibly normalize the result of step 1 using Unicode
59+
# normalization. This is described in section 4.
60+
#
61+
# 3. Prohibit -- Check for any characters that are not allowed in the
62+
# output. If any are found, return an error. This is described in
63+
# section 5.
64+
#
65+
# 4. Check bidi -- Possibly check for right-to-left characters, and if
66+
# any are found, make sure that the whole string satisfies the
67+
# requirements for bidirectional strings. If the string does not
68+
# satisfy the requirements for bidirectional strings, return an
69+
# error. This is described in section 6.
70+
#
71+
# The above steps MUST be performed in the order given to comply with
72+
# this specification.
73+
#
74+
def stringprep(string,
75+
maps:,
76+
normalization:,
77+
prohibited:,
78+
**opts)
79+
string = string.encode("UTF-8") # also dups (and raises invalid encoding)
80+
map_tables!(string, *maps) if maps
81+
string.unicode_normalize!(normalization) if normalization
82+
check_prohibited!(string, *prohibited, **opts) if prohibited
83+
string
84+
end
85+
86+
def map_tables!(string, *tables)
87+
tables.each do |table|
88+
regexp, replacements = Tables::MAPPINGS.fetch(table)
89+
string.gsub!(regexp, replacements)
90+
end
91+
string
92+
end
93+
5394
# Checks +string+ for any codepoint in +tables+. Raises a
5495
# ProhibitedCodepoint describing the first matching table.
5596
#
@@ -58,13 +99,27 @@ def self.[](table)
5899
#
59100
# +profile+ is an optional string which will be added to any exception that
60101
# is raised (it does not affect behavior).
61-
def check_prohibited!(string, *tables, bidi: false, profile: nil)
62-
tables = TABLE_TITLES.keys.grep(/^C/) if tables.empty?
102+
def check_prohibited!(string,
103+
*tables,
104+
bidi: false,
105+
unassigned: "A.1",
106+
stored: false,
107+
profile: nil)
108+
tables = Tables::TITLES.keys.grep(/^C/) if tables.empty?
109+
tables |= [unassigned] if stored
63110
tables |= %w[C.8] if bidi
64-
table = tables.find {|t| Tables::REGEXPS[t].match?(string) }
65-
raise ProhibitedCodepoint.new(
66-
table, string: string, profile: nil
67-
) if table
111+
table = tables.find {|t|
112+
case t
113+
when String then Tables::REGEXPS.fetch(t).match?(string)
114+
when Regexp then t.match?(string)
115+
else raise ArgumentError, "only table names and regexps can be checked"
116+
end
117+
}
118+
if table
119+
raise ProhibitedCodepoint.new(
120+
table, string: string, profile: profile
121+
)
122+
end
68123
check_bidi!(string, profile: profile) if bidi
69124
end
70125

lib/net/imap/stringprep/saslprep_tables.rb

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,25 @@ module Net::IMAP::StringPrep
99
module SASLprep
1010

1111
# RFC4013 §2.1 Mapping - mapped to space
12-
# * non-ASCII space characters (\StringPrep\[\"C.1.2\"]) that can be
13-
# mapped to SPACE (U+0020), and
12+
# >>>
13+
# non-ASCII space characters (\StringPrep\[\"C.1.2\"]) that can
14+
# be mapped to SPACE (U+0020)
1415
#
1516
# Equal to \StringPrep\[\"C.1.2\"].
16-
# Redefined here to avoid loading the StringPrep module.
17+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
1718
MAP_TO_SPACE = /[\u200b\p{Zs}&&[^ ]]/.freeze
1819

1920
# RFC4013 §2.1 Mapping - mapped to nothing
20-
# the "commonly mapped to nothing" characters (\StringPrep\[\"B.1\"])
21-
# that can be mapped to nothing.
21+
# >>>
22+
# the "commonly mapped to nothing" characters
23+
# (\StringPrep\[\"B.1\"]) that can be mapped to nothing.
2224
#
2325
# Equal to \StringPrep\[\"B.1\"].
24-
# Redefined here to avoid loading the StringPrep module.
26+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
2527
MAP_TO_NOTHING = /[\u{00ad 034f 1806 2060 feff}\u{180b}-\u{180d}\u{200b}-\u{200d}\u{fe00}-\u{fe0f}]/.freeze
2628

27-
# RFC4013 §2.3 Prohibited Output::
29+
# RFC4013 §2.3 Prohibited Output
30+
# >>>
2831
# * Non-ASCII space characters — \StringPrep\[\"C.1.2\"]
2932
# * ASCII control characters — \StringPrep\[\"C.2.1\"]
3033
# * Non-ASCII control characters — \StringPrep\[\"C.2.2\"]
@@ -39,32 +42,40 @@ module SASLprep
3942

4043
# Adds unassigned (by Unicode 3.2) codepoints to TABLES_PROHIBITED.
4144
#
42-
# RFC4013 §2.5 Unassigned Code Points::
43-
# This profile specifies the \StringPrep\[\"A.1\"] table as its list of
44-
# unassigned code points.
45+
# RFC4013 §2.5 Unassigned Code Points
46+
# >>>
47+
# This profile specifies the \StringPrep\[\"A.1\"] table as its
48+
# list of unassigned code points.
4549
TABLES_PROHIBITED_STORED = ["A.1", *TABLES_PROHIBITED].freeze
4650

47-
# Matches codepoints prohibited by RFC4013 §2.3.
51+
# A Regexp matching codepoints prohibited by RFC4013 §2.3.
4852
#
49-
# See TABLES_PROHIBITED.
50-
#
51-
# Equal to +Regexp.union+ of the TABLES_PROHIBITED tables. Redefined
52-
# here to avoid loading the StringPrep module unless necessary.
53+
# This combines all of the TABLES_PROHIBITED tables.
5354
PROHIBITED_OUTPUT = /[\u{06dd 070f 1680 180e 3000 feff e0001}\u{0000}-\u{001f}\u{007f}-\u{00a0}\u{0340}-\u{0341}\u{2000}-\u{200f}\u{2028}-\u{202f}\u{205f}-\u{2063}\u{206a}-\u{206f}\u{2ff0}-\u{2ffb}\u{e000}-\u{f8ff}\u{fdd0}-\u{fdef}\u{fff9}-\u{ffff}\u{1d173}-\u{1d17a}\u{1fffe}-\u{1ffff}\u{2fffe}-\u{2ffff}\u{3fffe}-\u{3ffff}\u{4fffe}-\u{4ffff}\u{5fffe}-\u{5ffff}\u{6fffe}-\u{6ffff}\u{7fffe}-\u{7ffff}\u{8fffe}-\u{8ffff}\u{9fffe}-\u{9ffff}\u{afffe}-\u{affff}\u{bfffe}-\u{bffff}\u{cfffe}-\u{cffff}\u{dfffe}-\u{dffff}\u{e0020}-\u{e007f}\u{efffe}-\u{10ffff}\p{Cs}]/.freeze
5455

55-
# RFC4013 §2.5 Unassigned Code Points::
56-
# This profile specifies the \StringPrep\[\"A.1\"] table as its list of
57-
# unassigned code points.
56+
# RFC4013 §2.5 Unassigned Code Points
57+
# >>>
58+
# This profile specifies the \StringPrep\[\"A.1\"] table as its
59+
# list of unassigned code points.
60+
#
61+
# Equal to \StringPrep\[\"A.1\"].
62+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
5863
UNASSIGNED = /\p{^AGE=3.2}/.freeze
5964

60-
# Matches codepoints prohibited by RFC4013 §2.3 and §2.5.
65+
# A Regexp matching codepoints prohibited by RFC4013 §2.3 and §2.5.
6166
#
62-
# See TABLES_PROHIBITED_STORED.
67+
# This combines PROHIBITED_OUTPUT and UNASSIGNED.
6368
PROHIBITED_OUTPUT_STORED = Regexp.union(
6469
UNASSIGNED, PROHIBITED_OUTPUT
6570
).freeze
6671

6772
# Bidirectional Characters [StringPrep, §6]
73+
#
74+
# A Regexp for strings that don't satisfy StringPrep's Bidirectional
75+
# Characters rules.
76+
#
77+
# Equal to StringPrep::Tables::BIDI_FAILURE.
78+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
6879
BIDI_FAILURE = /(?mx-i: # RandALCat followed by LCat
6980
(?<r_and_al_cat>[\u{05be 05c0 05c3 061b 061f 06dd 0710 07b1 200f fb1d fb3e}\u{05d0}-\u{05ea}\u{05f0}-\u{05f4}\u{0621}-\u{063a}\u{0640}-\u{064a}\u{066d}-\u{066f}\u{0671}-\u{06d5}\u{06e5}-\u{06e6}\u{06fa}-\u{06fe}\u{0700}-\u{070d}\u{0712}-\u{072c}\u{0780}-\u{07a5}\u{fb1f}-\u{fb28}\u{fb2a}-\u{fb36}\u{fb38}-\u{fb3c}\u{fb40}-\u{fb41}\u{fb43}-\u{fb44}\u{fb46}-\u{fbb1}\u{fbd3}-\u{fd3d}\u{fd50}-\u{fd8f}\u{fd92}-\u{fdc7}\u{fdf0}-\u{fdfc}\u{fe70}-\u{fe74}\u{fe76}-\u{fefc}])
7081
.*?
@@ -79,17 +90,16 @@ module SASLprep
7990
\g<r_and_al_cat> .*? \g<not_r_nor_al>\z
8091
)/mx.freeze
8192

82-
# Matches strings prohibited by RFC4013 §2.3 and §2.4.
93+
# A Regexp matching strings prohibited by RFC4013 §2.3 and §2.4.
8394
#
84-
# This checks prohibited output and bidirectional characters.
95+
# This combines PROHIBITED_OUTPUT and BIDI_FAILURE.
8596
PROHIBITED = Regexp.union(
8697
PROHIBITED_OUTPUT, BIDI_FAILURE,
8798
)
8899

89-
# Matches strings prohibited by RFC4013 §2.3, §2.4, and §2.5.
100+
# A Regexp matching strings prohibited by RFC4013 §2.3, §2.4, and §2.5.
90101
#
91-
# This checks prohibited output, bidirectional characters, and
92-
# unassigned codepoints.
102+
# This combines PROHIBITED_OUTPUT_STORED and BIDI_FAILURE.
93103
PROHIBITED_STORED = Regexp.union(
94104
PROHIBITED_OUTPUT_STORED, BIDI_FAILURE,
95105
)

0 commit comments

Comments
 (0)