Skip to content

Commit fff6dc6

Browse files
authored
🔀 Merge pull request #101 from ruby/stringprep-generic
✨ Add generic stringprep algorithm and the "trace" profile
2 parents 922f4a1 + 08052a9 commit fff6dc6

File tree

6 files changed

+294
-60
lines changed

6 files changed

+294
-60
lines changed

‎lib/net/imap/stringprep.rb

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class IMAP < Protocol
1111
module StringPrep
1212
autoload :SASLprep, File.expand_path("stringprep/saslprep", __dir__)
1313
autoload :Tables, File.expand_path("stringprep/tables", __dir__)
14+
autoload :Trace, File.expand_path("stringprep/trace", __dir__)
1415

1516
# ArgumentError raised when +string+ is invalid for the stringprep
1617
# +profile+.
@@ -50,6 +51,47 @@ def self.[](table)
5051

5152
module_function
5253

54+
# >>>
55+
# 1. Map -- For each character in the input, check if it has a mapping
56+
# and, if so, replace it with its mapping. This is described in
57+
# section 3.
58+
#
59+
# 2. Normalize -- Possibly normalize the result of step 1 using Unicode
60+
# normalization. This is described in section 4.
61+
#
62+
# 3. Prohibit -- Check for any characters that are not allowed in the
63+
# output. If any are found, return an error. This is described in
64+
# section 5.
65+
#
66+
# 4. Check bidi -- Possibly check for right-to-left characters, and if
67+
# any are found, make sure that the whole string satisfies the
68+
# requirements for bidirectional strings. If the string does not
69+
# satisfy the requirements for bidirectional strings, return an
70+
# error. This is described in section 6.
71+
#
72+
# The above steps MUST be performed in the order given to comply with
73+
# this specification.
74+
#
75+
# Runs the stringprep steps on a UTF-8 copy of +string+ and returns that
# copy; the argument itself is never modified.
#
# +maps+::          table names handed to #map_tables!; skipped when nil/false
# +normalization+:: form handed to String#unicode_normalize!; skipped when
#                   nil/false
# +prohibited+::    tables handed to #check_prohibited!; skipped when nil/false
# +opts+::          any remaining keywords, forwarded to #check_prohibited!
#
# Bidi checking only happens inside #check_prohibited!, so it is skipped
# too whenever +prohibited+ is nil/false.
#
# Raises ArgumentError when +string+ is not valid UTF-8.
def stringprep(string,
               maps:,
               normalization:,
               prohibited:,
               **opts)
  # String#encode always returns a copy, but it only validates bytes when it
  # really transcodes.  UTF-8 to UTF-8 is a no-op, so invalid input must be
  # rejected explicitly; otherwise it would be returned untouched whenever
  # every optional step below is disabled.
  string = string.encode("UTF-8")
  unless string.valid_encoding?
    raise ArgumentError, "invalid byte sequence in UTF-8"
  end
  map_tables!(string, *maps)                     if maps
  string.unicode_normalize!(normalization)       if normalization
  check_prohibited!(string, *prohibited, **opts) if prohibited
  string
end
86+
87+
def map_tables!(string, *tables)
88+
tables.each do |table|
89+
regexp, replacements = Tables::MAPPINGS.fetch(table)
90+
string.gsub!(regexp, replacements)
91+
end
92+
string
93+
end
94+
5395
# Checks +string+ for any codepoint in +tables+. Raises a
5496
# ProhibitedCodepoint describing the first matching table.
5597
#
@@ -58,13 +100,27 @@ def self.[](table)
58100
#
59101
# +profile+ is an optional string which will be added to any exception that
60102
# is raised (it does not affect behavior).
61-
# +tables+ may hold table names (Strings, looked up in Tables::REGEXPS) or
# Regexps; any other entry raises ArgumentError once it is reached.  When no
# tables are given, every table whose name starts with "C" is checked.
#
# +unassigned+:: table name added to the checked tables when +stored+ is true
# +stored+::     when true, also checks the +unassigned+ table
# +bidi+::       when true, also checks table "C.8" and then calls #check_bidi!
def check_prohibited!(string,
                      *tables,
                      bidi: false,
                      unassigned: "A.1",
                      stored: false,
                      profile: nil)
  tables  = Tables::TITLES.keys.grep(/^C/) if tables.empty?
  tables |= [unassigned] if stored
  tables |= %w[C.8] if bidi

  # The first entry whose pattern matches is the one that gets reported.
  offending = tables.find do |entry|
    pattern =
      case entry
      when String then Tables::REGEXPS.fetch(entry)
      when Regexp then entry
      else raise ArgumentError, "only table names and regexps can be checked"
      end
    pattern.match?(string)
  end

  raise ProhibitedCodepoint.new(offending, string: string, profile: profile) if offending

  check_bidi!(string, profile: profile) if bidi
end
70126

‎lib/net/imap/stringprep/saslprep_tables.rb

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,25 @@ module Net::IMAP::StringPrep
99
module SASLprep
1010

1111
# RFC4013 §2.1 Mapping - mapped to space
12-
# * non-ASCII space characters (\StringPrep\[\"C.1.2\"]) that can be
13-
# mapped to SPACE (U+0020), and
12+
# >>>
13+
# non-ASCII space characters (\StringPrep\[\"C.1.2\"]) that can
14+
# be mapped to SPACE (U+0020)
1415
#
1516
# Equal to \StringPrep\[\"C.1.2\"].
16-
# Redefined here to avoid loading the StringPrep module.
17+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
1718
MAP_TO_SPACE = /[\u200b\p{Zs}&&[^ ]]/.freeze
1819

1920
# RFC4013 §2.1 Mapping - mapped to nothing
20-
# the "commonly mapped to nothing" characters (\StringPrep\[\"B.1\"])
21-
# that can be mapped to nothing.
21+
# >>>
22+
# the "commonly mapped to nothing" characters
23+
# (\StringPrep\[\"B.1\"]) that can be mapped to nothing.
2224
#
2325
# Equal to \StringPrep\[\"B.1\"].
24-
# Redefined here to avoid loading the StringPrep module.
26+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
2527
MAP_TO_NOTHING = /[\u{00ad 034f 1806 2060 feff}\u{180b}-\u{180d}\u{200b}-\u{200d}\u{fe00}-\u{fe0f}]/.freeze
2628

27-
# RFC4013 §2.3 Prohibited Output::
29+
# RFC4013 §2.3 Prohibited Output
30+
# >>>
2831
# * Non-ASCII space characters — \StringPrep\[\"C.1.2\"]
2932
# * ASCII control characters — \StringPrep\[\"C.2.1\"]
3033
# * Non-ASCII control characters — \StringPrep\[\"C.2.2\"]
@@ -39,32 +42,40 @@ module SASLprep
3942

4043
# Adds unassigned (by Unicode 3.2) codepoints to TABLES_PROHIBITED.
4144
#
42-
# RFC4013 §2.5 Unassigned Code Points::
43-
# This profile specifies the \StringPrep\[\"A.1\"] table as its list of
44-
# unassigned code points.
45+
# RFC4013 §2.5 Unassigned Code Points
46+
# >>>
47+
# This profile specifies the \StringPrep\[\"A.1\"] table as its
48+
# list of unassigned code points.
4549
TABLES_PROHIBITED_STORED = ["A.1", *TABLES_PROHIBITED].freeze
4650

47-
# Matches codepoints prohibited by RFC4013 §2.3.
51+
# A Regexp matching codepoints prohibited by RFC4013 §2.3.
4852
#
49-
# See TABLES_PROHIBITED.
50-
#
51-
# Equal to +Regexp.union+ of the TABLES_PROHIBITED tables. Redefined
52-
# here to avoid loading the StringPrep module unless necessary.
53+
# This combines all of the TABLES_PROHIBITED tables.
5354
PROHIBITED_OUTPUT = /[\u{06dd 070f 1680 180e 3000 feff e0001}\u{0000}-\u{001f}\u{007f}-\u{00a0}\u{0340}-\u{0341}\u{2000}-\u{200f}\u{2028}-\u{202f}\u{205f}-\u{2063}\u{206a}-\u{206f}\u{2ff0}-\u{2ffb}\u{e000}-\u{f8ff}\u{fdd0}-\u{fdef}\u{fff9}-\u{ffff}\u{1d173}-\u{1d17a}\u{1fffe}-\u{1ffff}\u{2fffe}-\u{2ffff}\u{3fffe}-\u{3ffff}\u{4fffe}-\u{4ffff}\u{5fffe}-\u{5ffff}\u{6fffe}-\u{6ffff}\u{7fffe}-\u{7ffff}\u{8fffe}-\u{8ffff}\u{9fffe}-\u{9ffff}\u{afffe}-\u{affff}\u{bfffe}-\u{bffff}\u{cfffe}-\u{cffff}\u{dfffe}-\u{dffff}\u{e0020}-\u{e007f}\u{efffe}-\u{10ffff}\p{Cs}]/.freeze
5455

55-
# RFC4013 §2.5 Unassigned Code Points::
56-
# This profile specifies the \StringPrep\[\"A.1\"] table as its list of
57-
# unassigned code points.
56+
# RFC4013 §2.5 Unassigned Code Points
57+
# >>>
58+
# This profile specifies the \StringPrep\[\"A.1\"] table as its
59+
# list of unassigned code points.
60+
#
61+
# Equal to \StringPrep\[\"A.1\"].
62+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
5863
UNASSIGNED = /\p{^AGE=3.2}/.freeze
5964

60-
# Matches codepoints prohibited by RFC4013 §2.3 and §2.5.
65+
# A Regexp matching codepoints prohibited by RFC4013 §2.3 and §2.5.
6166
#
62-
# See TABLES_PROHIBITED_STORED.
67+
# This combines PROHIBITED_OUTPUT and UNASSIGNED.
6368
PROHIBITED_OUTPUT_STORED = Regexp.union(
6469
UNASSIGNED, PROHIBITED_OUTPUT
6570
).freeze
6671

6772
# Bidirectional Characters [StringPrep, §6]
73+
#
74+
# A Regexp for strings that don't satisfy StringPrep's Bidirectional
75+
# Characters rules.
76+
#
77+
# Equal to StringPrep::Tables::BIDI_FAILURE.
78+
# Redefined here to avoid loading StringPrep::Tables unless necessary.
6879
BIDI_FAILURE = /(?mx-i: # RandALCat followed by LCat
6980
(?<r_and_al_cat>[\u{05be 05c0 05c3 061b 061f 06dd 0710 07b1 200f fb1d fb3e}\u{05d0}-\u{05ea}\u{05f0}-\u{05f4}\u{0621}-\u{063a}\u{0640}-\u{064a}\u{066d}-\u{066f}\u{0671}-\u{06d5}\u{06e5}-\u{06e6}\u{06fa}-\u{06fe}\u{0700}-\u{070d}\u{0712}-\u{072c}\u{0780}-\u{07a5}\u{fb1f}-\u{fb28}\u{fb2a}-\u{fb36}\u{fb38}-\u{fb3c}\u{fb40}-\u{fb41}\u{fb43}-\u{fb44}\u{fb46}-\u{fbb1}\u{fbd3}-\u{fd3d}\u{fd50}-\u{fd8f}\u{fd92}-\u{fdc7}\u{fdf0}-\u{fdfc}\u{fe70}-\u{fe74}\u{fe76}-\u{fefc}])
7081
.*?
@@ -79,17 +90,16 @@ module SASLprep
7990
\g<r_and_al_cat> .*? \g<not_r_nor_al>\z
8091
)/mx.freeze
8192

82-
# Matches strings prohibited by RFC4013 §2.3 and §2.4.
93+
# A Regexp matching strings prohibited by RFC4013 §2.3 and §2.4.
8394
#
84-
# This checks prohibited output and bidirectional characters.
95+
# This combines PROHIBITED_OUTPUT and BIDI_FAILURE.
8596
PROHIBITED = Regexp.union(
8697
PROHIBITED_OUTPUT, BIDI_FAILURE,
8798
)
8899

89-
# Matches strings prohibited by RFC4013 §2.3, §2.4, and §2.5.
100+
# A Regexp matching strings prohibited by RFC4013 §2.3, §2.4, and §2.5.
90101
#
91-
# This checks prohibited output, bidirectional characters, and
92-
# unassigned codepoints.
102+
# This combines PROHIBITED_OUTPUT_STORED and BIDI_FAILURE.
93103
PROHIBITED_STORED = Regexp.union(
94104
PROHIBITED_OUTPUT_STORED, BIDI_FAILURE,
95105
)

‎lib/net/imap/stringprep/tables.rb

Lines changed: 19 additions & 2 deletions
Large diffs are not rendered by default.

‎lib/net/imap/stringprep/trace.rb

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# frozen_string_literal: true
2+
3+
module Net
4+
class IMAP
5+
module StringPrep
6+
7+
# Defined in RFC-4505[https://tools.ietf.org/html/rfc4505] §3, the +trace+
8+
# profile of \StringPrep is used by the +ANONYMOUS+ \SASL mechanism.
9+
module Trace

  # Profile name, defined in RFC-4505[https://tools.ietf.org/html/rfc4505] §3.
  STRINGPREP_PROFILE = "trace"

  # >>>
  #   The character repertoire of this profile is Unicode 3.2 [Unicode].
  UNASSIGNED_TABLE = "A.1"

  # >>>
  #   No mapping is required by this profile.
  MAPPING_TABLES = nil

  # >>>
  #   No Unicode normalization is required by this profile.
  NORMALIZATION = nil

  # From RFC-4505[https://tools.ietf.org/html/rfc4505] §3, The "trace"
  # Profile of "Stringprep":
  # >>>
  #   Characters from the following tables of [StringPrep] are prohibited:
  #
  #   - C.2.1 (ASCII control characters)
  #   - C.2.2 (Non-ASCII control characters)
  #   - C.3 (Private use characters)
  #   - C.4 (Non-character code points)
  #   - C.5 (Surrogate codes)
  #   - C.6 (Inappropriate for plain text)
  #   - C.8 (Change display properties are deprecated)
  #   - C.9 (Tagging characters)
  #
  #   No additional characters are prohibited.
  PROHIBITED_TABLES = %w[C.2.1 C.2.2 C.3 C.4 C.5 C.6 C.8 C.9].freeze

  # >>>
  #   This profile requires bidirectional character checking per Section 6
  #   of [StringPrep].
  CHECK_BIDI = true

  module_function

  # Prepares +string+ with the "trace" profile by delegating to
  # StringPrep.stringprep, passing this module's constants as the profile
  # settings.  Keywords in +opts+ are merged in last, so they are passed along
  # and take precedence over the profile's own settings.
  #
  # From RFC-4505[https://tools.ietf.org/html/rfc4505] §3, The "trace"
  # Profile of "Stringprep":
  # >>>
  #   The character repertoire of this profile is Unicode 3.2 [Unicode].
  #
  #   No mapping is required by this profile.
  #
  #   No Unicode normalization is required by this profile.
  #
  #   The list of unassigned code points for this profile is that provided
  #   in Appendix A of [StringPrep].  Unassigned code points are not
  #   prohibited.
  #
  #   Characters from the following tables of [StringPrep] are prohibited:
  #   (documented on PROHIBITED_TABLES)
  #
  #   This profile requires bidirectional character checking per Section 6
  #   of [StringPrep].
  def stringprep_trace(string, **opts)
    profile_settings = {
      unassigned:    UNASSIGNED_TABLE,
      maps:          MAPPING_TABLES,
      prohibited:    PROHIBITED_TABLES,
      normalization: NORMALIZATION,
      bidi:          CHECK_BIDI,
      profile:       STRINGPREP_PROFILE,
    }
    StringPrep.stringprep(string, **profile_settings.merge(opts))
  end

end
82+
83+
end
84+
end
85+
end

0 commit comments

Comments
 (0)