Skip to content

Commit ab05234

Browse files
committed
✨ Add nameprep stringprep profile
This isn't used by or needed for Net::IMAP, but adding a new profile is trivial with the new generic stringprep code. Nameprep is used by IDNA, and not useful without punycode. But, with the addition of punycode, Net::IMAP could also support IDNA hostnames. On the other hand, IDNA and punycode should probably be added to `uri`, not `net-imap`. Perhaps `stringprep` should be extracted to its own gem?
1 parent fb9d9ee commit ab05234

File tree

4 files changed

+268
-1
lines changed

4 files changed

+268
-1
lines changed

lib/net/imap/stringprep.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ class IMAP < Protocol
99
# codepoint table defined in the RFC-3454 appendices is matched by a Regexp
1010
# defined in this module.
1111
module StringPrep
12+
autoload :NamePrep, File.expand_path("stringprep/nameprep", __dir__)
1213
autoload :SASLprep, File.expand_path("stringprep/saslprep", __dir__)
1314
autoload :Tables, File.expand_path("stringprep/tables", __dir__)
1415
autoload :Trace, File.expand_path("stringprep/trace", __dir__)

lib/net/imap/stringprep/nameprep.rb

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# frozen_string_literal: true
2+
3+
module Net
  class IMAP
    module StringPrep

      # The +nameprep+ profile of "Stringprep", as defined in
      # RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html].  In the words of
      # the RFC, nameprep is:
      # >>>
      #   used by the IDNA protocol for preparing domain names; it is not
      #   designed for any other purpose.  It is explicitly not designed for
      #   processing arbitrary free text and SHOULD NOT be used for that
      #   purpose.
      #
      #   ...
      #
      #   This profile specifies prohibiting using the following tables...:
      #
      #   - C.1.2 (Non-ASCII space characters)
      #   - C.2.2 (Non-ASCII control characters)
      #   - C.3 (Private use characters)
      #   - C.4 (Non-character code points)
      #   - C.5 (Surrogate codes)
      #   - C.6 (Inappropriate for plain text)
      #   - C.7 (Inappropriate for canonical representation)
      #   - C.8 (Change display properties or are deprecated)
      #   - C.9 (Tagging characters)
      #
      #   IMPORTANT NOTE: This profile MUST be used with the IDNA protocol.
      #   The IDNA protocol has additional prohibitions that are checked
      #   outside of this profile.
      #
      # Each constant below is passed to StringPrep.stringprep by #nameprep.
      module NamePrep

        # Profile name, passed as +profile+.
        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §10
        STRINGPREP_PROFILE = "nameprep"

        # Name of the unassigned code point table, passed as +unassigned+.
        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §2
        UNASSIGNED_TABLE = "A.1"

        # Names of the mapping tables, passed as +maps+.
        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §3
        MAPPING_TABLES = %w[B.1 B.2].freeze

        # Unicode normalization form, passed as +normalization+.
        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §4
        NORMALIZATION = :nfkc

        # Names of the prohibited output tables, passed as +prohibited+.
        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §5
        PROHIBITED_TABLES = %w[C.1.2 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9].freeze

        # Whether bidirectional character checks are requested, passed as
        # +bidi+.
        # From RFC3491[https://www.rfc-editor.org/rfc/rfc3491.html] §6
        CHECK_BIDI = true

        # Runs +string+ through StringPrep.stringprep using the nameprep
        # profile settings declared by this module's constants, and returns
        # whatever StringPrep.stringprep returns.
        #
        # Any additional keyword arguments in +opts+ are forwarded as-is.  They
        # are splatted after the profile settings, so an option with the same
        # name as a profile setting takes precedence over it.
        def nameprep(string, **opts)
          profile_settings = {
            unassigned:    UNASSIGNED_TABLE,
            maps:          MAPPING_TABLES,
            prohibited:    PROHIBITED_TABLES,
            normalization: NORMALIZATION,
            bidi:          CHECK_BIDI,
            profile:       STRINGPREP_PROFILE,
          }
          StringPrep.stringprep(string, **profile_settings, **opts)
        end

        # Expose #nameprep both as NamePrep.nameprep and as a private instance
        # method for modules/classes that include NamePrep.
        module_function :nameprep
      end

    end
  end
end
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# frozen_string_literal: true
2+
3+
require "net/imap"
4+
require "test/unit"
5+
6+
# Data-driven tests for the +nameprep+ stringprep profile
# (Net::IMAP::StringPrep::NamePrep.nameprep).  One test is generated for each
# entry in NAMEPREP_TEST_VECTORS.
class StringPrepNamePrepTest < Test::Unit::TestCase
  include Net::IMAP::StringPrep
  include Net::IMAP::StringPrep::NamePrep

  # The following test cases were taken from
  # https://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.txt
  # ...mostly from Appendix A.

  # Hash[name, [in, expected, stored = false]], where +expected+ is one of:
  # * a String:                       the expected output
  # * an exception Class:             the expected error
  # * [exception Class, message]:     the expected error and a String or
  #                                   Regexp that its message must match
  NAMEPREP_TEST_VECTORS = {
    "Map to nothing" => [
      "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B" \
      "bar\xE2\x80\x8B\xE2\x81\xA0" \
      "baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF",
      "foobarbaz"
    ],
    "Case folding ASCII U+0043 U+0041 U+0046 U+0045" => [
      "CAFE", "cafe"
    ],
    "Case folding 8bit U+00DF (german sharp s)" => [
      "\xC3\x9F", "ss"
    ],
    "Case folding U+0130 (turkish capital I with dot)" => [
      "\xC4\xB0", "i\xcc\x87"
    ],
    "Case folding multibyte U+0143 U+037A" => [
      "\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9"
    ],
    "Case folding U+2121 U+33C6 U+1D7BB" => [
      "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB",
      "telc\xE2\x88\x95""kg\xCF\x83"
    ],
    "Normalization of U+006a U+030c U+00A0 U+00AA" => [
      "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a"
    ],
    "Case folding U+1FB7 and normalization" => [
      "\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9"
    ],
    "Incorrect UTF-8 encoding of U+00DF" => [
      # n.b. this example isn't found in Appendix A, but is in §7.
      "\xC3\xdf", [ArgumentError, /invalid byte sequence in UTF-8/]
    ],
    "Incorrect UTF-8 encoding of U+01F0" => [
      # n.b. Appendix A doesn't indicate an error for this, but §7 does.
      "\xC7\xF0", [ArgumentError, /invalid byte sequence in UTF-8/]
    ],
    "Self-reverting case folding U+0390 and normalization" => [
      "\xCE\x90", "\xCE\x90"
    ],
    "Self-reverting case folding U+03B0 and normalization" => [
      "\xCE\xB0", "\xCE\xB0"
    ],
    "Self-reverting case folding U+1E96 and normalization" => [
      "\xE1\xBA\x96", "\xE1\xBA\x96"
    ],
    "Self-reverting case folding U+1F56 and normalization" => [
      "\xE1\xBD\x96", "\xE1\xBD\x96"
    ],
    "ASCII space character U+0020" => [
      "\x20", "\x20"
    ],
    "Non-ASCII 8bit space character U+00A0" => [
      "\xC2\xA0", "\x20"
    ],
    "Non-ASCII multibyte space character U+1680" => [
      "\xE1\x9A\x80", ProhibitedCodepoint
    ],
    "Non-ASCII multibyte space character U+2000" => [
      "\xE2\x80\x80", "\x20"
    ],
    "Zero Width Space U+200b" => [
      "\xE2\x80\x8b", ""
    ],
    "Non-ASCII multibyte space character U+3000" => [
      "\xE3\x80\x80", "\x20"
    ],
    "ASCII control characters U+0010 U+007F" => [
      "\x10\x7F", "\x10\x7F"
    ],
    "Non-ASCII 8bit control character U+0085" => [
      "\xC2\x85", ProhibitedCodepoint
    ],
    "Non-ASCII multibyte control character U+180E" => [
      "\xE1\xA0\x8E", ProhibitedCodepoint
    ],
    "Zero Width No-Break Space U+FEFF" => [
      "\xEF\xBB\xBF", ""
    ],
    "Non-ASCII control character U+1D175" => [
      "\xF0\x9D\x85\xB5", ProhibitedCodepoint
    ],
    "Plane 0 private use character U+F123" => [
      "\xEF\x84\xA3", ProhibitedCodepoint
    ],
    "Plane 15 private use character U+F1234" => [
      "\xF3\xB1\x88\xB4", ProhibitedCodepoint
    ],
    "Plane 16 private use character U+10F234" => [
      "\xF4\x8F\x88\xB4", ProhibitedCodepoint
    ],
    "Non-character code point U+8FFFE" => [
      "\xF2\x8F\xBF\xBE", ProhibitedCodepoint
    ],
    "Non-character code point U+10FFFF" => [
      "\xF4\x8F\xBF\xBF", ProhibitedCodepoint
    ],
    "Surrogate code U+DF42" => [
      "\xED\xBD\x82", [ArgumentError, /invalid byte sequence in UTF-8/]
    ],
    "Non-plain text character U+FFFD" => [
      "\xEF\xBF\xBD", ProhibitedCodepoint
    ],
    "Ideographic description character U+2FF5" => [
      "\xE2\xBF\xB5", ProhibitedCodepoint
    ],
    "Display property character U+0341" => [
      "\xCD\x81", "\xCC\x81"
    ],
    "Left-to-right mark U+200E" => [
      "\xE2\x80\x8E", ProhibitedCodepoint
    ],
    "Deprecated U+202A" => [
      "\xE2\x80\xAA", ProhibitedCodepoint
    ],
    "Language tagging character U+E0001" => [
      "\xF3\xA0\x80\x81", ProhibitedCodepoint
    ],
    "Language tagging character U+E0042" => [
      "\xF3\xA0\x81\x82", ProhibitedCodepoint
    ],
    "Bidi: RandALCat character U+05BE and LCat characters" => [
      "foo\xD6\xBE""bar",
      [BidiStringError, /string with RandALCat.* must not contain LCat/]
    ],
    "Bidi: RandALCat character U+FD50 and LCat characters" => [
      "foo\xEF\xB5\x90""bar",
      [BidiStringError, /string with RandALCat.* must not contain LCat/]
    ],
    "Bidi: RandALCat character U+FB38 and LCat characters" => [
      "foo\xEF\xB9\xB6""bar", "foo \xd9\x8e""bar"
    ],
    "Bidi: RandALCat without trailing RandALCat U+0627 U+0031" => [
      "\xD8\xA7\x31",
      [BidiStringError,
       /string with RandALCat.* must start and end with RandALCat/]
    ],
    "Bidi: RandALCat character U+0627 U+0031 U+0628" => [
      "\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8"
    ],
    "Unassigned code point U+E0002" => [
      "\xF3\xA0\x80\x82",
      [ProhibitedCodepoint, /contains.* unassigned code points.*Unicode 3.2/i],
      true
    ],
    "Larger test (shrinking)" => [
      "X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2" \
      "\xaa\xce\xb0\xe2\x80\x80",
      "xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 "
    ],
    "Larger test (expanding)" => [
      "X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
      "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88" \
      "\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82\xa2\xe3\x83\x91" \
      "\xe3\x83\xbc\xe3\x83\x88"
    ],
  }.freeze

  # Define one test per vector.
  NAMEPREP_TEST_VECTORS.each do |comment, (input, output, stored)|
    stored ||= false
    # For a String or a bare exception Class this yields ex = output and
    # message = nil; for an [exception, message] pair it splits the pair.
    ex, message = output
    # Dispatch on +ex+, not +output+: an [exception, message] pair is an Array,
    # which is neither a String nor a Class, so matching on +output+ would
    # silently define no test at all for those vectors.
    case ex
    when String
      test comment do
        assert_equal output, nameprep(input, stored: stored), comment
      end
    when Class
      if message # in Class => ex, (String | Regexp) => message
        test comment do
          assert_raise_with_message(ex, message, comment) {
            nameprep(input, stored: stored)
          }
        end
      else # in Class => ex
        test comment do
          assert_raise(ex, comment) { nameprep(input, stored: stored) }
        end
      end
    else
      # Fail loudly at load time rather than skipping a malformed vector.
      raise ArgumentError,
            "invalid expected output for test vector #{comment.inspect}: " \
            "#{output.inspect}"
    end
  end

end

test/net/imap/test_stringprep_profiles.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# frozen_string_literal: true
22

33
require "net/imap"
4-
require "net/imap/sasl/stringprep"
54
require "test/unit"
65

76
class StringPrepProfilesTest < Test::Unit::TestCase

0 commit comments

Comments
 (0)