|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +require "net/imap" |
| 4 | +require "test/unit" |
| 5 | + |
# Data-driven tests for +nameprep+: one test case is generated for every
# entry of NAMEPREP_TEST_VECTORS.
class StringPrepNamePrepTest < Test::Unit::TestCase
  include Net::IMAP::StringPrep
  include Net::IMAP::StringPrep::NamePrep

  # The following test cases were taken from
  # https://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.txt
  # ...mostly from Appendix A.

  # Hash[name, [input, expected, stored = false]]
  #
  # +expected+ takes one of three shapes:
  # * a String                       -- the exact return value of +nameprep+
  # * an exception Class             -- +nameprep+ must raise it
  # * [exception Class, msg pattern] -- +nameprep+ must raise it with a
  #                                     message matching the String/Regexp
  #
  # +stored+ is forwarded as the +stored:+ keyword argument of +nameprep+.
  NAMEPREP_TEST_VECTORS = {
    "Map to nothing" => [
      "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B" \
      "bar\xE2\x80\x8B\xE2\x81\xA0" \
      "baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF",
      "foobarbaz"
    ],
    "Case folding ASCII U+0043 U+0041 U+0046 U+0045" => [
      "CAFE", "cafe"
    ],
    "Case folding 8bit U+00DF (german sharp s)" => [
      "\xC3\x9F", "ss"
    ],
    "Case folding U+0130 (turkish capital I with dot)" => [
      "\xC4\xB0", "i\xcc\x87"
    ],
    "Case folding multibyte U+0143 U+037A" => [
      "\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9"
    ],
    "Case folding U+2121 U+33C6 U+1D7BB" => [
      "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB",
      "telc\xE2\x88\x95""kg\xCF\x83"
    ],
    "Normalization of U+006a U+030c U+00A0 U+00AA" => [
      "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a"
    ],
    "Case folding U+1FB7 and normalization" => [
      "\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9"
    ],
    "Incorrect UTF-8 encoding of U+00DF" => [
      # n.b. this example isn't found in Appendix A, but is in §7.
      "\xC3\xdf", [ArgumentError, /invalid byte sequence in UTF-8/]
    ],
    "Incorrect UTF-8 encoding of U+01F0" => [
      # n.b. Appendix A doesn't indicate an error for this, but §7 does.
      "\xC7\xF0", [ArgumentError, /invalid byte sequence in UTF-8/]
    ],
    "Self-reverting case folding U+0390 and normalization" => [
      "\xCE\x90", "\xCE\x90"
    ],
    "Self-reverting case folding U+03B0 and normalization" => [
      "\xCE\xB0", "\xCE\xB0"
    ],
    "Self-reverting case folding U+1E96 and normalization" => [
      "\xE1\xBA\x96", "\xE1\xBA\x96"
    ],
    "Self-reverting case folding U+1F56 and normalization" => [
      "\xE1\xBD\x96", "\xE1\xBD\x96"
    ],
    "ASCII space character U+0020" => [
      "\x20", "\x20"
    ],
    "Non-ASCII 8bit space character U+00A0" => [
      "\xC2\xA0", "\x20"
    ],
    "Non-ASCII multibyte space character U+1680" => [
      "\xE1\x9A\x80", ProhibitedCodepoint
    ],
    "Non-ASCII multibyte space character U+2000" => [
      "\xE2\x80\x80", "\x20"
    ],
    "Zero Width Space U+200b" => [
      "\xE2\x80\x8b", ""
    ],
    "Non-ASCII multibyte space character U+3000" => [
      "\xE3\x80\x80", "\x20"
    ],
    "ASCII control characters U+0010 U+007F" => [
      "\x10\x7F", "\x10\x7F"
    ],
    "Non-ASCII 8bit control character U+0085" => [
      "\xC2\x85", ProhibitedCodepoint
    ],
    "Non-ASCII multibyte control character U+180E" => [
      "\xE1\xA0\x8E", ProhibitedCodepoint
    ],
    "Zero Width No-Break Space U+FEFF" => [
      "\xEF\xBB\xBF", ""
    ],
    "Non-ASCII control character U+1D175" => [
      "\xF0\x9D\x85\xB5", ProhibitedCodepoint
    ],
    "Plane 0 private use character U+F123" => [
      "\xEF\x84\xA3", ProhibitedCodepoint
    ],
    "Plane 15 private use character U+F1234" => [
      "\xF3\xB1\x88\xB4", ProhibitedCodepoint
    ],
    "Plane 16 private use character U+10F234" => [
      "\xF4\x8F\x88\xB4", ProhibitedCodepoint
    ],
    "Non-character code point U+8FFFE" => [
      "\xF2\x8F\xBF\xBE", ProhibitedCodepoint
    ],
    "Non-character code point U+10FFFF" => [
      "\xF4\x8F\xBF\xBF", ProhibitedCodepoint
    ],
    "Surrogate code U+DF42" => [
      "\xED\xBD\x82", [ArgumentError, /invalid byte sequence in UTF-8/]
    ],
    "Non-plain text character U+FFFD" => [
      "\xEF\xBF\xBD", ProhibitedCodepoint
    ],
    "Ideographic description character U+2FF5" => [
      "\xE2\xBF\xB5", ProhibitedCodepoint
    ],
    "Display property character U+0341" => [
      "\xCD\x81", "\xCC\x81"
    ],
    "Left-to-right mark U+200E" => [
      "\xE2\x80\x8E", ProhibitedCodepoint
    ],
    "Deprecated U+202A" => [
      "\xE2\x80\xAA", ProhibitedCodepoint
    ],
    "Language tagging character U+E0001" => [
      "\xF3\xA0\x80\x81", ProhibitedCodepoint
    ],
    "Language tagging character U+E0042" => [
      "\xF3\xA0\x81\x82", ProhibitedCodepoint
    ],
    "Bidi: RandALCat character U+05BE and LCat characters" => [
      "foo\xD6\xBE""bar",
      [BidiStringError, /string with RandALCat.* must not contain LCat/]
    ],
    "Bidi: RandALCat character U+FD50 and LCat characters" => [
      "foo\xEF\xB5\x90""bar",
      [BidiStringError, /string with RandALCat.* must not contain LCat/]
    ],
    "Bidi: RandALCat character U+FB38 and LCat characters" => [
      # n.b. the input bytes below encode U+FE76, not U+FB38; the test name
      # is kept verbatim.
      "foo\xEF\xB9\xB6""bar", "foo \xd9\x8e""bar"
    ],
    "Bidi: RandALCat without trailing RandALCat U+0627 U+0031" => [
      "\xD8\xA7\x31",
      [BidiStringError,
       /string with RandALCat.* must start and end with RandALCat/]
    ],
    "Bidi: RandALCat character U+0627 U+0031 U+0628" => [
      "\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8"
    ],
    "Unassigned code point U+E0002" => [
      "\xF3\xA0\x80\x82",
      [ProhibitedCodepoint, /contains.* unassigned code points.*Unicode 3.2/i],
      true
    ],
    "Larger test (shrinking)" => [
      "X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2" \
      "\xaa\xce\xb0\xe2\x80\x80",
      "xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 "
    ],
    "Larger test (expanding)" => [
      "X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
      "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88" \
      "\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82\xa2\xe3\x83\x91" \
      "\xe3\x83\xbc\xe3\x83\x88"
    ],
  }.freeze

  # Define one test per vector. The test name is the vector's hash key.
  NAMEPREP_TEST_VECTORS.each do |comment, (input, output, stored)|
    stored ||= false
    # A bare String or Class expectation yields [output, nil] here; an
    # [exception, message] pair is split into its two parts.
    ex, message = output
    # Dispatch on +ex+, not +output+: an [exception, message] Array is neither
    # a String nor a Class, so matching on +output+ would define no test at
    # all for those vectors.
    case ex
    when String
      test comment do
        assert_equal output, nameprep(input, stored: stored), comment
      end
    when Class
      if message # [Class => ex, (String | Regexp) => message]
        test comment do
          assert_raise_with_message(ex, message, comment) {
            nameprep(input, stored: stored)
          }
        end
      else # Class => ex
        test comment do
          assert_raise(ex, comment) { nameprep(input, stored: stored) }
        end
      end
    else
      # Fail at load time rather than silently skipping a malformed vector.
      raise ArgumentError,
            "unrecognized expectation for #{comment.inspect}: #{output.inspect}"
    end
  end

end
0 commit comments