Skip to content

Commit d770427

Browse files
trinistr authored and andrykonchin committed
Add spec for CGI.unescapeURIComponent and improve spec for .escapeURIComponent
1 parent 526c8fd commit d770427

File tree

2 files changed

+167
-20
lines changed

2 files changed

+167
-20
lines changed

library/cgi/escapeURIComponent_spec.rb

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,48 +6,67 @@
66
end
77

88
describe "CGI.escapeURIComponent" do
9-
it "escapes whitespace" do
10-
string = "&<>\" \xE3\x82\x86\xE3\x82\x93\xE3\x82\x86\xE3\x82\x93"
11-
CGI.escapeURIComponent(string).should == '%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'
9+
it "percent-encodes characters reserved according to RFC 3986" do
10+
# https://www.rfc-editor.org/rfc/rfc3986#section-2.2
11+
string = ":/?#[]@!$&'()*+,;="
12+
CGI.escapeURIComponent(string).should == "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
1213
end
1314

14-
it "does not escape with unreserved characters" do
15+
it "does not percent-encode unreserved characters according to RFC 3986" do
1516
string = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
1617
CGI.escapeURIComponent(string).should == "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
1718
end
1819

19-
it "supports String with invalid encoding" do
20-
string = "\xC0\<\<".dup.force_encoding("UTF-8")
21-
CGI.escapeURIComponent(string).should == "%C0%3C%3C"
20+
it "encodes % character as %25" do
21+
CGI.escapeURIComponent("%").should == "%25"
2222
end
2323

24-
it "processes String bytes one by one, not characters" do
25-
CGI.escapeURIComponent("β").should == "%CE%B2" # "β" bytes representation is CE B2
24+
# Compare to .escape which uses "+".
25+
it "percent-encodes single whitespace" do
26+
CGI.escapeURIComponent(" ").should == "%20"
2627
end
2728

28-
it "raises a TypeError with nil" do
29-
-> {
30-
CGI.escapeURIComponent(nil)
31-
}.should raise_error(TypeError, 'no implicit conversion of nil into String')
29+
it "percent-encodes all non-reserved and non-unreserved ASCII characters" do
30+
special_set = ":/?#[]@!$&'()*+,;=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
31+
all_other = (0x00..0x7F).filter_map { |i| i.chr unless special_set.include?(i.chr) }.join
32+
encoded = CGI.escapeURIComponent(all_other)
33+
encoded.should.match?(/\A(?:%[0-9A-F]{2}){#{all_other.length}}\z/)
3234
end
3335

34-
it "encodes empty string" do
35-
CGI.escapeURIComponent("").should == ""
36+
it "percent-encodes non-ASCII bytes" do
37+
bytes = (0x80..0xFF).map(&:chr).join
38+
encoded = CGI.escapeURIComponent(bytes)
39+
encoded.should.match?(/\A(?:%[0-9A-F]{2}){#{bytes.length}}\z/)
3640
end
3741

38-
it "encodes single whitespace" do
39-
CGI.escapeURIComponent(" ").should == "%20"
42+
it "processes multi-byte characters as separate bytes, percent-encoding each one" do
43+
CGI.escapeURIComponent("β").should == "%CE%B2" # "β" bytes representation is CE B2
4044
end
4145

42-
it "encodes double whitespace" do
43-
CGI.escapeURIComponent("  ").should == "%20%20"
46+
it "produces a copy of an empty string" do
47+
string = "".encode(Encoding::BINARY)
48+
encoded = CGI.escapeURIComponent(string)
49+
encoded.should == ""
50+
encoded.encoding.should == Encoding::BINARY
51+
string.should_not.equal?(encoded)
4452
end
4553

46-
it "preserves encoding" do
54+
it "preserves string's encoding" do
4755
string = "whatever".encode("ASCII-8BIT")
4856
CGI.escapeURIComponent(string).encoding.should == Encoding::ASCII_8BIT
4957
end
5058

59+
it "processes even strings with invalid encoding, percent-encoding octets as-is" do
60+
string = "\xC0<<".dup.force_encoding("UTF-8")
61+
CGI.escapeURIComponent(string).should == "%C0%3C%3C"
62+
end
63+
64+
it "raises a TypeError with nil" do
65+
-> {
66+
CGI.escapeURIComponent(nil)
67+
}.should raise_error(TypeError, "no implicit conversion of nil into String")
68+
end
69+
5170
it "uses implicit type conversion to String" do
5271
object = Object.new
5372
def object.to_str
library/cgi/unescapeURIComponent_spec.rb

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
require_relative '../../spec_helper'
2+
3+
ruby_version_is ""..."4.0" do
4+
require 'cgi'
5+
end
6+
ruby_version_is "4.0" do
7+
require 'cgi/escape'
8+
end
9+
10+
describe "CGI.unescapeURIComponent" do
11+
it "decodes any percent-encoded octets to their corresponding bytes according to RFC 3986" do
12+
string = (0x00..0xff).map { |i| "%%%02x" % i }.join
13+
expected = (0x00..0xff).map { |i| i.chr }.join.force_encoding(Encoding::UTF_8)
14+
CGI.unescapeURIComponent(string).should == expected
15+
end
16+
17+
it "disregards case of characters in a percent-encoding triplet" do
18+
CGI.unescapeURIComponent("%CE%B2abc").should == "βabc"
19+
CGI.unescapeURIComponent("%ce%b2ABC").should == "βABC"
20+
end
21+
22+
it "leaves any non-percent-encoded characters as-is" do
23+
string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ:/?#[]@!$&'()*+,;=\t\x0D\xFFβᛉ▒90%"
24+
decoded = CGI.unescapeURIComponent(string)
25+
decoded.should == string
26+
string.should_not.equal?(decoded)
27+
end
28+
29+
it "leaves sequences which can't be a percent-encoded octet as-is" do
30+
string = "%AZ%B"
31+
decoded = CGI.unescapeURIComponent(string)
32+
decoded.should == string
33+
string.should_not.equal?(decoded)
34+
end
35+
36+
it "creates a String with the specified target Encoding" do
37+
string = CGI.unescapeURIComponent("%D2%3C%3CABC", Encoding::ISO_8859_1)
38+
string.encoding.should == Encoding::ISO_8859_1
39+
string.should == "Ò<<ABC".encode("ISO-8859-1")
40+
end
41+
42+
it "accepts a string name of an Encoding" do
43+
CGI.unescapeURIComponent("%D2%3C%3CABC", "ISO-8859-1").should == "Ò<<ABC".encode("ISO-8859-1")
44+
end
45+
46+
it "raises ArgumentError if specified encoding is unknown" do
47+
-> { CGI.unescapeURIComponent("ABC", "ISO-JOKE-1") }.should raise_error(ArgumentError, "unknown encoding name - ISO-JOKE-1")
48+
end
49+
50+
ruby_version_is ""..."4.0" do
51+
it "uses CGI.accept_charset as the default target encoding" do
52+
original_charset = CGI.accept_charset
53+
CGI.accept_charset = "ISO-8859-1"
54+
decoded = CGI.unescapeURIComponent("%D2%3C%3CABC")
55+
decoded.should == "Ò<<ABC".encode("ISO-8859-1")
56+
decoded.encoding.should == Encoding::ISO_8859_1
57+
ensure
58+
CGI.accept_charset = original_charset
59+
end
60+
61+
it "has CGI.accept_charset as UTF-8 by default" do
62+
decoded = CGI.unescapeURIComponent("%CE%B2ABC")
63+
decoded.should == "βABC"
64+
decoded.encoding.should == Encoding::UTF_8
65+
end
66+
end
67+
68+
ruby_version_is "4.0" do
69+
# "cgi/escape" does not have methods to access @@accept_charset.
70+
# The full "cgi" gem provides them, making it possible to change it.
71+
it "uses CGI's @@accept_charset as the default target encoding" do
72+
original_charset = CGI.class_variable_get(:@@accept_charset)
73+
CGI.class_variable_set(:@@accept_charset, "ISO-8859-1")
74+
decoded = CGI.unescapeURIComponent("%D2%3C%3CABC")
75+
decoded.should == "Ò<<ABC".encode("ISO-8859-1")
76+
decoded.encoding.should == Encoding::ISO_8859_1
77+
ensure
78+
CGI.class_variable_set(:@@accept_charset, original_charset)
79+
end
80+
81+
it "has CGI's @@accept_charset as UTF-8 by default" do
82+
decoded = CGI.unescapeURIComponent("%CE%B2ABC")
83+
decoded.should == "βABC"
84+
decoded.encoding.should == Encoding::UTF_8
85+
end
86+
end
87+
88+
context "when source string specifies octets invalid in target encoding" do
89+
it "uses source string's encoding" do
90+
string = "%A2%A6%A3".encode(Encoding::SHIFT_JIS)
91+
decoded = CGI.unescapeURIComponent(string, Encoding::US_ASCII)
92+
decoded.encoding.should == Encoding::SHIFT_JIS
93+
decoded.should == "「ヲ」".encode(Encoding::SHIFT_JIS)
94+
decoded.valid_encoding?.should be_true
95+
end
96+
97+
it "uses source string's encoding even if it's also invalid" do
98+
string = "%FF".encode(Encoding::US_ASCII)
99+
decoded = CGI.unescapeURIComponent(string, Encoding::SHIFT_JIS)
100+
decoded.encoding.should == Encoding::US_ASCII
101+
decoded.should == "\xFF".dup.force_encoding(Encoding::US_ASCII)
102+
decoded.valid_encoding?.should be_false
103+
end
104+
end
105+
106+
it "decodes an empty string as an empty string with target encoding" do
107+
string = "".encode(Encoding::BINARY)
108+
decoded = CGI.unescapeURIComponent(string, "UTF-8")
109+
decoded.should == ""
110+
decoded.encoding.should == Encoding::UTF_8
111+
string.should_not.equal?(decoded)
112+
end
113+
114+
it "raises a TypeError with nil" do
115+
-> {
116+
CGI.unescapeURIComponent(nil)
117+
}.should raise_error(TypeError, "no implicit conversion of nil into String")
118+
end
119+
120+
it "uses implicit type conversion to String" do
121+
object = Object.new
122+
def object.to_str
123+
"a%20b"
124+
end
125+
126+
CGI.unescapeURIComponent(object).should == "a b"
127+
end
128+
end

0 commit comments

Comments
 (0)