Skip to content

Commit 8cb4c71

Browse files
author
José Valim
committed
Merge pull request #2392 from lexmag/improve-uri
Make `URI.encode/1` conform to RFC 3986. Introduce the `URI.encode/2` function.
2 parents afc7644 + ffa2abd commit 8cb4c71

File tree

2 files changed

+53
-37
lines changed

2 files changed

+53
-37
lines changed

lib/elixir/lib/uri.ex

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -145,30 +145,53 @@ defmodule URI do
145145
encode(to_string(k)) <> "=" <> encode(to_string(v))
146146
end
147147

148+
# RFC3986, section 2.2
149+
@doc """
150+
Returns true if the character is a "reserved" character in a URI.
151+
"""
152+
def char_reserved?(c) do
153+
c in ':/?#[]@!$&\'()*+,;='
154+
end
155+
156+
# RFC3986, section 2.3
157+
@doc """
158+
Returns true if the character is an "unreserved" character in a URI.
159+
"""
160+
def char_unreserved?(c) do
161+
c in ?0..?9 or
162+
c in ?a..?z or
163+
c in ?A..?Z or
164+
c in '~_-.'
165+
end
166+
167+
@doc """
168+
Returns true if the character is allowed unescaped in a URI.
169+
"""
170+
def char_unescaped?(c) do
171+
char_reserved?(c) or char_unreserved?(c)
172+
end
173+
148174
@doc """
149175
Percent-escape a URI.
176+
Accepts a `predicate` function as an argument to specify whether a character can be left as is.
150177
151178
## Example
152179
153-
iex> URI.encode("http://elixir-lang.org/getting_started/2.html")
154-
"http%3A%2F%2Felixir-lang.org%2Fgetting_started%2F2.html"
180+
iex> URI.encode("ftp://s-ite.tld/?value=put it+й")
181+
"ftp://s-ite.tld/?value=put%20it+%D0%B9"
155182
156183
"""
157-
def encode(s), do: for(<<c <- s>>, into: "", do: percent(c))
158-
159-
defp percent(32), do: <<?+>>
160-
defp percent(?-), do: <<?->>
161-
defp percent(?_), do: <<?_>>
162-
defp percent(?.), do: <<?.>>
163-
164-
defp percent(c)
165-
when c >= ?0 and c <= ?9
166-
when c >= ?a and c <= ?z
167-
when c >= ?A and c <= ?Z do
168-
<<c>>
184+
def encode(str, predicate \\ &char_unescaped?/1) do
185+
for <<c <- str>>, into: "", do: percent(c, predicate)
169186
end
170187

171-
defp percent(c), do: "%" <> hex(bsr(c, 4)) <> hex(band(c, 15))
188+
defp percent(c, predicate) do
189+
if predicate.(c) do
190+
<<c>>
191+
else
192+
"%" <> hex(bsr(c, 4)) <> hex(band(c, 15))
193+
end
194+
end
172195

173196
defp hex(n) when n <= 9, do: <<n + ?0>>
174197
defp hex(n), do: <<n + ?A - 10>>
@@ -187,25 +210,22 @@ defmodule URI do
187210
end
188211

189212
def decode(<<?%, hex1, hex2, tail :: binary >>, uri) do
190-
<< bsl(hex2dec(hex1, uri), 4) + hex2dec(hex2, uri) >> <> decode(tail, uri)
213+
<<bsl(hex_to_dec(hex1, uri), 4) + hex_to_dec(hex2, uri)>> <> decode(tail, uri)
191214
end
192215

193216
def decode(<<head, tail :: binary >>, uri) do
194-
<<check_plus(head)>> <> decode(tail, uri)
217+
<<head>> <> decode(tail, uri)
195218
end
196219

197220
def decode(<<>>, _uri), do: <<>>
198221

199-
defp hex2dec(n, _uri) when n in ?A..?F, do: n - ?A + 10
200-
defp hex2dec(n, _uri) when n in ?a..?f, do: n - ?a + 10
201-
defp hex2dec(n, _uri) when n in ?0..?9, do: n - ?0
202-
defp hex2dec(_n, uri) do
222+
defp hex_to_dec(n, _uri) when n in ?A..?F, do: n - ?A + 10
223+
defp hex_to_dec(n, _uri) when n in ?a..?f, do: n - ?a + 10
224+
defp hex_to_dec(n, _uri) when n in ?0..?9, do: n - ?0
225+
defp hex_to_dec(_n, uri) do
203226
raise ArgumentError, "malformed URI #{inspect uri}"
204227
end
205228

206-
defp check_plus(?+), do: 32
207-
defp check_plus(c), do: c
208-
209229
@doc """
210230
Parses a URI into components.
211231

lib/elixir/test/elixir/uri_test.exs

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ Code.require_file "test_helper.exs", __DIR__
33
defmodule URITest do
44
use ExUnit.Case, async: true
55

6-
test :encode_with_binary do
7-
raw = <<13, 10, 38, 60, 62, 34, 32, 227, 130, 134, 227, 130, 147, 227, 130, 134, 227, 130, 147>>
8-
expected = "%0D%0A%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93"
9-
assert URI.encode(raw) == expected
6+
test :encode do
7+
assert URI.encode("4_test.is-s~") == "4_test.is-s~"
8+
assert URI.encode("\r\n&<%>\" ゆ", &URI.char_unreserved?/1) ==
9+
"%0D%0A%26%3C%25%3E%22%20%E3%82%86"
1010
end
1111

1212
test :encode_query do
@@ -44,11 +44,12 @@ defmodule URITest do
4444
end
4545

4646
test :decode do
47-
data_to_be_decoded = "%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93"
48-
assert URI.decode(data_to_be_decoded) == "&<>\" ゆんゆん"
47+
assert URI.decode("%0D%0A%26%3C%25%3E%22%20%E3%82%86") == "\r\n&<%>\" ゆ"
48+
assert URI.decode("%2f%41%4a%55") == "/AJU"
49+
assert URI.decode("4_t+st.is-s~") == "4_t+st.is-s~"
4950

50-
assert_raise ArgumentError, ~r/malformed URI/, fn ->
51-
assert URI.decode("% invalid")
51+
assert_raise ArgumentError, ~R/malformed URI/, fn ->
52+
URI.decode("% invalid")
5253
end
5354
end
5455

@@ -180,9 +181,4 @@ defmodule URITest do
180181
assert to_string(URI.parse("http://google.com?q=lol")) == "http://google.com?q=lol"
181182
assert to_string(URI.parse("http://google.com?q=lol#omg")) == "http://google.com?q=lol#omg"
182183
end
183-
184-
test :escape do
185-
assert URI.decode("%2f%41%4a%55") == "/AJU"
186-
assert URI.decode("%2F%41%4A%55") == "/AJU"
187-
end
188184
end

0 commit comments

Comments
 (0)