Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
* Added support for referring imported Python names as by `from ... import ...` (#1154)
* Added the `basilisp.url` namespace for structured URL manipulation (#1239)

### Changed
* Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to explicitly handle them (#1192)
Expand Down
11 changes: 11 additions & 0 deletions docs/api/url.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
basilisp.url
============

.. toctree::
:maxdepth: 2
:caption: Contents:

.. autonamespace:: basilisp.url
:members:
:undoc-members:
:exclude-members: ->URL, map->URL
152 changes: 152 additions & 0 deletions src/basilisp/url.lpy
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
(ns basilisp.url
  "Namespace providing simple utility functions for creating and modifying URLs,
  wrapping functionality provided in :external:py:mod:`urllib.parse` in a more
  Clojure-like API.

  The primary entrypoint function is :lpy:fn:`url` which will parse the input
  value into an :lpy:rec:`URL` record type, which allows for structured manipulation.
  Because the source type is a record, it can be manipulated using standard Clojure
  idioms such as :lpy:fn:`basilisp.core/assoc`.

  After any modifications are made, :lpy:fn:`basilisp.core/str` can turn the URL
  record type into a string that can be used wherever string URLs are expected.

  .. code-block::

     (-> (url/url \"https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries\")
         (assoc :query {\"mobile\" [\"true\"]})
         (str))
     ;; => \"https://en.wikipedia.org/wiki/Python_%28programming_language%29?mobile=true#Libraries\"

  .. note::

     Per the note in :external:py:func:`urllib.parse.urlunparse`, a URL which is
     parsed and then converted back into a string may not be identical to the
     input, even if no changes are made to the URL parts, but the resulting URL
     should be equivalent.

  .. warning::

     No validation occurs creating a URL string from the provided URL record type,
     so users should take care to validate any components carefully before using
     these results.

  To create a new URL record, :lpy:fn:`basilisp.core/assoc` keys onto
  :lpy:var:`blank-url`. This ensures that your URL has the correct defaults."
  (:import urllib.parse)
  (:require
   [basilisp.string :as str]))

(defn ^:private authority
  "Return a URL's authority (called ``netloc`` in :external:py:mod:`urllib.parse`),
  which consists of the 4 optional members: username, password, hostname, and port.

  Credentials are emitted (as ``username:password@``) whenever either ``:username``
  or ``:password`` is non-``nil``; the missing member is rendered as an empty
  string and both members are percent-encoded with no safe characters.

  Hostnames containing a ``:`` (IPv6 literals, which
  :external:py:func:`urllib.parse.urlparse` reports without their enclosing square
  brackets) are wrapped in ``[`` and ``]`` again so the address cannot be confused
  with the ``:port`` separator. Hostnames which are already bracketed are left
  untouched."
  [url]
  (let [creds     (when (or (:username url) (:password url))
                    (str/join ":" [(urllib.parse/quote (or (:username url) "") ** :safe "")
                                   (urllib.parse/quote (or (:password url) "") ** :safe "")]))
        raw-host  (or (:hostname url) "")
        ;; A colon is only legal in a host when it is an IP literal, which must be
        ;; bracketed in the authority component.
        hostname  (if (and (str/includes? raw-host ":")
                           (not (str/starts-with? raw-host "[")))
                    (str "[" raw-host "]")
                    raw-host)
        host+port (if (:port url)
                    (str/join ":" [hostname (:port url)])
                    hostname)]
    (if creds
      (str/join "@" [creds host+port])
      host+port)))

(defn ^:private query-string
  "Encode the ``:query`` element of a URL as a query string.

  ``:query`` is a map of keys to vectors of values. Each key is paired with every
  one of its values to form a flat Python list of 2-tuples, which is then passed
  to :external:py:func:`urllib.parse.urlencode`."
  [url]
  (let [pairs (for [[k vs] (:query url)
                    v      vs]
                #py (k v))]
    (urllib.parse/urlencode (python/list pairs))))

(defrecord URL [scheme username password hostname port path params query fragment]
  ;; Stringifying a URL reassembles the 6 components expected by
  ;; ``urllib.parse.urlunparse``: the authority and query string are rebuilt from
  ;; their structured fields and the path is percent-encoded.
  (__str__ [self]
    (urllib.parse/urlunparse
     #py (scheme
          (authority self)
          (urllib.parse/quote path)
          params
          (query-string self)
          fragment))))

;; Protocol implemented by every type which can be converted into a URL record.
;; The public ``url`` function dispatches through ``to-url*``.
(defprotocol URLSource
  (to-url* [this]
    "Convert the input type to an :lpy:rec:`URL`."))

(extend-protocol URLSource
  ;; A parsed result is unpacked field by field. Username, password, and path are
  ;; percent-decoded and the query string is expanded into a map of string keys to
  ;; vectors of values.
  urllib.parse/ParseResult
  (to-url* [this]
    (let [username (some-> (.-username this) (urllib.parse/unquote))
          password (some-> (.-password this) (urllib.parse/unquote))
          path     (urllib.parse/unquote (.-path this))
          query    (py->lisp (urllib.parse/parse-qs (.-query this))
                             :keywordize-keys false)]
      (->URL (.-scheme this)
             username
             password
             (.-hostname this)
             (.-port this)
             path
             (.-params this)
             query
             (.-fragment this))))

  ;; Strings are parsed first and then converted via the ParseResult implementation.
  python/str
  (to-url* [this]
    (-> this
        (urllib.parse/urlparse)
        (to-url*))))

(defn url
  "Construct an :lpy:rec:`URL` record from the input value (such as a string) as by
  :external:py:func:`urllib.parse.urlparse`.

  :lpy:rec:`URL` types have the following fields which you can manipulate directly
  using :lpy:fn:`basilisp.core/assoc`. The default value for each field is an empty
  string unless otherwise noted.

  * ``:scheme``
  * ``:username`` (default ``nil``)
  * ``:password`` (default ``nil``)
  * ``:hostname`` (default ``nil``)
  * ``:port`` (default ``nil``)
  * ``:path``
  * ``:params``
  * ``:query`` (default ``{}``)
  * ``:fragment``

  .. note::

     Component fields of what Python calls the ``netloc`` (\"network location\")
     must be ``nil`` to be excluded from the final URL output. Empty strings are
     not equivalent to ``nil``. These include ``:username``, ``:password``,
     ``:hostname``, and ``:port``.

  .. note::

     The ``:query`` component should be a mapping of string keys to vectors of
     values:

     .. code-block::

        (:query (url/url \"http://localhost/some/path?a=1&a=2&b=3\"))
        ;; => {\"b\" [\"3\"] \"a\" [\"1\" \"2\"]}

  .. note::

     ``url`` always decodes percent-encoded ``:username``, ``:password``, ``:path``, and
     ``:query`` values. Users should not attempt to URL encode values added to the
     :lpy:rec:`URL` object returned by this function. Converting the ``URL`` back into
     a string will URL encode those same fields.

  .. warning::

     Because this function relies on ``urllib.parse.urlparse``, it does not perform
     any validation of the input URLs and all the caveats of that function apply here."
  [url-str]
  ;; Dispatch on the input type via the URLSource protocol.
  (to-url* url-str))

(def blank-url
  "Blank :lpy:rec:`URL` type which can be used as a base for URL manipulation.

  The value is produced by parsing the empty string with :lpy:fn:`url`."
  (url ""))

94 changes: 94 additions & 0 deletions tests/basilisp/test_url.lpy
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
(ns tests.basilisp.test-url
(:require
[basilisp.url :as url]
[basilisp.test :refer [deftest is are testing]]))

(deftest blank-url-test
  ;; The blank URL must carry exactly the documented defaults for every field.
  (let [expected (url/map->URL {:scheme   ""
                                :username nil
                                :password nil
                                :hostname nil
                                :port     nil
                                :path     ""
                                :params   ""
                                :query    {}
                                :fragment ""})]
    (is (= url/blank-url expected))))

(deftest url-test
  ;; Each case pairs the expected URL record with the string it is parsed from.
  (are [res url-str] (= res (url/url url-str))
    ;; scheme, host, path, and fragment
    (assoc url/blank-url
           :scheme "https"
           :hostname "en.wikipedia.org"
           :path "/wiki/Python_(programming_language)"
           :fragment "Libraries")
    "https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries"

    ;; percent-encoded credentials are decoded
    (assoc url/blank-url
           :scheme "http"
           :username "user name"
           :password "pass word"
           :hostname "localhost")
    "http://user%20name:pass%20word@localhost"

    ;; empty username is preserved as an empty string
    (assoc url/blank-url
           :scheme "http"
           :username ""
           :password "pass word"
           :hostname "localhost")
    "http://:pass%20word@localhost"

    ;; empty password is preserved as an empty string
    (assoc url/blank-url
           :scheme "http"
           :username "user name"
           :password ""
           :hostname "localhost")
    "http://user%20name:@localhost"

    ;; percent-encoded path is decoded
    (assoc url/blank-url
           :scheme "http"
           :hostname "localhost"
           :path "/path with/some spaces/")
    "http://localhost/path%20with/some%20spaces/"

    ;; query values are decoded into a map of vectors
    (assoc url/blank-url
           :scheme "http"
           :hostname "localhost"
           :path "/path/to/some/resource"
           :query {"arg" ["val with spaces"]})
    "http://localhost/path/to/some/resource?arg=val+with+spaces"))

(deftest url-authority-test
  ;; Verify how the authority portion is rendered when converting a URL record
  ;; back into a string for each combination of its optional members.
  (let [base-url (url/url "http://localhost")]
    (testing "username and password"
      (is (= (str (assoc base-url :username "user"))
             "http://user:@localhost"))
      (is (= (str (assoc base-url :password "pass"))
             "http://:pass@localhost"))
      (is (= (str (assoc base-url :username "" :password ""))
             "http://:@localhost"))
      (is (= (str (assoc base-url :username "user" :password "pass"))
             "http://user:pass@localhost"))
      (is (= (str (assoc base-url :username "user name" :password "pass word"))
             "http://user%20name:pass%20word@localhost")))

    (testing "hostname and port"
      (is (= (str (assoc base-url :port 8080))
             "http://localhost:8080"))
      (is (= (str (assoc base-url :hostname nil :port 8080))
             "http://:8080"))
      (is (= (str (assoc base-url :hostname "chris-laptop.local" :port 8080))
             "http://chris-laptop.local:8080")))

    (testing "username, password, hostname, and port"
      ;; Non-ASCII and space characters in the credentials are percent-encoded.
      (is (= (str (assoc base-url
                         :username "aîlene"
                         :password "pass with space"
                         :hostname "chris-laptop.local"
                         :port 8080))
             "http://a%C3%AElene:pass%20with%20space@chris-laptop.local:8080")))))

(deftest url-query-string-test
  ;; Repeated keys are collected into a single vector; either ordering of the
  ;; repeated values is accepted.
  (let [parsed   (:query (url/url "http://localhost?a=1&b=2&b=3"))
        accepted #{{"a" ["1"] "b" ["2" "3"]}
                   {"a" ["1"] "b" ["3" "2"]}}]
    (is (contains? accepted parsed))))