From 37833b946119d3b6de0734d1a528f4bf30f5283f Mon Sep 17 00:00:00 2001 From: Chris Rink Date: Sat, 19 Apr 2025 20:20:58 -0400 Subject: [PATCH 1/2] Add `basilisp.url` namespace for structured URL manipulation --- CHANGELOG.md | 1 + docs/api/url.rst | 11 +++ src/basilisp/url.lpy | 151 ++++++++++++++++++++++++++++++++++++ tests/basilisp/test_url.lpy | 25 ++++++ 4 files changed, 188 insertions(+) create mode 100644 docs/api/url.rst create mode 100644 src/basilisp/url.lpy create mode 100644 tests/basilisp/test_url.lpy diff --git a/CHANGELOG.md b/CHANGELOG.md index 8406e253..bfa4783a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added * Added support for referring imported Python names as by `from ... import ...` (#1154) + * Added the `basilisp.url` namespace for structured URL manipulation (#1239) ### Changed * Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to expliciltly handle them (#1192) diff --git a/docs/api/url.rst b/docs/api/url.rst new file mode 100644 index 00000000..8d8cbac5 --- /dev/null +++ b/docs/api/url.rst @@ -0,0 +1,11 @@ +basilisp.url +============ + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + +.. autonamespace:: basilisp.url + :members: + :undoc-members: + :exclude-members: ->URL, map->URL \ No newline at end of file diff --git a/src/basilisp/url.lpy b/src/basilisp/url.lpy new file mode 100644 index 00000000..193a8acf --- /dev/null +++ b/src/basilisp/url.lpy @@ -0,0 +1,151 @@ +(ns basilisp.url + "Namespace providing simple utility functions for creating and modifying URLs, + wrapping functionality provided in :external:py:mod:`urllib.parse` in a more + Clojure-like API. + + The primary entrypoint function is :lpy:fn:`url` which will parse the input + value into an :lpy:rec:`URL` record type, which allows for structured manipulation. 
+ Because the source type is a record, it can be manipulated using standard Clojure + idioms such as :lpy:fn:`basilisp.core/assoc`. + + After any modifications are made, :lpy:fn:`basilisp.core/str` can turn the URL + record type into a string that can be used wherever string URLs are expected. + + .. code-block:: + + (-> (url/url \"https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries\") + (assoc :query {\"mobile\" [\"true\"]}) + (str)) + ;; => \"https://en.wikipedia.org/wiki/Python_%28programming_language%29?mobile=true#Libraries\" + + .. note:: + + Per the note in :external:py:func:`urllib.parse.urlunparse`, it is possible that + round-trip return from this function is not identical to the input if no other + changes are made to the URL parts, but the resulting URL should be equivalent. + + .. warning:: + + No validation occurs creating a URL string from the provided URL record type, + so users should take care to validate any components carefully before using + these results. + + To create a new URL record, :lpy:fn:`basilisp.core/assoc` keys onto + :lpy:var:`blank-url`. This ensures that your URL has the correct defaults." + (:import urllib.parse) + (:require + [basilisp.string :as str])) + +(defn ^:private authority + "Return a URL's authority (called ``netloc`` in :external:py:mod:`urllib.parse`), + which consists of the 4 optional members: username, password, hostname, and port." 
+ [url] + (let [creds (when (or (:username url) (:password url)) + (str/join ":" [(urllib.parse/quote (or (:username url) "") ** :safe "") + (urllib.parse/quote (or (:password url) "") ** :safe "")])) + host+port (if (:port url) + (str/join ":" [(or (:hostname url) "") (:port url)]) + (or (:hostname url) ""))] + (if creds + (str/join "@" [creds host+port]) + host+port))) + +(defn ^:private query-string + "Convert the ``:query`` element of a URL from a map of vectors into a sequence of + key/value pairs which can be consumed by :external:py:func:`urllib.parse.urlencode`." + [url] + (->> (:query url) + (mapcat (fn [[k vs]] + (map (fn [v] #py (k v)) vs))) + (python/list) + (urllib.parse/urlencode))) + +(defrecord URL [scheme username password hostname port path params query fragment] + (__str__ [self] + (let [parts #py (scheme + (authority self) + (urllib.parse/quote path) + params + (query-string self) + fragment)] + (urllib.parse/urlunparse parts)))) + +(defprotocol URLSource + (to-url* [this] + "Convert the input type to an :lpy:rec:`URL`.")) + +(extend-protocol URLSource + urllib.parse/ParseResult + (to-url* [this] + (let [query (-> (.-query this) + (urllib.parse/parse-qs) + (py->lisp :keywordize-keys false))] + (->URL (.-scheme this) + (when-let [username (.-username this)] + (urllib.parse/unquote username)) + (when-let [password (.-password this)] + (urllib.parse/unquote password)) + (.-hostname this) + (.-port this) + (urllib.parse/unquote (.-path this)) + (.-params this) + query + (.-fragment this)))) + + python/str + (to-url* [this] + (to-url* (urllib.parse/urlparse this)))) + +(defn url + "Construct an :lpy:rec:`URL` record from the input value (such as a string) as by + :external:py:func:`urllib.parse.urlparse`. + + :lpy:rec:`URL` types have the following fields which you can manipulate directly + using :lpy:fn:`basilisp.core/assoc`. The default values for each field is an empty + string unless otherwise noted. 
+ + * ``:scheme`` + * ``:username`` (default ``nil``) + * ``:password`` (default ``nil``) + * ``:hostname`` (default ``nil``) + * ``:port`` (default ``nil``) + * ``:path`` + * ``:params`` + * ``:query`` (default ``{}``) + * ``:fragment`` + + .. note:: + + Component fields of what Python calls the ``netloc`` (\"network location\") + must be ``nil`` to be excluded from the final URL output. Empty strings are + not equivalent to ``nil``. These include ``:username``, ``:password``, + ``:hostname``, and ``:port``. + + .. note:: + + The ``:query`` component should be a mapping of string keys to vectors of + values: + + .. code-block:: + + (:query (url/url \"http://localhost/some/path?a=1&a=2&b=3\")) + ;; => {\"b\" [\"3\"] \"a\" [\"1\" \"2\"]} + + .. note:: + + ``url`` always decodes percent-encoded ``:username``, ``:password``, ``:path``, and + ``:query`` values. Users should not attempt to URL encode values added to the + :lpy:rec:`URL` object returned by that function. Converting the ``URL`` back into + a string will URL encode those same fields. + + .. warning:: + + Because this function relies on ``urllib.parse.urlparse``, it does not perform + any validation of the input URLs and all the caveats of that function apply here." + [url-str] + (to-url* url-str)) + +(def blank-url + "Blank :lpy:rec:`URL` type which can be used as a base for URL manipulation." 
+ (url "")) + diff --git a/tests/basilisp/test_url.lpy b/tests/basilisp/test_url.lpy new file mode 100644 index 00000000..711d2c02 --- /dev/null +++ b/tests/basilisp/test_url.lpy @@ -0,0 +1,25 @@ +(ns tests.basilisp.test-url + (:require + [basilisp.url :as url] + [basilisp.test :refer [deftest is]])) + +(deftest blank-url-test + (is (= url/blank-url + (url/map->URL {:scheme "" + :username nil + :password nil + :hostname nil + :port nil + :path "" + :params "" + :query {} + :fragment "" })))) + +(deftest to-url-test + (are [res url] (= res (url/to-url url)) + (assoc url/blank-url + :scheme "https" + :hostname "en.wikipedia.org" + :path "/wiki/Python_(programming_language)" + :fragment "Libraries") + "https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries")) From 2ee0d2d8404dbbf3bc4bf4cbe6b7bfbdde458b67 Mon Sep 17 00:00:00 2001 From: Chris Rink Date: Sat, 19 Apr 2025 20:58:40 -0400 Subject: [PATCH 2/2] Fix tests --- src/basilisp/url.lpy | 5 ++- tests/basilisp/test_url.lpy | 81 ++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 8 deletions(-) diff --git a/src/basilisp/url.lpy b/src/basilisp/url.lpy index 193a8acf..113a3f44 100644 --- a/src/basilisp/url.lpy +++ b/src/basilisp/url.lpy @@ -43,9 +43,10 @@ (let [creds (when (or (:username url) (:password url)) (str/join ":" [(urllib.parse/quote (or (:username url) "") ** :safe "") (urllib.parse/quote (or (:password url) "") ** :safe "")])) + hostname (or (:hostname url) "") host+port (if (:port url) - (str/join ":" [(or (:hostname url) "") (:port url)]) - (or (:hostname url) ""))] + (str/join ":" [hostname (:port url)]) + hostname)] (if creds (str/join "@" [creds host+port]) host+port))) diff --git a/tests/basilisp/test_url.lpy b/tests/basilisp/test_url.lpy index 711d2c02..365536b1 100644 --- a/tests/basilisp/test_url.lpy +++ b/tests/basilisp/test_url.lpy @@ -1,7 +1,7 @@ (ns tests.basilisp.test-url (:require [basilisp.url :as url] - [basilisp.test :refer [deftest is]])) + [basilisp.test 
:refer [deftest is are testing]])) (deftest blank-url-test (is (= url/blank-url @@ -15,11 +15,80 @@ :query {} :fragment "" })))) -(deftest to-url-test - (are [res url] (= res (url/to-url url)) +(deftest url-test + (are [res url-str] (= res (url/url url-str)) (assoc url/blank-url - :scheme "https" + :scheme "https" :hostname "en.wikipedia.org" - :path "/wiki/Python_(programming_language)" + :path "/wiki/Python_(programming_language)" :fragment "Libraries") - "https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries")) + "https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries" + + (assoc url/blank-url + :scheme "http" + :username "user name" + :password "pass word" + :hostname "localhost") + "http://user%20name:pass%20word@localhost" + + (assoc url/blank-url + :scheme "http" + :username "" + :password "pass word" + :hostname "localhost") + "http://:pass%20word@localhost" + + (assoc url/blank-url + :scheme "http" + :username "user name" + :password "" + :hostname "localhost") + "http://user%20name:@localhost" + + (assoc url/blank-url + :scheme "http" + :hostname "localhost" + :path "/path with/some spaces/") + "http://localhost/path%20with/some%20spaces/" + + (assoc url/blank-url + :scheme "http" + :hostname "localhost" + :path "/path/to/some/resource" + :query {"arg" ["val with spaces"]}) + "http://localhost/path/to/some/resource?arg=val+with+spaces")) + +(deftest url-authority-test + (let [base-url (url/url "http://localhost")] + (testing "username and password" + (is (= (str (assoc base-url :username "user")) + "http://user:@localhost")) + (is (= (str (assoc base-url :password "pass")) + "http://:pass@localhost")) + (is (= (str (assoc base-url :username "" :password "")) + "http://:@localhost")) + (is (= (str (assoc base-url :username "user" :password "pass")) + "http://user:pass@localhost")) + (is (= (str (assoc base-url :username "user name" :password "pass word")) + "http://user%20name:pass%20word@localhost"))) + + (testing "hostname 
and port" + (is (= (str (assoc base-url :port 8080)) + "http://localhost:8080")) + (is (= (str (assoc base-url :hostname nil :port 8080)) + "http://:8080")) + (is (= (str (assoc base-url :hostname "chris-laptop.local" :port 8080)) + "http://chris-laptop.local:8080"))) + + (testing "username, password, hostname, and port" + (is (= (str (assoc base-url + :username "aîlene" + :password "pass with space" + :hostname "chris-laptop.local" + :port 8080)) + "http://a%C3%AElene:pass%20with%20space@chris-laptop.local:8080"))))) + +(deftest url-query-string-test + (is (contains? #{{"a" ["1"] "b" ["2" "3"]} + {"a" ["1"] "b" ["3" "2"]}} + (:query (url/url "http://localhost?a=1&b=2&b=3")))))