Skip to content

Commit 8545148

Browse files
authored
Support bencode for nrepl server (#747)
Part of #412
1 parent 29d4ba4 commit 8545148

File tree

6 files changed

+317
-0
lines changed

6 files changed

+317
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
* Added support for `bytes` literals using a `#b` prefix (#732)
1111
* Added support for Python 3.12 (#734)
1212
* Added a default reader conditional for the current platform (`windows`, `darwin`, `linux`, etc.) (#692)
13+
* Added support for `bencode` binary encoding (part of #412)
1314

1415
### Changed
1516
* Basilisp now supports PyTest 7.0+ (#660)

docs/api/contrib/bencode.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
basilisp.contrib.bencode
2+
========================
3+
4+
.. toctree::
5+
:maxdepth: 2
6+
:caption: Contents:
7+
8+
.. autonamespace:: basilisp.contrib.bencode
9+
:members:
10+
:undoc-members:

src/basilisp/contrib/bencode.lpy

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
(ns basilisp.contrib.bencode
2+
(:require
3+
[basilisp.string :as str]))
4+
5+
;; Protocol implemented for every type that `encode` knows how to serialize.
(defprotocol BEncodeable
6+
(to-bencode-encodeable* [this]
7+
"Return the bencoded ``bytes`` representation of ``this``."))
8+
9+
(defmacro ^:private ->bytes
10+
"Expand to code which converts ``v`` to a string with ``python/str`` and encodes
the result as UTF-8 ``bytes``."
11+
[v]
12+
`(.encode (python/str ~v) "utf-8"))
13+
14+
;; Base encodings for nil, bytes, ints, and strings. Each implementation returns
;; the complete bencoded ``bytes`` for its value.
(extend-protocol BEncodeable
15+
;; nil is emitted as the empty byte string "0:".
nil
16+
(to-bencode-encodeable* [this]
17+
#b "0:")
18+
;; Byte strings are emitted as "<length>:<contents>".
python/bytes
19+
(to-bencode-encodeable* [this]
20+
(.join (python/bytes) [(->bytes (python/len this)) #b ":" this]))
21+
;; Integers are emitted as "i<decimal>e".
;; NOTE(review): Python's bool is a subclass of int; if protocol dispatch honors
;; subclassing, true/false would be written as "iTruee"/"iFalsee" -- confirm.
python/int
22+
(to-bencode-encodeable* [this]
23+
(.join (python/bytes) [#b "i" (->bytes this) #b "e"]))
24+
;; Strings are UTF-8 encoded first; the length prefix counts encoded bytes, not
;; characters.
python/str
25+
(to-bencode-encodeable* [this]
26+
(let [encoded (->bytes this)]
27+
(.join (python/bytes) [(->bytes (python/len encoded)) #b ":" encoded]))))
28+
29+
(defn ^:private encode-kw-or-sym
  "Bencode the keyword or symbol ``o`` using the string rules, rendering it as
  ``ns/name`` when it has a namespace and as the bare name otherwise."
  [o]
  (let [ns-part   (namespace o)
        full-name (if ns-part
                    (str ns-part "/" (name o))
                    (name o))]
    (to-bencode-encodeable* full-name)))
35+
36+
(defn ^:private encode-sequential
  "Bencode the ordered collection ``n`` as a list: an ``l`` marker, the encoding of
  every element in order, and a closing ``e`` marker."
  [n]
  (let [encoded-items (map to-bencode-encodeable* n)]
    ;; A single join over marker + items + marker yields the same bytes as joining
    ;; the items first and wrapping the result afterwards.
    (.join (python/bytes)
           (concat [#b "l"] encoded-items [#b "e"]))))
40+
41+
(defn ^:private encode-dict
  "Bencode the mapping ``d`` as a dictionary.

  Keys must be strings, keywords, or symbols. Each key is converted to UTF-8 bytes
  (namespaced keywords and symbols are rendered as ``ns/name``) and entries are
  emitted sorted by those key bytes. Values are encoded via the ``BEncodeable``
  protocol.

  Throws an ``ex-info`` (with ``:type`` and ``:key`` data) for any other key type."
  [d]
  (as-> d $
    ;; Normalize every entry to a [key-bytes encoded-value] pair.
    (into []
          (map (fn [[k v]]
                 [(cond
                    (string? k) (->bytes k)
                    (ident? k)  (->bytes
                                 (if-let [ns-str (namespace k)]
                                   (str ns-str "/" (name k))
                                   (name k)))
                    :else       (throw
                                 (ex-info
                                  "bencode dictionary keys must be one of: string, keyword, or symbol"
                                  {:type (type k)
                                   :key  k})))
                  (to-bencode-encodeable* v)]))
          $)
    ;; Order entries by their raw key bytes.
    (python/sorted $ ** :key first)
    ;; Keys are still raw bytes here, so they are length-prefixed now; values were
    ;; already encoded above.
    (into [#b "d"]
          (mapcat (fn [[k v]]
                    [(to-bencode-encodeable* k) v]))
          $)
    (conj $ #b "e")
    (.join (python/bytes) $)))
66+
67+
;; Register the helper encoders for the remaining supported types: keywords and
;; symbols share `encode-kw-or-sym`, mapping types share `encode-dict`, and ordered
;; collections share `encode-sequential`.
(extend basilisp.lang.keyword/Keyword BEncodeable {:to-bencode-encodeable* encode-kw-or-sym})
68+
(extend basilisp.lang.symbol/Symbol BEncodeable {:to-bencode-encodeable* encode-kw-or-sym})
69+
70+
(extend python/dict BEncodeable {:to-bencode-encodeable* encode-dict})
71+
(extend basilisp.lang.interfaces/IPersistentMap BEncodeable {:to-bencode-encodeable* encode-dict})
72+
73+
(extend python/list BEncodeable {:to-bencode-encodeable* encode-sequential})
74+
(extend python/tuple BEncodeable {:to-bencode-encodeable* encode-sequential})
75+
(extend basilisp.lang.interfaces/IPersistentList BEncodeable {:to-bencode-encodeable* encode-sequential})
76+
(extend basilisp.lang.interfaces/IPersistentVector BEncodeable {:to-bencode-encodeable* encode-sequential})
77+
78+
(defn encode
79+
"Encode the object ``d`` into a byte string using ``bencode`` encoding.
80+
81+
``encode`` supports encoding the following types:
82+
83+
- ``bytes``
84+
- ``int``
85+
- ``str``, which is first encoded to UTF-8 ``bytes``
86+
- keywords and symbols, which are first converted to strings (including namespace,
87+
separated by '/') and then converted using the rules for ``str``s
88+
- Python ``list``
89+
- ``tuple``
90+
- Basilisp lists and vectors
91+
- ``dict``
92+
- maps
93+
94+
Mapping type keys must be one of: keywords, symbols, or strings.
95+
96+
This function does not support ``float`` because the ``bencode`` specification does
97+
not support non-integer numerics.
98+
99+
Set types (including ``frozenset``, ``set``, or Basilisp's set types) are not
100+
supported due to the requirement that lists retain their original element ordering."
101+
[d]
102+
(to-bencode-encodeable* d))
103+
104+
105+
;; Expands to a direct `.index` method call on ``b``.
;; NOTE(review): presumably raises when ``c`` is absent (Python ``bytes.index``
;; semantics) -- the "incomplete" decode tests appear to rely on this; confirm.
(defmacro ^:private index-of
106+
"Return the index of the first occurrence of character ``c`` (a byte string) in
107+
byte string ``b``."
108+
[b c]
109+
`(.index ~b ~c))
110+
111+
(defn- slice
112+
"Returns the slice of the ``bytes`` from the ``start`` index to
113+
the end of the array or to the ``end`` index if provided. Returns
114+
`nil` if the slice is empty.
115+
116+
Throw a `python/ValueError` exception if any of the indices are out
117+
of bounds."
118+
([bytes start]
119+
;; A start equal to the length is allowed; it yields an empty slice, i.e. nil.
(if (< (len bytes) start)
120+
(throw (python/ValueError "out of input"))
121+
(slice bytes start nil)))
122+
([bytes start end]
123+
;; Only an explicit end is bounds-checked; a nil end means "to the end".
(if (and end (> end (len bytes)))
124+
(throw (python/ValueError "out of input"))
125+
(let [bs (get bytes (python/slice start end))]
126+
(when (> (count bs) 0)
127+
bs)))))
128+
129+
(declare decode*)
130+
131+
(defn ^:private decode-int
  "Decode the integer at the head of ``data`` (which starts with the ``i`` marker)
  and return a ``[value remaining]`` vector, where ``remaining`` is the input after
  the closing ``e`` marker or nil if nothing follows."
  [data]
  (let [body    (slice data 1)            ; drop the leading "i"
        end-idx (index-of body #b "e")
        digits  (slice body 0 end-idx)
        tail    (slice body (inc end-idx))]
    [(int digits) tail]))
137+
138+
(defn ^:private decode-byte-string
  "Decode the length-prefixed byte string (``<length>:<contents>``) at the head of
  ``data`` and return a ``[value remaining]`` vector.

  ``value`` is the result of calling the ``:string-fn`` option (default
  ``identity``) on the raw contents; ``remaining`` is the input after the contents,
  or nil if nothing follows.

  Throws ``python/ValueError`` if the declared length is negative. Running past the
  end of the input also raises (via ``slice``)."
  [data {:keys [string-fn] :or {string-fn identity}}]
  (let [i (index-of data #b ":")
        n (int (slice data 0 i))]
    ;; A negative length would otherwise be treated as a from-the-end index by the
    ;; underlying Python slice and silently yield wrong contents.
    (when (neg? n)
      (throw (python/ValueError "negative byte string length")))
    (let [data (slice data (inc i))]
      [(if (= n 0)
         (string-fn #b "")
         (string-fn (slice data 0 n)))
       ;; `data` is nil when nothing follows the ':' separator.
       (when data
         (slice data n))])))
148+
149+
(defn ^:private decode-list
  "Decode the list at the head of ``data`` (which starts with the ``l`` marker) and
  return a ``[vector remaining]`` vector. Elements are decoded with ``decode*``
  using ``opts`` until the closing ``e`` marker is reached."
  [data opts]
  (loop [remaining (slice data 1)          ; drop the leading "l"
         items     (transient [])]
    (if (not= (slice remaining 0 1) #b "e")
      (let [[item more] (decode* remaining opts)]
        (recur more (conj! items item)))
      [(persistent! items) (slice remaining 1)])))
157+
158+
(defn ^:private decode-dict
  "Decode the dictionary at the head of ``data`` (which starts with the ``d``
  marker) and return a ``[map remaining]`` vector.

  Keys are decoded as byte strings and passed through the ``:key-fn`` option
  (default ``identity``); values are decoded with ``decode*`` using ``opts``."
  [data {:keys [key-fn] :or {key-fn identity} :as opts}]
  (let [key-opts {:string-fn key-fn}]
    (loop [remaining (slice data 1)        ; drop the leading "d"
           entries   (transient {})]
      (if (not= (slice remaining 0 1) #b "e")
        (let [[k after-key] (decode-byte-string remaining key-opts)
              [v after-val] (decode* after-key opts)]
          (recur after-val (assoc! entries k v)))
        [(persistent! entries) (slice remaining 1)]))))
167+
168+
(defn ^:private decode*
  "Decode the single value at the head of ``data``, dispatching on its first byte:
  ``i`` for integers, ``l`` for lists, ``d`` for dictionaries, and anything else as
  a length-prefixed byte string. Returns a ``[value remaining]`` vector."
  [data opts]
  (condp = (slice data 0 1)
    #b "i" (decode-int data)
    #b "l" (decode-list data opts)
    #b "d" (decode-dict data opts)
    ;; byte string
    (decode-byte-string data opts)))
176+
177+
(defn decode
178+
"Decode the first value in the bencoded ``data`` bytes according to ``opts`` and
179+
return a [decoded* rest*] vector.
180+
181+
The decoded* item in the vector is the decoded value of the first item in ``data``
182+
while rest* is the remaining undecoded bytes (or nil if nothing remains).
183+
184+
If ``data`` cannot be decoded (e.g. is incomplete or an error occurred), it returns
185+
a [nil ``data``] vector.
186+
187+
``opts`` is a map with the following optional supported keys.
188+
189+
:keyword ``:keywordize-keys``: if the decoded value is a map, keywordize its keys.
190+
:keyword ``:key-fn``: a function which will be called for each key in a map; cannot
191+
be specified if ``:keywordize-keys`` is also specified
192+
:keyword ``:string-fn``: a function which will be called for each byte string which
193+
is not a map key; default is :lpy:fn:`basilisp.core/identity`"
194+
[data {:keys [keywordize-keys key-fn string-fn] :as opts}]
195+
(when (and keywordize-keys key-fn)
196+
(throw (ex-info "Can only specify either :keywordize-keys or :key-fn; not both"
197+
{:keywordize-keys keywordize-keys
198+
:key-fn key-fn})))
199+
;; :keywordize-keys is rewritten into an equivalent :key-fn which decodes the key
;; bytes as UTF-8 and keywordizes the result.
(let [opts (cond-> opts
200+
keywordize-keys (-> (dissoc :keywordize-keys)
201+
(assoc :key-fn #(keyword (.decode % "utf-8")))))]
202+
(try
203+
(decode* data opts)
204+
;; Any failure is reported to the caller as [nil data] rather than propagated.
(catch python/Exception e
205+
[nil data]))))
206+
207+
(defn decode-all
  "Decode every value in the bencoded ``data`` bytes and return a
  [values* incomplete*] vector.

  values* is a vector of the values decoded from ``data``, in order, while
  incomplete* is whatever trailing portion of ``data`` could not be decoded, or
  nil if everything was consumed.

  ``opts`` is a map supporting the same keys as :lpy:fn:`decode`; it defaults to an
  empty map."
  ([data]
   (decode-all data {}))
  ([data opts]
   (loop [decoded   []
          remaining data]
     (let [[item leftover] (decode remaining opts)]
       ;; `decode` signals "nothing more could be decoded" with a nil item.
       (if (some? item)
         (recur (conj decoded item) leftover)
         [decoded leftover])))))
225+

src/basilisp/lang/runtime.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,7 @@ def get(m, k, default=None): # pylint: disable=unused-argument
11831183
return default
11841184

11851185

1186+
@get.register(bytes)
11861187
@get.register(dict)
11871188
@get.register(list)
11881189
@get.register(str)

tests/basilisp/contrib/__init__.py

Whitespace-only changes.
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
(ns tests.basilisp.contrib.bencode-test
2+
(:require
3+
[basilisp.contrib.bencode :as bc]
4+
[basilisp.test :refer [deftest are is testing]]))
5+
6+
;; Table-driven checks for `bc/encode`: supported values and their exact encodings,
;; followed by values which are expected to be rejected.
(deftest bencode-encode
7+
(testing "bencode supported types"
8+
;; Each row is [value expected]; the expected string is UTF-8 encoded before
;; being compared with the encoder's output.
(are [v s] (= (.encode s "utf-8") (bc/encode v))
9+
nil "0:"
10+
"" "0:"
11+
-0 "i0e"
12+
0 "i0e"
13+
42 "i42e"
14+
-42 "i-42e"
15+
"spam" "4:spam"
16+
:keyword "7:keyword"
17+
:ns/kw "5:ns/kw"
18+
:other.ns/kw "11:other.ns/kw"
19+
'sym "3:sym"
20+
'ns/sym "6:ns/sym"
21+
'other.ns/sym "12:other.ns/sym"
22+
[] "le"
23+
["spam" 42] "l4:spami42ee"
24+
'("spam" 42) "l4:spami42ee"
25+
{} "de"
26+
;; Dictionary entries are expected in key-sorted order (bar, efg/xyz, foo),
;; not in the order they appear in the literal.
{"bar" "spam" :foo 42 :efg/xyz 128} "d3:bar4:spam7:efg/xyzi128e3:fooi42ee"
27+
["spam" 42 {"bar" "spam" :foo 42}] "l4:spami42ed3:bar4:spam3:fooi42eee"))
28+
29+
;; Floats, sets, seqs, and maps whose keys are not strings, keywords, or symbols
;; are all expected to throw ExceptionInfo.
(testing "bencode unsupported types"
30+
(are [v] (thrown? basilisp.lang.exception/ExceptionInfo (bc/encode v))
31+
-3.14
32+
0.3
33+
#{25}
34+
(seq [25 26])
35+
{45 "some map"}
36+
{["vec key"] "something bad"})))
37+
38+
;; Checks for `bc/decode` and `bc/decode-all`: default decoding, the :string-fn and
;; :keywordize-keys options, multiple concatenated values, and truncated input.
(deftest bencode-decode
39+
;; With empty opts, strings and map keys come back as raw bytes and the rest is
;; nil when the whole input is consumed.
(testing "basic"
40+
(are [s v] (= [v nil] (bc/decode (python/bytes s "utf-8") {}))
41+
"0:" #b ""
42+
"le" []
43+
"de" {}
44+
"i0e" 0
45+
"i-1e" -1
46+
"i1e" 1
47+
"i42e" 42
48+
"i-42e" -42
49+
"4:spam" #b "spam"
50+
"l4:spami42ee" [#b "spam" 42]
51+
"d3:bar4:spam3:fooi42ee" {#b "bar" #b "spam" #b "foo" 42}))
52+
53+
;; :string-fn is applied to string values but not to map keys, which stay bytes.
(testing "encoded string"
54+
(are [s v] (= [v nil] (bc/decode (python/bytes s "utf-8") {:string-fn #(.decode % "utf-8")}))
55+
"0:" ""
56+
"le" []
57+
"de" {}
58+
"i0e" 0
59+
"i-1e" -1
60+
"i1e" 1
61+
"i42e" 42
62+
"i-42e" -42
63+
"4:spam" "spam"
64+
"l4:spami42ee" ["spam" 42]
65+
"d3:bar4:spam3:fooi42ee" {#b "bar" "spam" #b "foo" 42}))
66+
67+
;; :keywordize-keys applies to nested maps as well; string values stay bytes.
(testing "keywordized"
68+
(are [s v] (= v (bc/decode (python/bytes s "utf-8") {:keywordize-keys true}))
69+
"d3:bar4:spam3:fooi42ee" [{:bar #b "spam" :foo 42} nil]
70+
"d3:bard1:xi-42eee" [{:bar {:x -42}} nil]))
71+
72+
;; `decode` consumes only the first value and returns the undecoded remainder;
;; `decode-all` keeps going until the input is exhausted.
(testing "multiple"
73+
(is (= [42 #b "4:spam"] (bc/decode #b "i42e4:spam" {})))
74+
(is (= [[{#b "bar" #b "spam"} 42] nil] (bc/decode-all #b "d3:bar4:spamei42e")))
75+
(is (= [[{:bar #b "spam"} 42] nil] (bc/decode-all #b "d3:bar4:spamei42e" {:keywordize-keys true}))))
76+
77+
;; Truncated input yields a nil value together with the undecodable bytes.
(testing "incomplete"
78+
(is (= [nil #b "i42"] (bc/decode #b "i42" {})))
79+
(is (= [nil #b "d3:bar4:spam3:fooi42"] (bc/decode #b "d3:bar4:spam3:fooi42" {})))
80+
(is (= [[{#b "bar" #b "spam"} 42] #b "i0"] (bc/decode-all #b "d3:bar4:spamei42ei0")))))

0 commit comments

Comments
 (0)