|
| 1 | +(ns basilisp.contrib.bencode |
| 2 | + (:require |
| 3 | + [basilisp.string :as str])) |
| 4 | + |
;; Protocol implemented by every type which can be serialized to bencode.
(defprotocol BEncodeable
  (to-bencode-encodeable* [this]
    "Produce the bencode-ready form of ``this``."))
| 8 | + |
(defmacro ^:private ->bytes
  "Expand to code which stringifies ``v`` with ``python/str`` and then encodes
  the resulting text as UTF-8 ``bytes``."
  [v]
  `(-> ~v
       python/str
       (.encode "utf-8")))
| 13 | + |
;; Base implementations of ``BEncodeable`` for ``nil`` and the Python scalar
;; types. Every implementation returns the encoded value as ``bytes``.
(extend-protocol BEncodeable
  ;; ``nil`` is written as the empty byte string ``0:``.
  nil
  (to-bencode-encodeable* [this]
    #b "0:")
  ;; Byte strings are written as ``<length>:<raw bytes>``.
  python/bytes
  (to-bencode-encodeable* [this]
    (.join (python/bytes) [(->bytes (python/len this)) #b ":" this]))
  ;; Integers are written as ``i<base-10 digits>e``.
  python/int
  (to-bencode-encodeable* [this]
    ;; Normalize through ``python/int`` before stringifying: Python ``int``
    ;; subclasses may not stringify as digits (``bool`` stringifies as
    ;; ``True``/``False``), which would otherwise yield invalid output such
    ;; as ``iTruee``.
    (.join (python/bytes) [#b "i" (->bytes (python/int this)) #b "e"]))
  ;; Text is encoded to UTF-8 first so the length prefix counts bytes rather
  ;; than characters.
  python/str
  (to-bencode-encodeable* [this]
    (let [encoded (->bytes this)]
      (.join (python/bytes) [(->bytes (python/len encoded)) #b ":" encoded]))))
| 28 | + |
(defn ^:private encode-kw-or-sym
  "Encode the keyword or symbol ``o`` as a bencoded string.

  The string is ``namespace/name`` when ``o`` has a namespace and just ``name``
  otherwise; it is then encoded via ``to-bencode-encodeable*``."
  [o]
  (let [ns-part (namespace o)
        text    (if ns-part
                  (str ns-part "/" (name o))
                  (name o))]
    (to-bencode-encodeable* text)))
| 35 | + |
(defn ^:private encode-sequential
  "Encode the ordered collection ``n`` as a bencoded list: an ``l`` marker, each
  element encoded in iteration order via ``to-bencode-encodeable*``, then a
  closing ``e`` marker."
  [n]
  (.join (python/bytes)
         (concat [#b "l"]
                 (map to-bencode-encodeable* n)
                 [#b "e"])))
| 40 | + |
(defn ^:private encode-dict
  "Encode the mapping ``d`` as a bencoded dictionary and return the ``bytes``.

  ``d`` may be a Basilisp map or a Python ``dict``. Each key must be a string,
  keyword, or symbol; keywords and symbols are written as ``namespace/name`` (or
  just ``name`` when they have no namespace). Keys are converted to UTF-8
  ``bytes`` and entries are emitted sorted by those key bytes. Values are
  encoded via ``to-bencode-encodeable*``.

  Throws an ``ex-info`` carrying the offending ``:key`` and its ``:type`` if a
  key is of any other type."
  [d]
  (as-> d $
    ;; Iterating a Python ``dict`` yields only its keys, so request explicit
    ;; key/value pairs before the entries are destructured below.
    (if (instance? python/dict $) (.items $) $)
    ;; Pair each key (as bytes) with its already-encoded value.
    (into []
          (map (fn [[k v]]
                 [(cond
                    (string? k) (->bytes k)
                    (ident? k)  (->bytes
                                 (if-let [ns-str (namespace k)]
                                   (str ns-str "/" (name k))
                                   (name k)))
                    :else       (throw
                                 (ex-info
                                  "bencode dictionary keys must be one of: string, keyword, or symbol"
                                  {:type (type k)
                                   :key  k})))
                  (to-bencode-encodeable* v)]))
          $)
    ;; Sort entries by their raw key bytes.
    (python/sorted $ ** :key first)
    ;; Keys are still raw bytes at this point, so they are bencoded here;
    ;; values were encoded above.
    (into [#b "d"]
          (mapcat (fn [[k v]]
                    [(to-bencode-encodeable* k) v]))
          $)
    (conj $ #b "e")
    (.join (python/bytes) $)))
| 66 | + |
;; Keywords and symbols are encoded as strings.
(doseq [ident-type [basilisp.lang.keyword/Keyword
                    basilisp.lang.symbol/Symbol]]
  (extend ident-type BEncodeable {:to-bencode-encodeable* encode-kw-or-sym}))

;; Python dicts and Basilisp maps are encoded as bencode dictionaries.
(doseq [mapping-type [python/dict
                      basilisp.lang.interfaces/IPersistentMap]]
  (extend mapping-type BEncodeable {:to-bencode-encodeable* encode-dict}))

;; Python lists and tuples and Basilisp lists and vectors are encoded as
;; bencode lists.
(doseq [sequential-type [python/list
                         python/tuple
                         basilisp.lang.interfaces/IPersistentList
                         basilisp.lang.interfaces/IPersistentVector]]
  (extend sequential-type BEncodeable {:to-bencode-encodeable* encode-sequential}))
| 77 | + |
(defn encode
  "Serialize the object ``d`` to a ``bytes`` value using ``bencode`` encoding.

  The following types can be encoded:

  - ``nil``, which is written as the empty byte string
  - ``bytes``
  - ``int``
  - ``str``, which is first encoded to UTF-8 ``bytes``
  - keywords and symbols, which are first converted to strings (including the
    namespace, separated by '/') and then encoded using the rules for ``str``
  - Python ``list`` and ``tuple``
  - Basilisp lists and vectors
  - Python ``dict``
  - Basilisp maps

  Keys of mapping types must be one of: keywords, symbols, or strings.

  ``float`` is not supported because the ``bencode`` specification has no
  representation for non-integer numerics.

  Set types (including ``frozenset``, ``set``, or Basilisp's set types) are not
  supported due to the requirement that lists retain their original element
  ordering."
  [d]
  (to-bencode-encodeable* d))
| 103 | + |
| 104 | + |
(defmacro ^:private index-of
  "Expand to a call of the ``index`` method on ``b`` with the argument ``c``.

  For a byte string ``b`` this is the position of the first occurrence of the
  byte string ``c`` within ``b``."
  [b c]
  (list '.index b c))
| 110 | + |
(defn- slice
  "Return the portion of ``bytes`` beginning at index ``start`` and running
  either to the end of the input or, when ``end`` is given and non-nil, up to
  (but excluding) index ``end``. Returns ``nil`` when that portion is empty.

  Throws a ``python/ValueError`` if ``start`` (two argument form) or ``end``
  (three argument form) lies beyond the length of ``bytes``."
  ([bytes start]
   (when (> start (len bytes))
     (throw (python/ValueError "out of input")))
   (slice bytes start nil))
  ([bytes start end]
   (when (and end (< (len bytes) end))
     (throw (python/ValueError "out of input")))
   (let [piece (get bytes (python/slice start end))]
     ;; An empty result is reported as nil rather than as empty bytes.
     (if (pos? (count piece))
       piece
       nil))))
| 128 | + |
;; ``decode*`` is referenced by the container decoders before it is defined.
(declare decode*)

(defn ^:private decode-int
  "Decode a bencoded integer from the front of ``data``.

  The first byte of ``data`` (the type marker) is skipped and everything up to
  the first ``e`` is parsed as the integer. Returns a ``[value rest]`` vector
  where ``rest`` is whatever follows that ``e``, or ``nil`` if nothing does."
  [data]
  (let [body  (slice data 1)
        stop  (index-of body #b "e")
        value (int (slice body 0 stop))]
    [value (slice body (inc stop))]))
| 137 | + |
(defn ^:private decode-byte-string
  "Decode a length-prefixed byte string (``<length>:<bytes>``) from the front of
  ``data``.

  Returns a ``[value rest]`` vector. ``value`` is the result of calling
  ``:string-fn`` (default ``identity``) on the decoded ``bytes``; ``rest`` is
  the input remaining after the string, or ``nil`` if there is none.

  Throws a ``python/ValueError`` if the length prefix is negative. Exceptions
  thrown by the helpers used here (for example when no ``:`` is present or the
  input is shorter than the declared length) propagate to the caller."
  [data {:keys [string-fn] :or {string-fn identity}}]
  (let [i    (index-of data #b ":")
        n    (int (slice data 0 i))
        data (slice data (inc i))]
    ;; A negative length would otherwise be treated as a Python negative index
    ;; by the slicing below and silently yield the wrong bytes.
    (when (neg? n)
      (throw (python/ValueError "byte string length must not be negative")))
    [(if (= n 0)
       ;; ``slice`` reports empty results as nil, so the empty string is
       ;; produced explicitly.
       (string-fn #b "")
       (string-fn (slice data 0 n)))
     ;; ``data`` is nil when nothing followed the ``:`` separator.
     (when data
       (slice data n))]))
| 148 | + |
(defn ^:private decode-list
  "Decode a bencoded list from the front of ``data``.

  The first byte of ``data`` (the type marker) is skipped and values are
  decoded with ``decode*`` using ``opts`` until an ``e`` terminator is reached.
  Returns a ``[vector rest]`` vector where ``rest`` is the input following the
  terminator."
  [data opts]
  (loop [remaining (slice data 1)
         acc       (transient [])]
    (if (not= (slice remaining 0 1) #b "e")
      (let [[item more] (decode* remaining opts)]
        (recur more (conj! acc item)))
      [(persistent! acc) (slice remaining 1)])))
| 157 | + |
(defn ^:private decode-dict
  "Decode a bencoded dictionary from the front of ``data``.

  The first byte of ``data`` (the type marker) is skipped and key/value pairs
  are read until an ``e`` terminator is reached. Keys are decoded as byte
  strings and passed through ``:key-fn`` (default ``identity``); values are
  decoded with ``decode*`` using the full ``opts``. Returns a ``[map rest]``
  vector where ``rest`` is the input following the terminator."
  [data {:keys [key-fn] :or {key-fn identity} :as opts}]
  (let [key-opts {:string-fn key-fn}]
    (loop [remaining (slice data 1)
           entries   (transient {})]
      (if (not= (slice remaining 0 1) #b "e")
        (let [[k after-key] (decode-byte-string remaining key-opts)
              [v after-val] (decode* after-key opts)]
          (recur after-val (assoc! entries k v)))
        [(persistent! entries) (slice remaining 1)]))))
| 167 | + |
(defn ^:private decode*
  "Decode one bencoded value from the front of ``data``, choosing the decoder
  from the first byte: ``i`` for integers, ``l`` for lists, ``d`` for
  dictionaries, and anything else is treated as a byte string.

  Returns the ``[value rest]`` vector produced by the selected decoder."
  [data opts]
  (let [tag (slice data 0 1)]
    (cond
      (= tag #b "i") (decode-int data)
      (= tag #b "l") (decode-list data opts)
      (= tag #b "d") (decode-dict data opts)
      ;; byte string
      :else          (decode-byte-string data opts))))
| 176 | + |
(defn decode
  "Decode the first value in the bencoded ``data`` bytes according to ``opts`` and
  return a [decoded* rest*] vector.

  The decoded* item in the vector is the decoded value of the first item in ``data``
  while rest* is the remaining bytes which have not yet been decoded.

  If ``data`` cannot be decoded (e.g. is incomplete or an error occurred), it returns
  a [nil ``data``] vector.

  ``opts`` is a map with the following optional supported keys; it may be omitted
  entirely, in which case no options are applied.

   :keyword ``:keywordize-keys``: if the decoded value is a map, keywordize its keys.
   :keyword ``:key-fn``: a function which will be called for each key in a map; cannot
       be specified if ``:keywordize-keys`` is also specified
   :keyword ``:string-fn``: a function which will be called for each byte string which
       is not a map key; default is :lpy:fn:`basilisp.core/identity`

  Throws an ``ex-info`` if both ``:keywordize-keys`` and ``:key-fn`` are given."
  ([data]
   (decode data {}))
  ([data {:keys [keywordize-keys key-fn] :as opts}]
   (when (and keywordize-keys key-fn)
     (throw (ex-info "Can only specify either :keywordize-keys or :key-fn; not both"
                     {:keywordize-keys keywordize-keys
                      :key-fn          key-fn})))
   ;; ``:keywordize-keys`` is implemented as a ``:key-fn`` which decodes each key
   ;; as UTF-8 text and converts it to a keyword.
   (let [opts (cond-> opts
                keywordize-keys (-> (dissoc :keywordize-keys)
                                    (assoc :key-fn #(keyword (.decode % "utf-8")))))]
     (try
       (decode* data opts)
       ;; Any failure while decoding is reported as "nothing decoded" with the
       ;; input handed back untouched.
       (catch python/Exception _
         [nil data])))))
| 206 | + |
(defn decode-all
  "Repeatedly decode values from the bencoded ``data`` bytes and return a
  [values* incomplete*] vector.

  values* is a vector of every value decoded, in order. incomplete* is whatever
  input was left when decoding stopped: the bytes that could not be decoded, or
  nil.

  ``opts`` is a map supporting the same keys as :lpy:fn:`decode`; it defaults to
  an empty map."
  ([data]
   (decode-all data {}))
  ([data opts]
   (loop [acc       []
          remaining data]
     (let [[item more] (decode remaining opts)]
       ;; ``decode`` yields a nil item once it can no longer decode anything.
       (if (some? item)
         (recur (conj acc item) more)
         [acc more])))))
| 225 | + |
0 commit comments