|
8 | 8 | [tech.v3.dataset.protocols :as ds-proto] |
9 | 9 | [tech.v3.dataset.impl.column :as col-impl] |
10 | 10 | [tech.v3.dataset.string-table :as str-t] |
| 11 | + [tech.v3.dataset.dynamic-int-list :as int-list] |
11 | 12 | [ham-fisted.set :as set] |
12 | 13 | [ham-fisted.api :as hamf] |
13 | 14 | [ham-fisted.reduce :as hamf-rf] |
14 | 15 | [ham-fisted.function :as hamf-fn] |
15 | 16 | [ham-fisted.iterator :as hamf-iter]) |
16 | 17 | (:import [ham_fisted IMutList ArrayLists ITypedReduce ChunkedList] |
17 | 18 | [tech.v3.datatype Buffer ObjectReader ElemwiseDatatype LongReader DoubleReader] |
18 | | - [java.util Arrays Iterator Map])) |
| 19 | + [java.util Arrays Iterator Map List] |
| 20 | + [tech.v3.dataset.string_table StringTable])) |
19 | 21 |
|
20 | 22 | (set! *warn-on-reflection* true) |
21 | 23 | (set! *unchecked-math* :warn-on-boxed) |
|
30 | 32 | idx (Arrays/binarySearch ^bytes idx-ary (unchecked-byte idx))) |
31 | 33 | :int16 (hamf-fn/long-unary-operator |
32 | 34 | idx (Arrays/binarySearch ^shorts idx-ary (unchecked-short idx))) |
33 | | - :int32 (hamf-fn/long-unary-operator |
34 | | - idx (Arrays/binarySearch ^shorts idx-ary (unchecked-short idx))) |
| 35 | + :int32 (hamf-fn/long-unary-operator |
| 36 | + idx (Arrays/binarySearch ^ints idx-ary (unchecked-int idx))) |
35 | 37 | (hamf-fn/long-unary-operator |
36 | 38 | idx (Arrays/binarySearch ^longs idx-ary (unchecked-long idx)))))) |
37 | 39 |
|
|
258 | 260 | ^SparseCol [indexes data ^long rc metadata] |
259 | 261 | (SparseCol. indexes data rc metadata nil nil)) |
260 | 262 |
|
(defn- as-string-table
  "Returns `d` unchanged when it is a `StringTable` instance whose `str->int`
  field is non-nil; returns nil for any other input."
  ^StringTable [d]
  ;; Check the type first so the hinted field access below is only reached
  ;; for genuine StringTable instances.
  (when (instance? StringTable d)
    (when (.-str->int ^StringTable d)
      d)))
| 267 | + |
261 | 268 | (defn ->scol |
262 | 269 | ^SparseCol [col] |
263 | 270 | (cond |
|
277 | 284 | :else |
278 | 285 | (hamf/long-array valid-indexes))) |
279 | 286 | col-dt (dt/elemwise-datatype col) |
280 | | - buf-rdr (dt/->reader col) |
281 | | - data (dt/make-container col-dt (.size valid-indexes)) |
282 | | - dst (dt/->buffer data)] |
283 | | - (reduce (hamf-rf/indexed-long-accum _acc dst-idx src-idx |
284 | | - (.writeObject dst dst-idx (.readObject buf-rdr src-idx))) |
| 287 | + buf-rdr (dt/->reader (ds-proto/column-data col)) |
| 288 | + ^IMutList data (if (identical? :string col-dt) |
| 289 | + (if-let [^StringTable strt (as-string-table (ds-proto/column-data col))] |
| 290 | + (StringTable. (.-int->str strt) |
| 291 | + (.-str->int strt) |
| 292 | + (int-list/dynamic-int-list (dt/ecount valid-indexes))) |
| 293 | + (str-t/make-string-table)) |
| 294 | + (dt/make-list col-dt (.size valid-indexes)))] |
| 295 | + (reduce (case (tech.v3.datatype.casting/simple-operation-space col-dt) |
| 296 | + :int64 (fn [_acc ^long src-idx] |
| 297 | + (.addLong data (.readLong buf-rdr src-idx))) |
| 298 | + :float64 (fn [_acc ^long src-idx] |
| 299 | + (.addDouble data (.readDouble buf-rdr src-idx))) |
| 300 | + (fn [_acc ^long src-idx] |
| 301 | + (.add data (.readObject buf-rdr src-idx)))) |
285 | 302 | nil valid-indexes) |
286 | 303 | (SparseCol. valid-indexes data rc (meta col) nil nil)))) |
287 | 304 |
|
|
0 commit comments