|
13 | 13 | (:require [excel-clj.tree :as tree]
|
14 | 14 | [excel-clj.style :as style]
|
15 | 15 | [clojure.string :as string]
|
16 |
| - [clojure.java.io :as io]) |
| 16 | + [clojure.java.io :as io] |
| 17 | + [taoensso.tufte :as tufte :refer (defnp p profiled profile)]) |
17 | 18 | (:import (org.apache.poi.ss.usermodel Cell RichTextString)
|
18 |
| - (org.apache.poi.xssf.usermodel XSSFWorkbook XSSFSheet) |
19 |
| - (java.io FileOutputStream File) |
| 19 | + (org.apache.poi.xssf.usermodel XSSFWorkbook XSSFSheet XSSFRow XSSFCell) |
| 20 | + (java.io File) |
20 | 21 | (java.awt Desktop HeadlessException)
|
21 | 22 | (java.util Calendar Date)
|
22 | 23 | (org.apache.poi.ss.util CellRangeAddress)
|
23 | 24 | (org.jodconverter.office DefaultOfficeManagerBuilder)
|
24 | 25 | (org.jodconverter OfficeDocumentConverter)))
|
25 | 26 |
|
| 27 | +(set! *warn-on-reflection* true) |
| 28 | + |
26 | 29 | ;;; Low level code to write to & style sheets; you probably shouldn't have to
|
27 | 30 | ;;; touch this to make use of the API, but might choose to when adding or
|
28 | 31 | ;;; extending functionality
|
|
71 | 74 | [^Cell cell data]
|
72 | 75 | ;; These types are allowed natively
|
73 | 76 | (if-type [data [Boolean Calendar String Date Double RichTextString]]
|
74 |
| - (doto cell (.setCellValue data)) |
| 77 | + (doto cell (.setCellValue data)) |
75 | 78 |
|
76 |
| - ;; Apache POI requires that numbers be doubles |
77 |
| - (if (number? data) |
78 |
| - (doto cell (.setCellValue (double data))) |
| 79 | + ;; Apache POI requires that numbers be doubles |
| 80 | + (if (number? data) |
| 81 | + (doto cell (.setCellValue (double data))) |
79 | 82 |
|
80 |
| - ;; Otherwise stringify it |
81 |
| - (doto cell (.setCellValue ^String (or (some-> data pr-str) "")))))) |
| 83 | + ;; Otherwise stringify it |
| 84 | + (let [to-write (or (some-> data pr-str) "")] |
| 85 | + (doto cell (.setCellValue ^String to-write)))))) |
82 | 86 |
|
(def ^:dynamic *max-col-width*
  "Sanity cap on a sheet column's width. POI's auto-sizing sometimes picks an
  unreasonably wide column, so any column whose width ends up above this value
  is set back to exactly this value. Dynamic, so callers can rebind it with
  `binding`."
  15000)
|
87 | 91 |
|
(defmacro ^:private doparallel
  "Performance hack for writing the POI cells.

  Like (doseq [x xs] ...), but parallel: the elements of `coll` are split into
  (+ 2 available-processors) contiguous chunks whose sizes differ by at most
  one, and each chunk is walked on its own future with `sym` bound to each
  element in turn while `body` runs for side effects.

  `coll` is evaluated exactly once. Blocks until every chunk has finished and
  returns nil. An exception thrown by `body` is rethrown when the future that
  ran it is dereferenced."
  [[sym coll] & body]
  `(let [;; Bind the collection expression once so that it is not evaluated a
         ;; second time when counting.
         items# ~coll
         n# (+ 2 (.. Runtime getRuntime availableProcessors))
         ;; Peel off ceil(remaining / chunks-left) items per iteration, which
         ;; keeps the chunks balanced even when the count is not divisible.
         equal-chunks# (loop [num# n#, parts# [], coll# items#, c# (count items#)]
                         (if (<= num# 0)
                           parts#
                           (let [t# (quot (+ c# num# -1) num#)]
                             (recur (dec num#) (conj parts# (take t# coll#))
                                    (drop t# coll#) (- c# t#)))))
         ;; doall forces the lazy `for`, so every future has been started
         ;; before we begin waiting on any of them.
         workers# (doall
                    (for [chunk# equal-chunks#]
                      (future
                        (doseq [~sym chunk#]
                          ~@body))))]
     (doseq [w# workers#]
       (deref w#))))
| 110 | + |
88 | 111 | (defn- ^XSSFSheet write-grid!
|
89 | 112 | "Modify the given workbook by adding a sheet with the given name built from
|
90 | 113 | the provided grid.
|
|
101 | 124 | build-style' (memoize ;; Immutable styles can share mutable objects :)
|
102 | 125 | (fn [style-map]
|
103 | 126 | (->> (style/merge-all style/default-style (or style-map {}))
|
104 |
| - (style/build-style workbook))))] |
| 127 | + (style/build-style workbook)))) |
| 128 | + layout (volatile! {})] |
105 | 129 | (try
|
| 130 | + |
| 131 | + ;; N.B. So this code got uglier due to performance. Writing the cells |
| 132 | + ;; takes many seconds for a large sheet (~50,000 rows) and we can improve |
| 133 | + ;; the process a bit by doing the cell creation sequentially and the cell |
| 134 | + ;; writing in parallel (on test data set reduced from ~19s to ~14s). |
| 135 | + |
| 136 | + ;; Unfortunately much of the time is spent writing to disk (~8s). |
| 137 | + |
| 138 | + ;; We have to do this part sequentially because POI doesn't use |
| 139 | + ;; thread-safe data structures |
106 | 140 | (doseq [[row-idx row-data] (map-indexed vector grid)]
|
107 |
| - (let [row (.createRow sh (int row-idx))] |
| 141 | + (let [row (p :create-row (.createRow sh (int row-idx)))] |
108 | 142 | (loop [col-idx 0 cells row-data]
|
109 | 143 | (when-let [cell-data (first cells)]
|
110 |
| - (let [cell (.createCell row col-idx) |
| 144 | + ;; (1) Build the cell |
| 145 | + (let [cell (p :create-cell (.createCell ^XSSFRow row col-idx)) |
111 | 146 | width (if (map? cell-data) (get cell-data :width 1) 1)]
|
112 |
| - (write-cell! cell (cond-> cell-data (map? cell-data) :value)) |
113 |
| - (.setCellStyle |
114 |
| - cell |
115 |
| - (build-style' (if (map? cell-data) (:style cell-data) {}))) |
| 147 | + |
| 148 | + ;; (2) Merge if necessary into adjacent cells |
116 | 149 | (when (> width 1)
|
117 | 150 | (.addMergedRegion
|
118 | 151 | sh (CellRangeAddress.
|
119 | 152 | row-idx row-idx col-idx (dec (+ col-idx width)))))
|
| 153 | + |
| 154 | + ;; (3) Save the cell |
| 155 | + (vswap! layout assoc-in [row-idx col-idx] cell) |
120 | 156 | (recur (+ col-idx ^long width) (rest cells)))))))
|
| 157 | + |
| 158 | + ;; We can do this part in parallel at least, since the cells are all |
| 159 | + ;; different objects |
| 160 | + (let [layout @layout] |
| 161 | + (doparallel [row (map-indexed vector grid)] |
| 162 | + (let [[row-idx row-data] row] |
| 163 | + (loop [col-idx 0, cells row-data] |
| 164 | + (when-let [cell-data (first cells)] |
| 165 | + ;; (1) Find the cell |
| 166 | + (let [width (if (map? cell-data) (get cell-data :width 1) 1) |
| 167 | + ^XSSFCell cell (get (get layout row-idx) col-idx)] |
| 168 | + |
| 169 | + ;; (2) Write the cell data |
| 170 | + (p :write-cell |
| 171 | + (write-cell! cell (cond-> cell-data (map? cell-data) :value))) |
| 172 | + |
| 173 | + ;; (3) Set the cell style |
| 174 | + (let [style (build-style' |
| 175 | + (if (map? cell-data) (:style cell-data) {}))] |
| 176 | + (p :set-cell-style |
| 177 | + (.setCellStyle cell style))) |
| 178 | + |
| 179 | + (recur (+ col-idx ^long width) (rest cells)))))))) |
121 | 180 | (catch Exception e
|
122 | 181 | (-> "Failed to write grid!"
|
123 | 182 | (ex-info {:sheet-name sheet-name :grid grid} e)
|
124 | 183 | (throw))))
|
125 | 184 |
|
126 | 185 | (dotimes [i (transduce (map count) (completing max) 0 grid)]
|
127 |
| - (.autoSizeColumn sh i) |
| 186 | + |
| 187 | + ;; Only auto-size small tables because it takes forever (~10s on a large |
| 188 | + ;; grid) |
| 189 | + (when (< (count grid) 2000) |
| 190 | + (p :auto-size (.autoSizeColumn sh i))) |
| 191 | + |
128 | 192 | (when (> (.getColumnWidth sh i) *max-col-width*)
|
129 | 193 | (.setColumnWidth sh i *max-col-width*)))
|
130 | 194 |
|
131 |
| - (.setFitToPage sh true) |
132 |
| - (.setFitWidth (.getPrintSetup sh) 1) |
| 195 | + (p :set-print-settings |
| 196 | + (.setFitToPage sh true) |
| 197 | + (.setFitWidth (.getPrintSetup sh) 1)) |
133 | 198 | sh))
|
134 | 199 |
|
135 | 200 | (defn- workbook!
|
|
278 | 343 | (fn [wb [sheet-name grid]] (doto wb (write-grid! sheet-name grid)))
|
279 | 344 | (workbook!)
|
280 | 345 | (seq workbook))]
|
281 |
| - (with-open [fos (FileOutputStream. (str path'))] |
282 |
| - (.write wb fos)) |
| 346 | + (p :write-to-disk |
| 347 | + (with-open [fos (io/output-stream (io/file (str path')))] |
| 348 | + (.write wb fos))) |
283 | 349 | (io/file path')))
|
284 | 350 |
|
285 | 351 | (defn convert-pdf!
|
|
359 | 425 | ;; with the same contents. On platforms without OpenOffice the convert-pdf!
|
360 | 426 | ;; call will most likely fail.
|
361 | 427 | (open (convert-pdf! (example) (temp ".pdf"))))
|
362 |
| - |
|
0 commit comments