Skip to content

Commit 6da79d2

Browse files
committed
Switch to POI streaming impl... 10x performance gain
1 parent 2e0e62b commit 6da79d2

File tree

2 files changed

+52
-32
lines changed

2 files changed

+52
-32
lines changed

src/excel_clj/poi.clj

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@
1010
[excel-clj.style :as style]
1111
[clojure.walk :as walk]
1212
[taoensso.tufte :as tufte])
13-
(:import (org.apache.poi.xssf.usermodel XSSFWorkbook XSSFRow XSSFSheet)
14-
(java.io Closeable)
15-
(org.apache.poi.ss.usermodel RichTextString Cell)
13+
(:import (java.io Closeable)
14+
(org.apache.poi.ss.usermodel RichTextString Sheet Cell Row Workbook)
1615
(java.util Date Calendar)
17-
(org.apache.poi.ss.util CellRangeAddress)))
16+
(org.apache.poi.ss.util CellRangeAddress)
17+
(org.apache.poi.xssf.streaming SXSSFWorkbook)
18+
(org.apache.poi.xssf.usermodel XSSFWorkbook)))
1819

1920

2021
(set! *warn-on-reflection* true)
@@ -102,21 +103,21 @@
102103
(doto cell (.setCellValue ^String to-write))))))
103104

104105

105-
(defn- ensure-row! [{:keys [^XSSFSheet sheet row row-cursor]}]
106+
(defn- ensure-row! [{:keys [^Sheet sheet row row-cursor]}]
106107
(if-let [r @row]
107108
r
108109
(let [^int idx (vswap! row-cursor inc)]
109110
(vreset! row (.createRow sheet idx)))))
110111

111112

112113
(defrecord ^:private SheetWriter
113-
[cell-style-cache ^XSSFSheet sheet row row-cursor col-cursor]
114+
[cell-style-cache ^Sheet sheet row row-cursor col-cursor]
114115
IWorksheetWriter
115116
(write! [this value]
116117
(write! this value nil 1 1))
117118

118119
(write! [this value style width height]
119-
(let [^XSSFRow poi-row (ensure-row! this)
120+
(let [^Row poi-row (ensure-row! this)
120121
^int cidx (vswap! col-cursor inc)
121122
poi-cell (.createCell poi-row cidx)]
122123

@@ -153,12 +154,12 @@
153154
Closeable
154155
(close [this]
155156
(tufte/p :set-print-settings
156-
(.setFitToPage sheet true)
157-
(.setFitWidth (.getPrintSetup sheet) 1))
157+
(.setFitToPage sheet true)
158+
(.setFitWidth (.getPrintSetup sheet) 1))
158159
this))
159160

160161

161-
(defrecord ^:private WorkbookWriter [^XSSFWorkbook workbook path]
162+
(defrecord ^:private WorkbookWriter [^Workbook workbook path]
162163
IWorkbookWriter
163164
(workbook* [this]
164165
workbook)
@@ -174,7 +175,7 @@
174175
(defn ^SheetWriter sheet-writer
175176
"Create a writer for an individual sheet within the workbook."
176177
[workbook-writer sheet-name]
177-
(let [{:keys [^XSSFWorkbook workbook path]} workbook-writer
178+
(let [{:keys [^Workbook workbook path]} workbook-writer
178179
cache (enc/memoize_
179180
(fn [style]
180181
(let [style (enc/nested-merge style/default-style style)]
@@ -188,9 +189,16 @@
188189

189190

190191
(defn ^WorkbookWriter writer
191-
"Open a writer for Excel workbooks."
192-
[path]
193-
(->WorkbookWriter (XSSFWorkbook.) path))
192+
"Open a writer for Excel workbooks.
193+
194+
If `streaming?` is true (default), uses Apache POI streaming implementations.
195+
196+
N.B. The streaming version is an order of magnitude faster than the
197+
alternative, so override this default only if you have a very good reason!"
198+
([path]
199+
(writer path true))
200+
([path streaming?]
201+
(->WorkbookWriter (if streaming? (SXSSFWorkbook.) (XSSFWorkbook.)) path)))
194202

195203

196204
(comment
@@ -205,7 +213,7 @@
205213

206214
(newline! t)
207215
(write! t "Cell")
208-
(write! t "Wide Cell" nil 2 1)
216+
(write! t "Wide Red Cell" {:font {:color :red}} 2 1)
209217

210218
(newline! t)
211219
(write! t "Tall Cell" nil 1 2)
@@ -228,10 +236,10 @@
228236

229237
(defn performance-test
230238
"Write `n-rows` of data to `to-file` and see how long it takes."
231-
[to-file n-rows]
239+
[to-file n-rows & {:keys [streaming?] :or {streaming? true}}]
232240
(let [start (System/currentTimeMillis)
233241
header-style {:border-bottom :thin :font {:bold true}}]
234-
(with-open [w (writer to-file)
242+
(with-open [w (writer to-file streaming?)
235243
sh (sheet-writer w "Test")]
236244

237245
(write! sh "Date" header-style 1 1)
@@ -250,16 +258,29 @@
250258

251259
(println "Wrote rows after" (- (System/currentTimeMillis) start) "ms"))
252260

253-
(println "Wrote file after" (- (System/currentTimeMillis) start) "ms")))
261+
(let [total (- (System/currentTimeMillis) start)]
262+
(println "Wrote file after" total "ms")
263+
total)))
254264

255265

256266
(comment
267+
"Testing overall performance, plus looking at streaming vs not streaming."
268+
269+
;; To get more detailed profiling output
257270
(tufte/add-basic-println-handler! {})
258-
(performance-test "test.xlsx" 1000) ; 103ms
259-
(tufte/profile {} (performance-test "test.xlsx" 10000)) ; 385ms
260-
(tufte/profile {} (performance-test "test.xlsx" 100000)) ; 4503ms
261-
(performance-test "test.xlsx" 150000) ; 9572ms
262-
(performance-test "test.xlsx" 200000) ; 11320ms
263-
(performance-test "test.xlsx" 300000) ; 19939ms
264-
(performance-test "test.xlsx" 350000) ; OOM error... haha
271+
272+
;;; 200,000 rows with and without streaming
273+
(tufte/profile {} (performance-test "test.xlsx" 200000 :streaming? true))
274+
;=> 2234
275+
276+
(tufte/profile {} (performance-test "test.xlsx" 200000 :streaming? false) )
277+
;=> 11187
278+
279+
280+
;;; 500,000 rows with and without streaming
281+
(tufte/profile {} (performance-test "test.xlsx" 500000 :streaming? true))
282+
;=> 5093
283+
284+
(tufte/profile {} (performance-test "test.xlsx" 500000 :streaming? false))
285+
; ... like a 2 minute delay and then OOM error (with my 8G of ram) ... haha
265286
)

src/excel_clj/style.clj

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,8 @@
6161
(:require [clojure.string :as string])
6262
(:import (org.apache.poi.ss.usermodel
6363
DataFormat BorderStyle HorizontalAlignment FontUnderline
64-
FillPatternType)
65-
(org.apache.poi.xssf.usermodel
66-
XSSFWorkbook XSSFColor DefaultIndexedColorMap XSSFCell)))
64+
FillPatternType Workbook)
65+
(org.apache.poi.xssf.usermodel XSSFColor DefaultIndexedColorMap XSSFCell)))
6766

6867
;;; Code to allow specification of Excel CellStyle objects as nested maps. You
6968
;;; might touch this code to add an implementation of `coerce-to-obj` for some
@@ -97,7 +96,7 @@
9796
so that when it's time to generate a CellStyle object, we can say that we
9897
know how to go from an attribute map to a Font object for :font attributes,
9998
from a keyword to a Color object for :color attributes, etc."
100-
(fn [^XSSFWorkbook workbook attr-keyword value]
99+
(fn [^Workbook workbook attr-keyword value]
101100
attr-keyword))
102101

103102
;; Coercions from simple map lookups
@@ -215,11 +214,11 @@
215214
(coerce-from-map :bottom-border-color colors if-color-not-found))
216215

217216
(defmethod coerce-to-obj :font
218-
[^XSSFWorkbook wb _ font-attrs]
217+
[^Workbook wb _ font-attrs]
219218
(do-set-all! (.createFont wb) font-attrs))
220219

221220
(defmethod coerce-to-obj :data-format
222-
[^XSSFWorkbook wb _ format]
221+
[^Workbook wb _ format]
223222
(if (instance? DataFormat format)
224223
format
225224
(if-let [format' (cond->> format (keyword? format) (get data-formats))]
@@ -263,7 +262,7 @@
263262
Any of the attributes can be java objects. Alternatively, if a `coerce-to-obj`
264263
implementation is provided for some attribute (e.g. :font), the attribute can
265264
be specified as data."
266-
[^XSSFWorkbook workbook attrs]
265+
[^Workbook workbook attrs]
267266
(let [attrs' (coerce-nested-to-obj workbook attrs)]
268267
(try
269268
(do-set-all! (.createCellStyle workbook) attrs')

0 commit comments

Comments
 (0)