Skip to content

Commit 941df76

Browse files
committed
Rework low-level POI interaction into a writer compatible with with-open
1 parent 329de54 commit 941df76

File tree

1 file changed

+255
-0
lines changed

1 file changed

+255
-0
lines changed

src/excel_clj/poi.clj

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
(ns excel-clj.poi
2+
"Interface that sits one level above Apache POI.
3+
4+
Handles all apache POI interaction besides styling (style.clj).
5+
See the examples at the bottom of the namespace inside of (comment ...)
6+
expressions for how to use the writers."
7+
{:author "Matthew Downey"}
8+
(:require [clojure.java.io :as io]
9+
[taoensso.encore :as enc]
10+
[excel-clj.style :as style]
11+
[clojure.walk :as walk]
12+
[taoensso.tufte :as tufte])
13+
(:import (org.apache.poi.xssf.usermodel XSSFWorkbook XSSFRow XSSFSheet)
14+
(java.io Closeable)
15+
(org.apache.poi.ss.usermodel RichTextString Cell)
16+
(java.util Date Calendar)
17+
(org.apache.poi.ss.util CellRangeAddress)))
18+
19+
20+
(set! *warn-on-reflection* true)
21+
22+
23+
(defprotocol IWorkbookWriter
24+
(workbook* [this]
25+
"Get the underlying Apache POI XSSFWorkbook object."))
26+
27+
28+
(defprotocol IWorksheetWriter
29+
(write! [this value] [this value style width height]
30+
"Write a single cell.
31+
32+
If provided, `style` is a map shaped as described in excel-clj.style.
33+
34+
Width and height determine cell merging, e.g. a width of 2 describes a
35+
cell that is merged into the cell to the right.")
36+
37+
(newline! [this]
38+
"Skip the writer to the next row in the worksheet.")
39+
40+
(autosize!! [this idx]
41+
"Autosize one of the column at `idx`. N.B. this takes forever.")
42+
43+
(sheet* [this]
44+
"Get the underlying Apache POI XSSFSheet object."))
45+
46+
47+
(defmacro ^:private if-type
48+
"For situations where there are overloads of a Java method that accept
49+
multiple types and you want to either call the method with a correct type
50+
hint (avoiding reflection) or do something else.
51+
52+
In the `if-true` form, the given `sym` becomes type hinted with the type in
53+
`types` where (instance? type sym). Otherwise the `if-false` form is run."
54+
[[sym types] if-true if-false]
55+
(let [typed-sym (gensym)]
56+
(letfn [(with-hint [type]
57+
(let [using-hinted
58+
;; Replace uses of the un-hinted symbol if-true form with
59+
;; the generated symbol, to which we're about to add a hint
60+
(walk/postwalk-replace {sym typed-sym} if-true)]
61+
;; Let the generated sym with a hint, e.g. (let [^Float x ...])
62+
`(let [~(with-meta typed-sym {:tag type}) ~sym]
63+
~using-hinted)))
64+
(condition [type] (list `(instance? ~type ~sym) (with-hint type)))]
65+
`(cond
66+
~@(mapcat condition types)
67+
:else ~if-false))))
68+
69+
70+
;; Example of the use of if-type
71+
(comment
72+
(let [test-fn #(time (reduce + (map % (repeat 1000000 "asdf"))))
73+
reflection (fn [x] (.length x))
74+
len-hinted (fn [^String x] (.length x))
75+
if-type' (fn [x] (if-type [x [String]]
76+
(.length x)
77+
;; So we know it executes the if-true path
78+
(throw (RuntimeException.))))]
79+
(println "Running...")
80+
(print "With manual type hinting =>" (with-out-str (test-fn len-hinted)))
81+
(print "With if-type hinting =>" (with-out-str (test-fn if-type')))
82+
(print "With reflection => ")
83+
(flush)
84+
(print (with-out-str (test-fn reflection)))))
85+
86+
87+
(defn- write-cell!
88+
"Write the given data to the mutable cell object, coercing its type if
89+
necessary."
90+
[^Cell cell data]
91+
;; These types are allowed natively
92+
(if-type
93+
[data [Boolean Calendar String Date Double RichTextString]]
94+
(doto cell (.setCellValue data))
95+
96+
;; Apache POI requires that numbers be doubles
97+
(if (number? data)
98+
(doto cell (.setCellValue (double data)))
99+
100+
;; Otherwise stringify it
101+
(let [to-write (or (some-> data pr-str) "")]
102+
(doto cell (.setCellValue ^String to-write))))))
103+
104+
105+
(defn- ensure-row! [{:keys [^XSSFSheet sheet row row-cursor]}]
106+
(if-let [r @row]
107+
r
108+
(let [^int idx (vswap! row-cursor inc)]
109+
(vreset! row (.createRow sheet idx)))))
110+
111+
112+
(defrecord ^:private SheetWriter
113+
[cell-style-cache ^XSSFSheet sheet row row-cursor col-cursor]
114+
IWorksheetWriter
115+
(write! [this value]
116+
(write! this value nil 1 1))
117+
118+
(write! [this value style width height]
119+
(let [^XSSFRow poi-row (ensure-row! this)
120+
^int cidx (vswap! col-cursor inc)
121+
poi-cell (.createCell poi-row cidx)]
122+
123+
(when (or (> width 1) (> height 1))
124+
(let [ridx @row-cursor
125+
cra (CellRangeAddress.
126+
ridx (dec (+ ridx height))
127+
cidx (dec (+ cidx width)))]
128+
(.addMergedRegion sheet cra)))
129+
130+
(tufte/p :write-cell
131+
(write-cell! poi-cell value))
132+
133+
(when-let [cell-style (cell-style-cache style)]
134+
(tufte/p :style-cell
135+
(.setCellStyle poi-cell cell-style))))
136+
137+
this)
138+
139+
(newline! [this]
140+
(vreset! row nil)
141+
(vreset! col-cursor -1)
142+
this)
143+
144+
(autosize!! [this idx]
145+
(tufte/p :auto-size (.autoSizeColumn sheet idx)))
146+
147+
(sheet* [this]
148+
sheet)
149+
150+
Closeable
151+
(close [this]
152+
(tufte/p :set-print-settings
153+
(.setFitToPage sheet true)
154+
(.setFitWidth (.getPrintSetup sheet) 1))
155+
this))
156+
157+
158+
(defrecord ^:private WorkbookWriter [^XSSFWorkbook workbook path]
159+
IWorkbookWriter
160+
(workbook* [this]
161+
workbook)
162+
163+
Closeable
164+
(close [this]
165+
(tufte/p :write-to-disk
166+
(with-open [fos (io/output-stream (io/file path))]
167+
(.write workbook fos)
168+
(.close workbook)))))
169+
170+
171+
(defn ^SheetWriter sheet-writer
172+
"Create a writer for an individual sheet within the workbook."
173+
[workbook-writer sheet-name]
174+
(let [{:keys [^XSSFWorkbook workbook path]} workbook-writer
175+
cache (enc/memoize_
176+
(fn [style]
177+
(let [style (enc/nested-merge style/default-style style)]
178+
(style/build-style workbook style))))]
179+
(map->SheetWriter
180+
{:cell-style-cache cache
181+
:sheet (.createSheet workbook ^String sheet-name)
182+
:row (volatile! nil)
183+
:row-cursor (volatile! -1)
184+
:col-cursor (volatile! -1)})))
185+
186+
187+
(defn ^WorkbookWriter writer
188+
"Open a writer for Excel workbooks."
189+
[path]
190+
(->WorkbookWriter (XSSFWorkbook.) path))
191+
192+
193+
(comment
194+
"For example..."
195+
196+
(with-open [w (writer "test.xlsx")
197+
t (sheet-writer w "Test")]
198+
(let [header-style {:border-bottom :thin :font {:bold true}}]
199+
(write! t "First Col" header-style 1 1)
200+
(write! t "Second Col" header-style 1 1)
201+
(write! t "Third Col" header-style 1 1)
202+
203+
(newline! t)
204+
(write! t "Cell")
205+
(write! t "Wide Cell" nil 2 1)
206+
207+
(newline! t)
208+
(write! t "Tall Cell" nil 1 2)
209+
(write! t "Cell 2")
210+
(write! t "Cell 3")
211+
212+
(newline! t)
213+
;; This one won't be visible, because it's hidden behind the tall cell
214+
(write! t "1")
215+
(write! t "2")
216+
(write! t "3"))))
217+
218+
219+
(defn performance-test
220+
"Write `n-rows` of data to `to-file` and see how long it takes."
221+
[to-file n-rows]
222+
(let [start (System/currentTimeMillis)
223+
header-style {:border-bottom :thin :font {:bold true}}]
224+
(with-open [w (writer to-file)
225+
sh (sheet-writer w "Test")]
226+
227+
(write! sh "Date" header-style 1 1)
228+
(write! sh "Milliseconds" header-style 1 1)
229+
(write! sh "Days Since Start of 2018" header-style 1 1)
230+
(println "Wrote headers after" (- (System/currentTimeMillis) start) "ms")
231+
232+
(let [start-ms (inst-ms #inst"2018")
233+
day-ms (enc/ms :days 1)]
234+
(dotimes [i n-rows]
235+
(let [ms (+ start-ms (* day-ms i))]
236+
(newline! sh)
237+
(write! sh (Date. ^long ms))
238+
(write! sh ms)
239+
(write! sh i))))
240+
241+
(println "Wrote rows after" (- (System/currentTimeMillis) start) "ms"))
242+
243+
(println "Wrote file after" (- (System/currentTimeMillis) start) "ms")))
244+
245+
246+
(comment
247+
(tufte/add-basic-println-handler! {})
248+
(performance-test "test.xlsx" 1000) ; 103ms
249+
(tufte/profile {} (performance-test "test.xlsx" 10000)) ; 385ms
250+
(tufte/profile {} (performance-test "test.xlsx" 100000)) ; 4503ms
251+
(performance-test "test.xlsx" 150000) ; 9572ms
252+
(performance-test "test.xlsx" 200000) ; 11320ms
253+
(performance-test "test.xlsx" 300000) ; 19939ms
254+
(performance-test "test.xlsx" 350000) ; OOM error... haha
255+
)

0 commit comments

Comments
 (0)