Skip to content

Commit 440f661

Browse files
committed
Merge branch 'develop' for v1.3.0
2 parents 329de54 + a9816df commit 440f661

File tree

8 files changed

+550
-218
lines changed

8 files changed

+550
-218
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Change Log
22

3+
## [1.3.0] - 2020-04-05
4+
### Added
5+
- A lower-level, writer-style interface for Apache POI.
6+
- [Prototype/brainstorm](src/excel_clj/prototype.clj) of a less complicated,
7+
pure-data replacement for the high-level API in the upcoming v2 release.
8+
### Fixed
9+
- Bug (#3) with the way cells were being written via POI that would write cells
10+
out of order or mix up the style data between cells.
11+
312
## [1.2.1] - 2020-04-01
413
### Added
514
- Can bind a dynamic `*n-threads*` var to set the number of threads used during

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ common sense styling.
88

99
Lein:
1010
```
11-
[org.clojars.mjdowney/excel-clj "1.2.1"]
11+
[org.clojars.mjdowney/excel-clj "1.3.0"]
1212
```
1313

1414
- [Getting Started](#getting-started)

project.clj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
(defproject org.clojars.mjdowney/excel-clj "1.2.1"
1+
(defproject org.clojars.mjdowney/excel-clj "1.3.0"
22
:description "Generate Excel documents & PDFs from Clojure data."
33
:url "https://github.com/matthewdowney/excel-clj"
44
:license {:name "Eclipse Public License"
55
:url "http://www.eclipse.org/legal/epl-v10.html"}
6-
:dependencies [[org.clojure/clojure "1.10.0"]
6+
:dependencies [[org.clojure/clojure "1.10.1"]
77
[com.taoensso/tufte "2.0.1"]
88
[rhizome "0.2.9"]
99
[org.apache.poi/poi-ooxml "4.0.0"]

src/excel_clj/core.clj

Lines changed: 49 additions & 202 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
(ns
2-
^{:doc "Utilities for declarative creation of Excel (.xlsx) spreadsheets,
1+
(ns excel-clj.core
2+
"Utilities for declarative creation of Excel (.xlsx) spreadsheets,
33
with higher level abstractions over Apache POI (https://poi.apache.org/).
44
55
The highest level data abstraction used to create excel spreadsheets is a
@@ -9,203 +9,27 @@
99
grid of [[cell]].
1010
1111
Run the (example) function at the bottom of this namespace to see more."
12-
:author "Matthew Downey"} excel-clj.core
12+
{:author "Matthew Downey"}
1313
(:require [excel-clj.tree :as tree]
1414
[excel-clj.style :as style]
15+
[excel-clj.prototype :as pt]
1516
[clojure.string :as string]
16-
[clojure.java.io :as io]
17-
[taoensso.tufte :as tufte :refer (defnp p profiled profile)])
18-
(:import (org.apache.poi.ss.usermodel Cell RichTextString)
19-
(org.apache.poi.xssf.usermodel XSSFWorkbook XSSFSheet XSSFRow XSSFCell)
20-
(java.io File)
17+
[clojure.java.io :as io])
18+
(:import (java.io File)
2119
(java.awt Desktop HeadlessException)
22-
(java.util Calendar Date)
23-
(org.apache.poi.ss.util CellRangeAddress)
2420
(org.jodconverter.office DefaultOfficeManagerBuilder)
2521
(org.jodconverter OfficeDocumentConverter)))
2622

2723
(set! *warn-on-reflection* true)
2824

29-
;;; Low level code to write to & style sheets; you probably shouldn't have to
30-
;;; touch this to make use of the API, but might choose to when adding or
31-
;;; extending functionality
32-
33-
(defmacro ^:private if-type
34-
"For situations where there are overloads of a Java method that accept
35-
multiple types and you want to either call the method with a correct type
36-
hint (avoiding reflection) or do something else.
37-
38-
In the `if-true` form, the given `sym` becomes type hinted with the type in
39-
`types` where (instance? type sym). Otherwise the `if-false` form is run."
40-
[[sym types] if-true if-false]
41-
(let [typed-sym (gensym)]
42-
(letfn [(with-hint [type]
43-
(let [using-hinted
44-
;; Replace uses of the un-hinted symbol if-true form with
45-
;; the generated symbol, to which we're about to add a hint
46-
(clojure.walk/postwalk-replace {sym typed-sym} if-true)]
47-
;; Let the generated sym with a hint, e.g. (let [^Float x ...])
48-
`(let [~(with-meta typed-sym {:tag type}) ~sym]
49-
~using-hinted)))
50-
(condition [type] (list `(instance? ~type ~sym) (with-hint type)))]
51-
`(cond
52-
~@(mapcat condition types)
53-
:else ~if-false))))
54-
55-
;; Example of the use of if-type
56-
(comment
57-
(let [test-fn #(time (reduce + (map % (repeat 1000000 "asdf"))))
58-
reflection (fn [x] (.length x))
59-
len-hinted (fn [^String x] (.length x))
60-
if-type' (fn [x] (if-type [x [String]]
61-
(.length x)
62-
;; So we know it executes the if-true path
63-
(throw (RuntimeException.))))]
64-
(println "Running...")
65-
(print "With manual type hinting =>" (with-out-str (test-fn len-hinted)))
66-
(print "With if-type hinting =>" (with-out-str (test-fn if-type')))
67-
(print "With reflection => ")
68-
(flush)
69-
(print (with-out-str (test-fn reflection)))))
70-
71-
(defn- write-cell!
72-
"Write the given data to the mutable cell object, coercing its type if
73-
necessary."
74-
[^Cell cell data]
75-
;; These types are allowed natively
76-
(if-type [data [Boolean Calendar String Date Double RichTextString]]
77-
(doto cell (.setCellValue data))
78-
79-
;; Apache POI requires that numbers be doubles
80-
(if (number? data)
81-
(doto cell (.setCellValue (double data)))
82-
83-
;; Otherwise stringify it
84-
(let [to-write (or (some-> data pr-str) "")]
85-
(doto cell (.setCellValue ^String to-write))))))
86-
87-
(def ^:dynamic *max-col-width*
88-
"Sometimes POI's auto sizing isn't super intelligent, so set a sanity-max on
89-
the column width."
25+
(def ^{:dynamic true :deprecated true} *max-col-width*
26+
"Deprecated -- no longer has any effect."
9027
15000)
9128

92-
(def ^:dynamic *n-threads*
93-
"Allow a custom number of threads used during writing."
29+
(def ^{:dynamic true :deprecated true} *n-threads*
30+
"Deprecated -- no longer has any effect."
9431
(+ 2 (.. Runtime getRuntime availableProcessors)))
9532

96-
(defmacro ^:private doparallel [[sym coll] & body]
97-
"Performance hack for writing the POI cells.
98-
Like (dotimes [x xs] ...) but parallel."
99-
`(let [n# *n-threads*
100-
equal-chunks# (loop [num# n#, parts# [], coll# ~coll, c# (count ~coll)]
101-
(if (<= num# 0)
102-
parts#
103-
(let [t# (quot (+ c# num# -1) num#)]
104-
(recur (dec num#) (conj parts# (take t# coll#))
105-
(drop t# coll#) (- c# t#)))))
106-
workers#
107-
(doall
108-
(for [chunk# equal-chunks#]
109-
(future
110-
(doseq [~sym chunk#]
111-
~@body))))]
112-
(doseq [w# workers#]
113-
(deref w#))))
114-
115-
(defn- ^XSSFSheet write-grid!
116-
"Modify the given workbook by adding a sheet with the given name built from
117-
the provided grid.
118-
119-
The grid is a collection of rows, where each cell is either a plain, non-map
120-
value or a map of {:value ..., :style ..., :width ...}, with :value being the
121-
contents of the cell, :style being an optional map of style data, and :width
122-
being an optional cell width dictating how many horizontal slots the cell
123-
takes up (creates merged cells).
124-
125-
Returns the sheet object."
126-
[^XSSFWorkbook workbook ^String sheet-name grid]
127-
(let [^XSSFSheet sh (.createSheet workbook sheet-name)
128-
build-style' (memoize ;; Immutable styles can share mutable objects :)
129-
(fn [style-map]
130-
(->> (style/merge-all style/default-style (or style-map {}))
131-
(style/build-style workbook))))
132-
layout (volatile! {})]
133-
(try
134-
135-
;; N.B. So this code got uglier due to performance. Writing the cells
136-
;; takes many seconds for a large sheet (~50,000 rows) and we can improve
137-
;; the process a bit by doing the cell creation sequentially and the cell
138-
;; writing in parallel (on test data set reduced from ~19s to ~14s).
139-
140-
;; Unfortunately much of the time is spent writing to disk (~8s).
141-
142-
;; We have to do this part sequentially because POI doesn't use
143-
;; thread-safe data structures
144-
(doseq [[row-idx row-data] (map-indexed vector grid)]
145-
(let [row (p :create-row (.createRow sh (int row-idx)))]
146-
(loop [col-idx 0 cells row-data]
147-
(when-let [cell-data (first cells)]
148-
;; (1) Build the cell
149-
(let [cell (p :create-cell (.createCell ^XSSFRow row col-idx))
150-
width (if (map? cell-data) (get cell-data :width 1) 1)]
151-
152-
;; (2) Merge if necessary into adjacent cells
153-
(when (> width 1)
154-
(.addMergedRegion
155-
sh (CellRangeAddress.
156-
row-idx row-idx col-idx (dec (+ col-idx width)))))
157-
158-
;; (3) Save the cell
159-
(vswap! layout assoc-in [row-idx col-idx] cell)
160-
(recur (+ col-idx ^long width) (rest cells)))))))
161-
162-
;; We can do this part in parallel at least, since the cells are all
163-
;; different objects
164-
(let [layout @layout]
165-
(doparallel [row (map-indexed vector grid)]
166-
(let [[row-idx row-data] row]
167-
(loop [col-idx 0, cells row-data]
168-
(when-let [cell-data (first cells)]
169-
;; (1) Find the cell
170-
(let [width (if (map? cell-data) (get cell-data :width 1) 1)
171-
^XSSFCell cell (get (get layout row-idx) col-idx)]
172-
173-
;; (2) Write the cell data
174-
(p :write-cell
175-
(write-cell! cell (cond-> cell-data (map? cell-data) :value)))
176-
177-
;; (3) Set the cell style
178-
(let [style (build-style'
179-
(if (map? cell-data) (:style cell-data) {}))]
180-
(p :set-cell-style
181-
(.setCellStyle cell style)))
182-
183-
(recur (+ col-idx ^long width) (rest cells))))))))
184-
(catch Exception e
185-
(-> "Failed to write grid!"
186-
(ex-info {:sheet-name sheet-name :grid grid} e)
187-
(throw))))
188-
189-
(dotimes [i (transduce (map count) (completing max) 0 grid)]
190-
191-
;; Only auto-size small tables because it takes forever (~10s on a large
192-
;; grid)
193-
(when (< (count grid) 2000)
194-
(p :auto-size (.autoSizeColumn sh i)))
195-
196-
(when (> (.getColumnWidth sh i) *max-col-width*)
197-
(.setColumnWidth sh i *max-col-width*)))
198-
199-
(p :set-print-settings
200-
(.setFitToPage sh true)
201-
(.setFitWidth (.getPrintSetup sh) 1))
202-
sh))
203-
204-
(defn- workbook!
205-
"Create a new Apache POI XSSFWorkbook workbook object."
206-
[]
207-
(XSSFWorkbook.))
208-
20933
;;; Higher-level code to specify grids in terms of clojure data structures,
21034
;;; organized as either a table or a tree
21135

@@ -242,16 +66,15 @@
24266
{:value (get row col-name)
24367
:style style}))
24468
getters (map (fn [col-name] #(data-cell col-name %)) headers)
245-
rows (mapv (apply juxt getters) tabular-data)
24669
header-style (or header-style
24770
;; Add right alignment if it's an accounting column
24871
(fn [name]
24972
(cond-> (style/default-header-style name)
25073
(@numeric? name)
25174
(assoc :alignment :right))))]
252-
(into
253-
[(mapv #(->{:value % :style (header-style %)}) headers)]
254-
rows)))
75+
(cons
76+
(map (fn [x] {:value x :style (header-style x)}) headers)
77+
(map (apply juxt getters) tabular-data))))
25578

25679
(defn tree
25780
"Build a sheet grid from the provided tree of data
@@ -340,17 +163,18 @@
340163
The workbook is a key value collection of (sheet-name grid), either as map or
341164
an association list (if ordering is important)."
342165
[workbook path]
343-
(let [path' (force-extension path "xlsx")
344-
;; Create the mutable, POI workbook object
345-
^XSSFWorkbook wb
346-
(reduce
347-
(fn [wb [sheet-name grid]] (doto wb (write-grid! sheet-name grid)))
348-
(workbook!)
349-
(seq workbook))]
350-
(p :write-to-disk
351-
(with-open [fos (io/output-stream (io/file (str path')))]
352-
(.write wb fos)))
353-
(io/file path')))
166+
(let [convert-cell (fn [{:keys [value style width height]
167+
:or {width 1 height 1}
168+
:as cell-data}]
169+
(if-not (map? cell-data)
170+
(pt/wrapped cell-data)
171+
(-> (pt/wrapped value)
172+
(pt/style style)
173+
(pt/dims {:width width :height height}))))
174+
convert-row (fn [row] (map convert-cell row))]
175+
(pt/write!
176+
(map (fn [[sheet grid]] [sheet (map convert-row grid)]) workbook)
177+
path)))
354178

355179
(defn convert-pdf!
356180
"Convert the `from-document`, either a File or a path to any office document,
@@ -424,8 +248,31 @@
424248
["This" "Row" "Has" "Its" "Own"
425249
{:value "Format" :style {:font {:bold true}}}]]}))
426250

251+
427252
(comment
253+
;; This should open an Excel workbook
254+
(example)
255+
428256
;; This will both open an example excel sheet and write & open a test pdf file
429257
;; with the same contents. On platforms without OpenOffice the convert-pdf!
430258
;; call will most likely fail.
431-
(open (convert-pdf! (example) (temp ".pdf"))))
259+
(open (convert-pdf! (example) (temp ".pdf")))
260+
261+
;; Expose ordering / styling issues in v1.2.X
262+
(quick-open
263+
[["Test"
264+
(table
265+
(for [x (range 10000)]
266+
{"N" x "N^2" (* x x) "N^3" (* x x x)}))]])
267+
268+
;; Ballpark performance test
269+
(dotimes [_ 5]
270+
(time
271+
(write!
272+
[["Test"
273+
(table
274+
(for [x (range 100000)]
275+
{"N" x "N^2" (* x x) "N^3" (* x x x)}))]]
276+
"test.xlsx")))
277+
278+
)

0 commit comments

Comments
 (0)