Skip to content

Commit 9958b7c

Browse files
author
Jason Duncan
committed
Initial commit.
0 parents  commit 9958b7c

10 files changed

+675
-0
lines changed

format-table-test.el

Lines changed: 334 additions & 0 deletions
Large diffs are not rendered by default.

format-table.el

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
;;; format-table.el --- Parse and reformat tabular data. -*- lexical-binding: t; -*-
2+
3+
;; Copyright (C) 2018 Jason Duncan, all rights reserved
4+
5+
;; Author: Jason Duncan <[email protected]>
6+
;; Version: 0.0.1
7+
;; Keywords: data
8+
;; URL: https://github.com/functionreturnfunction/format-table
9+
;; Package-Requires: ((emacs "25") (dash "2.14.1"))
10+
11+
;; This program is free software; you can redistribute it and/or modify
12+
;; it under the terms of the GNU General Public License as published by
13+
;; the Free Software Foundation, either version 3 of the License, or
14+
;; (at your option) any later version.
15+
16+
;; This program is distributed in the hope that it will be useful,
17+
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18+
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19+
;; GNU General Public License for more details.
20+
21+
;; You should have received a copy of the GNU General Public License
22+
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
23+
24+
;;; Commentary:
25+
26+
;; Parse and reformat tabular data.
27+
28+
;;; Code:
29+
30+
(require 'cl-lib)
(require 'json)
(require 'subr-x)
(require 'dash)
32+
33+
(defvar format-table-format-alist
  '((ms
     :begin-row ""
     :end-row ""
     :col-separator " "
     :separator-col-separator " "
     :separator-begin-row ""
     :separator-end-row ""
     :row-count-format "(%s rows affected)")

    (org
     :begin-row "| "
     :end-row " |"
     :col-separator " | "
     :separator-col-separator "-+-"
     :separator-begin-row "|-"
     :separator-end-row "-|")

    (mysql
     :top-border-fn format-table-render-separator-row
     :bottom-border-fn format-table-render-separator-row
     :begin-row "| "
     :end-row " |"
     :col-separator " | "
     :separator-col-separator "-+-"
     :separator-begin-row "+-"
     :separator-end-row "-+"
     :row-count-format "%s rows? in set ([[:digit:]]+.[[:digit:]]+ sec)")

    (postgresql
     :header-pad-fn format-table-pad-center
     :begin-row " "
     :end-row " "
     :col-separator " | "
     :separator-col-separator "-+-"
     :separator-begin-row "-"
     :separator-end-row "-"
     ;; The "s" is optional so that the singular footer "(1 row)" is
     ;; recognized on input and produced on output.
     :row-count-format "(%s rows?)")

    (json . json))
  "Alist mapping a format name to the description of that table format.

Each value is either the symbol `json' or a plist with these keys:

:begin-row, :end-row
    Strings that open and close every header and body row.
:col-separator
    String placed between the cells of a row.
:separator-begin-row, :separator-end-row, :separator-col-separator
    The same three delimiters for the row of dashes under the header.
:top-border-fn, :bottom-border-fn
    Optional functions called with the list of column widths and the
    format plist; each returns the border line to render.  When
    present on the input format, that border line is dropped while
    parsing.
:header-pad-fn
    Optional function of (VALUE LENGTH) used to pad header cells.
:row-count-format
    Optional template for the row-count footer.  It is passed to
    `format' with the row count when rendering, and with a digit
    regexp when recognizing the footer in input, so it may contain
    regexp syntax such as an optional plural.")
73+
74+
(defun format-table-pad-right (value length)
  "Return VALUE left-justified in a field LENGTH wide.

Spaces are appended on the right until the field is filled.  VALUE
is formatted with `%s', and is returned unpadded when it already
fills the field."
  (let ((field-spec (concat "%-" (number-to-string length) "s")))
    (format field-spec value)))
77+
78+
(defun format-table-pad-center (value length)
  "Return the string VALUE centered in a field LENGTH characters wide.

The padding is split between both sides; when it cannot be split
evenly the extra space goes on the right.  If VALUE is already
LENGTH characters or longer, no padding is added."
  ;; Clamp at zero so an over-long VALUE neither asks `make-string'
  ;; for a negative count nor picks up a stray trailing space.
  (let* ((padding (max 0 (- length (length value))))
         (left (/ padding 2))
         (right (- padding left)))
    (concat (make-string left ?\s)
            value
            (make-string right ?\s))))
93+
94+
(defun format-table-remove-noise (lines input-mode)
  "Return the table LINES without anything that is not a table row.

Empty lines are dropped, as is any line matching the row-count
footer described by :row-count-format in the plist INPUT-MODE.  If
INPUT-MODE has a :top-border-fn the first remaining line is removed,
and if it has a :bottom-border-fn the last remaining line is
removed.  Return nil when nothing is left."
  (let* ((row-count-format (plist-get input-mode :row-count-format))
         ;; The footer template becomes a regexp once the count
         ;; placeholder is replaced by a digit pattern.
         (row-count-regexp (and row-count-format
                                (format row-count-format "[[:digit:]]+")))
         (kept nil))
    (dolist (line lines)
      (unless (or (string-equal "" line)
                  (and row-count-regexp
                       (string-match-p row-count-regexp line)))
        (push line kept)))
    (setq kept (nreverse kept))
    (when (and kept (plist-get input-mode :top-border-fn))
      (setq kept (cdr kept)))
    ;; The bottom border is governed by its own key, so a format that
    ;; only draws a top border does not lose its last data row.
    (if (plist-get input-mode :bottom-border-fn)
        (butlast kept)
      kept)))
107+
108+
(defun format-table-trim-row (row begin-row end-row)
  "Return ROW without its leading BEGIN-ROW and trailing END-ROW.

Both delimiters are matched literally, and a delimiter that is not
present is simply ignored."
  (let* ((prefix-regexp (concat "^" (regexp-quote begin-row)))
         (suffix-regexp (concat (regexp-quote end-row) "$"))
         (without-suffix (replace-regexp-in-string suffix-regexp "" row)))
    (replace-regexp-in-string prefix-regexp "" without-suffix)))
117+
118+
(defun format-table-get-col-widths (dashes input-mode)
  "Return the list of column widths encoded in the separator row DASHES.

DASHES is the line of dashes under the table header.  Its row
delimiters, taken from :separator-begin-row and :separator-end-row
in the plist INPUT-MODE, are trimmed off; the rest is split on
:separator-col-separator and the length of each piece is the width
of the corresponding column."
  (let* ((bare-dashes (format-table-trim-row
                       dashes
                       (plist-get input-mode :separator-begin-row)
                       (plist-get input-mode :separator-end-row)))
         (separator-regexp (regexp-quote
                            (plist-get input-mode :separator-col-separator))))
    (mapcar #'length (split-string bare-dashes separator-regexp))))
126+
127+
(defun format-table-split-row (row col-widths input-mode)
  "Split the string ROW into a list of trimmed cell values.

COL-WIDTHS lists the fixed width of each column, in order.  The
plist INPUT-MODE supplies :begin-row and :end-row, which are
stripped from ROW first, and :col-separator, whose length is
skipped between columns.  A ROW that ends early yields empty
strings for the missing cells."
  (let ((separator-length (length (plist-get input-mode :col-separator)))
        (remaining (format-table-trim-row row
                                          (plist-get input-mode :begin-row)
                                          (plist-get input-mode :end-row)))
        (cells nil))
    (dolist (cur-width col-widths)
      ;; Pad short remainders so the fixed-width `substring' below
      ;; cannot run past the end of the text.
      (let ((padded (if (> cur-width (length remaining))
                        (format-table-pad-right remaining cur-width)
                      remaining)))
        (push (string-trim (substring padded 0 cur-width)) cells)
        ;; Skip the column separator as well, but never beyond the end
        ;; of the text: the last column has no separator after it, and
        ;; a line may stop part-way through one.
        (setq remaining
              (substring padded
                         (min (length padded)
                              (+ cur-width separator-length))))))
    (nreverse cells)))
145+
146+
(defun format-table-assemble-table (header body)
  "Return a table plist built from HEADER and BODY.

HEADER is the list of column names and BODY the list of rows, each
row itself a list of values.  The result has the keys :header,
:body, :max-col-widths (the widest value in each column, header
included) and :row-count (the number of rows in BODY)."
  (let ((all-rows (cons header body)))
    (list :header header
          :body body
          :max-col-widths (format-table-get-max-col-widths all-rows)
          :row-count (length body))))
153+
154+
(defun format-table-parse-table (lines col-widths input-mode)
  "Parse the table LINES into a table plist.

The first of LINES is the header row, the second is the separator
row and is skipped, and the rest are body rows.  Each row is split
according to COL-WIDTHS and the delimiters in the plist INPUT-MODE."
  (let ((split-line (lambda (line)
                      (format-table-split-row line col-widths input-mode))))
    (format-table-assemble-table
     (funcall split-line (car lines))
     (mapcar split-line (nthcdr 2 lines)))))
159+
160+
(defun format-table-get-max-col-widths (table)
  "Return the width of the widest value in each column of TABLE.

TABLE is a list of rows, each row a list of values; the first row
determines the number of columns.  A value that is not a string,
such as a number read from JSON, is measured by its printed form,
which is also how it is rendered."
  (let ((widths (make-list (length (car table)) 0)))
    (dolist (row table widths)
      (setq widths
            (cl-mapcar
             (lambda (cell widest)
               (max widest
                    (length (if (stringp cell) cell (format "%s" cell)))))
             row widths)))))
166+
167+
(defun format-table-render-row (row max-col-widths output-mode &optional pad-fn)
  "Render the list of values ROW as one line of a table.

Each value is padded to the matching width in MAX-COL-WIDTHS; the
padded cells are joined with :col-separator and wrapped in
:begin-row and :end-row from the plist OUTPUT-MODE, followed by a
newline.

PAD-FN, when given, is called with a value and a width to pad each
cell; otherwise cells are padded on the right with spaces."
  (let ((pad (or pad-fn 'format-table-pad-right))
        (remaining row)
        (widths max-col-widths)
        (cells nil))
    ;; Pair each value with its width, stopping at the shorter list.
    (while (and remaining widths)
      (push (funcall pad (pop remaining) (pop widths)) cells))
    (concat (plist-get output-mode :begin-row)
            (string-join (nreverse cells)
                         (plist-get output-mode :col-separator))
            (plist-get output-mode :end-row)
            hard-newline)))
179+
180+
(defun format-table-generate-dash-string (length)
  "Return a string consisting of LENGTH hyphen characters."
  (make-string length ?-))
183+
184+
(defun format-table-render-separator-row (max-col-widths output-mode)
  "Render the row of dashes that separates the header from the body.

One run of hyphens is produced per width in MAX-COL-WIDTHS.  The
runs are joined with :separator-col-separator and wrapped in
:separator-begin-row and :separator-end-row from the plist
OUTPUT-MODE, followed by a newline."
  (let ((dash-runs (mapcar (lambda (width) (make-string width ?-))
                           max-col-widths)))
    (concat (plist-get output-mode :separator-begin-row)
            (string-join dash-runs
                         (plist-get output-mode :separator-col-separator))
            (plist-get output-mode :separator-end-row)
            hard-newline)))
192+
193+
(defun format-table-render-json (table)
  "Render the table plist TABLE as a JSON string.

The result is a JSON array with one object per row of TABLE's
:body, each object mapping the column names in :header to the
values of that row."
  (let ((header (plist-get table :header)))
    ;; Build every record in one pass and convert to a vector once,
    ;; instead of copying the whole vector again for each row.
    (json-encode
     (vconcat
      (mapcar (lambda (row) (cl-mapcar #'cons header row))
              (plist-get table :body))))))
200+
201+
(defun format-table-render-table (table output-mode)
  "Render the table plist TABLE as a string in the format OUTPUT-MODE.

When OUTPUT-MODE is the symbol `json' the table is rendered as
JSON.  Otherwise OUTPUT-MODE is a format plist and the result is
the optional top border, the header row, the separator row, every
body row, and the optional bottom border, in that order."
  (if (equal output-mode 'json)
      (format-table-render-json table)
    (let* ((widths (plist-get table :max-col-widths))
           (draw-top (plist-get output-mode :top-border-fn))
           (draw-bottom (plist-get output-mode :bottom-border-fn))
           (render-body-row
            (lambda (row)
              (format-table-render-row row widths output-mode))))
      (concat
       (and draw-top (funcall draw-top widths output-mode))
       (format-table-render-row (plist-get table :header)
                                widths
                                output-mode
                                (plist-get output-mode :header-pad-fn))
       (format-table-render-separator-row widths output-mode)
       (mapconcat render-body-row (plist-get table :body) "")
       (and draw-bottom (funcall draw-bottom widths output-mode))))))
216+
217+
(defun format-table-render-row-count (count output-mode)
  "Return the row-count footer for COUNT rows in the style of OUTPUT-MODE.

The :row-count-format of the plist OUTPUT-MODE is filled in with
COUNT.  That template may contain regexp fragments, which are then
turned into plain text: the optional-plural marker is resolved to
singular when COUNT is 1 and to plural otherwise, and the digit
pattern for an elapsed time is rendered as 0.00.  Return the empty
string when OUTPUT-MODE has no :row-count-format."
  (let ((output-format (plist-get output-mode :row-count-format)))
    (if (null output-format)
        ""
      (let* ((filled (format output-format count))
             ;; Singular: drop the whole "s?".  Plural: keep the "s"
             ;; and drop only the "?".
             (pluralized (replace-regexp-in-string
                          (regexp-quote (if (= 1 count) "s?" "?"))
                          ""
                          filled)))
        (replace-regexp-in-string
         (regexp-quote "[[:digit:]]+.[[:digit:]]+") "0.00"
         pluralized)))))
226+
227+
(defun format-table-get-format (mode)
  "Return the format registered for MODE in `format-table-format-alist'.

Signal an error if MODE has no entry there."
  (or (alist-get mode format-table-format-alist)
      ;; Hand MODE to `error' as an argument rather than formatting
      ;; the message first, so a `%' in MODE is not read as a format
      ;; directive.
      (error "Format mode %s not recognized." mode)))
231+
232+
(defun format-table-parse-json-gather-column-values (obj)
  "Return the list of values, the cdr of each pair, in the alist OBJ."
  (mapcar #'cdr obj))
235+
236+
(defun format-table-parse-json-gather-column-names (obj)
  "Return the list of keys, the car of each pair, in the alist OBJ."
  (mapcar #'car obj))
239+
240+
(defun format-table-parse-json (str)
  "Parse the JSON string STR into a table plist.

STR is read with object keys as strings.  The keys of the first
element become the header, and the values of every element become
the body rows."
  (let* ((json-key-type 'string)
         ;; Copy the parsed sequence into a list so it can be walked
         ;; with `car' and `mapcar'.
         (records (append (json-read-from-string str) nil)))
    (format-table-assemble-table
     (format-table-parse-json-gather-column-names (car records))
     (mapcar #'format-table-parse-json-gather-column-values records))))
247+
248+
(defun format-table-cleanup-and-parse (str input-mode)
  "Parse the string STR into a table plist according to INPUT-MODE.

When INPUT-MODE is the symbol `json', STR is parsed as JSON.
Otherwise INPUT-MODE is a format plist: STR is split into lines,
lines that are not part of the table are discarded, the column
widths are read from the second remaining line (the separator
row), and the rows are split using those widths.

Return nil when fewer than two table lines remain, since there is
then no separator row to take the column widths from."
  (if (equal input-mode 'json)
      (format-table-parse-json str)
    (let ((lines (format-table-remove-noise (split-string str "[
]+")
                                            input-mode)))
      (when (cdr lines)
        (format-table-parse-table
         lines
         (format-table-get-col-widths (cadr lines) input-mode)
         input-mode)))))
258+
259+
(defun format-table (str input-mode output-mode)
  "Reformat the table in the string STR from INPUT-MODE to OUTPUT-MODE.

INPUT-MODE and OUTPUT-MODE are format names looked up in
`format-table-format-alist'; an error is signaled if either is
unknown.  The result is the re-rendered table followed by the
row-count footer of the output format.  If no table can be parsed
from STR, STR is returned unchanged."
  (let* ((input-mode (format-table-get-format input-mode))
         (output-mode (format-table-get-format output-mode))
         (table (format-table-cleanup-and-parse str input-mode)))
    (if (null table)
        str
      (concat
       (format-table-render-table table output-mode)
       (format-table-render-row-count (plist-get table :row-count)
                                      output-mode)))))
269+
270+
(provide 'format-table)
271+
;;; format-table.el ends here

test-data/big-ms-input.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
SalesOrderID RevisionNumber OrderDate DueDate ShipDate Status OnlineOrderFlag SalesOrderNumber PurchaseOrderNumber AccountNumber CustomerID SalesPersonID TerritoryID BillToAddressID ShipToAddressID ShipMethodID CreditCardID CreditCardApprovalCode CurrencyRateID SubTotal TaxAmt Freight TotalDue Comment rowguid ModifiedDate
2+
------------ -------------- ----------------------- ----------------------- ----------------------- ------ --------------- ------------------------- ------------------------- --------------- ----------- ------------- ----------- --------------- --------------- ------------ ------------ ---------------------- -------------- --------------------- --------------------- --------------------- --------------------- -------------------------------------------------------------------------------------------------------------------------------- ------------------------------------ -----------------------
3+
43659 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43659 PO522145787 10-4020-000676 29825 279 5 985 985 5 16281 105041Vi84182 NULL 20565.6206 1971.5149 616.0984 23153.2339 NULL 79B65321-39CA-4115-9CBA-8FE0903E12E6 2011-06-07 00:00:00.000
4+
43660 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43660 PO18850127500 10-4020-000117 29672 279 5 921 921 5 5618 115213Vi29411 NULL 1294.2529 124.2483 38.8276 1457.3288 NULL 738DC42D-D03B-48A1-9822-F95A67EA7389 2011-06-07 00:00:00.000
5+
43661 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43661 PO18473189620 10-4020-000442 29734 282 6 517 517 5 1346 85274Vi6854 4 32726.4786 3153.7696 985.5530 36865.8012 NULL D91B9131-18A4-4A11-BC3A-90B6F53E9D74 2011-06-07 00:00:00.000
6+
43662 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43662 PO18444174044 10-4020-000227 29994 282 6 482 482 5 10456 125295Vi53935 4 28832.5289 2775.1646 867.2389 32474.9324 NULL 4A1ECFC0-CC3A-4740-B028-1C50BB48711C 2011-06-07 00:00:00.000
7+
43663 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43663 PO18009186470 10-4020-000510 29565 276 4 1073 1073 5 4322 45303Vi22691 NULL 419.4589 40.2681 12.5838 472.3108 NULL 9B1E7A40-6AE0-4AD3-811C-A64951857C4B 2011-06-07 00:00:00.000
8+
43664 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43664 PO16617121983 10-4020-000397 29898 280 1 876 876 5 806 95555Vi4081 NULL 24432.6088 2344.9921 732.8100 27510.4109 NULL 22A8A5DA-8C22-42AD-9241-839489B6EF0D 2011-06-07 00:00:00.000
9+
43665 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43665 PO16588191572 10-4020-000146 29580 283 1 849 849 5 15232 35568Vi78804 NULL 14352.7713 1375.9427 429.9821 16158.6961 NULL 5602C304-853C-43D7-9E79-76E320D476CF 2011-06-07 00:00:00.000
10+
43666 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43666 PO16008173883 10-4020-000511 30052 276 4 1074 1074 5 13349 105623Vi69217 NULL 5056.4896 486.3747 151.9921 5694.8564 NULL E2A90057-1366-4487-8A7E-8085845FF770 2011-06-07 00:00:00.000
11+
43667 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43667 PO15428132599 10-4020-000646 29974 277 3 629 629 5 10370 55680Vi53503 NULL 6107.0820 586.1203 183.1626 6876.3649 NULL 86D5237D-432D-4B21-8ABC-671942F5789D 2011-06-07 00:00:00.000
12+
43668 8 2011-05-31 00:00:00.000 2011-06-12 00:00:00.000 2011-06-07 00:00:00.000 5 0 SO43668 PO14732180295 10-4020-000514 29614 282 6 529 529 5 1566 85817Vi8045 4 35944.1562 3461.7654 1081.8017 40487.7233 NULL 281CC355-D538-494E-9B44-461B36A826C6 2011-06-07 00:00:00.000
13+
14+
(10 rows affected)

0 commit comments

Comments
 (0)