Skip to content

Commit 99795b9

Browse files
committed
CSV: Replace 44aa2a8 with a much faster implementation
1 parent f7a8eea commit 99795b9

File tree

1 file changed

+47
-29
lines changed

1 file changed

+47
-29
lines changed

modules/csv/csv.scm

Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,25 +42,43 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4242
;; Useful procedures for working with csv files.
4343

4444
(define (csv-read file)
45-
(let* ((raw0 (file->u8vector file))
46-
(raw1 (u8vector->string raw0))
47-
(raw2 (string-replace-substring raw1 "\r\n" "\n"))
48-
(raw3 (string-replace-char raw2 #\return #\newline))
49-
(rows (string-split raw3 #\newline))
50-
(qcount 0) (qrow "") (idx 0)
51-
(finalrows (make-vector (length rows))))
52-
(for-each (lambda (row)
53-
(set! qcount (+ qcount (string-count row "\"")))
54-
(set! qrow (string-append qrow (if (> (string-length qrow) 0) "\n" "") row))
55-
(if (even? qcount) (begin
56-
(vector-set! finalrows idx (csv:split qrow))
57-
(set! idx (+ idx 1))
58-
(set! qrow "") (set! qcount 0)
45+
(let* ((raw (file->u8vector file))
46+
(lraw (u8vector-length raw))
47+
(rn? #f))
48+
(let loop ((i 0))
49+
(if (fx< i lraw) (begin
50+
(if (fx= (u8vector-ref raw i) 13) (begin
51+
(u8vector-set! raw i 10)
52+
(if (and (fx< (fx+ i 1) lraw) (fx= (u8vector-ref raw (fx+ i 1)) 10))
53+
(set! rn? #t)
54+
)
55+
))
56+
(loop (fx+ i 1)))))
57+
(let* ((rawstr (u8vector->string raw))
58+
(rows (string-split rawstr #\newline))
59+
(qcount 0) (qrow "") (idx 0) (processrow? #t)
60+
(finalrows (make-vector (length rows))))
61+
(for-each (lambda (row)
62+
(if rn?
63+
(if (fx= (string-length row) 0)
64+
(set! processrow? (not processrow?))
65+
(set! processrow? #t)
66+
)
67+
)
68+
(if processrow? (begin
69+
(set! processrow? #t)
70+
(set! qcount (+ qcount (string-count row "\"")))
71+
(set! qrow (string-append qrow (if (> (string-length qrow) 0) "\n" "") row))
72+
(if (even? qcount) (begin
73+
(vector-set! finalrows idx (csv:split qrow))
74+
(set! idx (+ idx 1))
75+
(set! qrow "") (set! qcount 0)
76+
))
5977
))) rows)
60-
(if (> (string-length qrow) 0) (begin
78+
(if (> (string-length qrow) 0) (begin
6179
(vector-set! finalrows idx (csv:split qrow))
6280
(set! idx (+ idx 1))))
63-
(vector->list (subvector finalrows 0 idx))))
81+
(vector->list (subvector finalrows 0 idx)))))
6482

6583
(define (csv:split str)
6684
;; First, split the line normally on all commas
@@ -130,14 +148,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
130148

131149
;; 1. csv-write the given list
132150
;; 2. csv-read it
133-
;; 3. compare
151+
;; 3. compare
134152
(define (csv-unit-test-list testlist)
135153
(let ((f (string-append (system-directory) (system-pathseparator) "csvtest.csv")))
136-
154+
137155
;; Remove file if it exists
138156
(if (file-exists? f)
139157
(delete-file f))
140-
158+
141159
;; Write and then read
142160
(csv-write f testlist)
143161
(let ((output (csv-read f)))
@@ -183,30 +201,30 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
183201

184202
;; Do comparison
185203
(equal? loadedstring outputstring)))
186-
)
187-
204+
)
205+
188206
;; 1. write the given string to a file
189207
;; 2. csv-read it
190208
;; 3. compare
191209
(define (csv-unit-test-read teststring testlist)
192210
(let ((f (string-append (system-directory) (system-pathseparator) "csvtest.csv")))
193-
211+
194212
;; Remove file if it exists
195213
(if (file-exists? f)
196214
(delete-file f))
197-
215+
198216
(let ((fh (open-output-file f)))
199217

200218
;; Output to the file
201219
(display teststring fh)
202220
(close-output-port fh)
203-
221+
204222
;; Write and then read=
205223
(let ((output (csv-read f)))
206224
(delete-file f)
207225
(equal? testlist output))))
208226
)
209-
227+
210228
(unit-test "csv-write-read" "Quotation marks and line feed"
211229
(lambda ()
212230
(csv-unit-test-list csv:test_line_feed))
@@ -218,20 +236,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
218236
)
219237

220238
(unit-test "csv-write-read" "Trend File 1"
221-
(lambda ()
239+
(lambda ()
222240
(csv-unit-test-string csv:test_trend_file1 csv:test_trend_file1))
223241
)
224242

225243
(unit-test "csv-write-read" "Trend File 2 Write-Read"
226-
(lambda ()
244+
(lambda ()
227245
(csv-unit-test-list csv:test_trend_list))
228246
)
229247

230248
(unit-test "csv-write-read" "Trend File 2 Read"
231-
(lambda ()
249+
(lambda ()
232250
(csv-unit-test-read csv:test_trend_file2 csv:test_trend_list))
233251
)
234-
252+
235253

236254

237255

0 commit comments

Comments
 (0)