Skip to content

Commit 8071349

Browse files
committed
added error handling to UTF8 streams
1 parent 2491d2b commit 8071349

File tree

3 files changed

+38
-17
lines changed

3 files changed

+38
-17
lines changed

benchmarks/deftype_fields.pxi

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
b))
1212

1313

14-
(def adder (->Adder 1 0))
15-
(dotimes [x (* 1024 1024 1024)]
14+
15+
(def adder (->Adder 1.0 0))
16+
(println "Starting....")
17+
(dotimes [x (* 1024 1024 1024 20)]
1618
(assert (= (inc x) (add-them adder))))
19+
(println "Ending....")

pixie/math.pxi

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,8 @@
2323
(i/defcfn ceil)
2424
(i/defcfn fabs)
2525
(i/defcfn floor)
26-
(i/defcfn fmod))
26+
(i/defcfn fmod)
27+
28+
(i/defconst M_PI))
29+
30+
(def pi M_PI)

pixie/streams/utf8.pxi

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
(require pixie.streams :refer :all))
33

44
(defprotocol IUTF8OutputStream
5-
(write-char [this char]))
5+
(write-char [this char] "Write a single character to the UTF8 stream"))
66

77
(defprotocol IUTF8InputStream
8-
(read-char [this]))
8+
(read-char [this] "Read a single character from the UTF8 stream"))
99

1010
(deftype UTF8OutputStream [out]
1111
IUTF8OutputStream
@@ -21,35 +21,49 @@
2121
(<= ch 0x1FFFFF) (do (write-byte out (bit-or 0xF0 (bit-shift-right ch 18))) ; 4-byte lead is 11110xxx (0xF0); 0xE0 is the 3-byte prefix
2222
(write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 12) 0x3F)))
2323
(write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 6) 0x3F)))
24-
(write-byte out (bit-or 0x80 (bit-and ch 0x3F))) ))))
24+
(write-byte out (bit-or 0x80 (bit-and ch 0x3F))))
25+
:else (assert false (str "Cannot encode a UTF8 character of code " ch)))))
2526
IDisposable
2627
(-dispose! [this]
2728
(dispose! out)))
2829

2930

30-
(deftype UTF8InputStream [in]
31+
(deftype UTF8InputStream [in bad-char]
3132
IUTF8InputStream
3233
(read-char [this]
3334
(let [ch (int (read-byte in))
34-
[n bytes] (cond
35-
(>= 0x7F ch) [ch 1]
36-
(= 0xC0 (bit-and ch 0xE0)) [(bit-and ch 31) 2]
37-
(= 0xE0 (bit-and ch 0xF0)) [(bit-and ch 15) 3]
38-
(= 0xF0 (bit-and ch 0xF8)) [(bit-and ch 7) 4]
39-
:else (assert false (str "Got bad code " ch)))]
35+
[n bytes error?] (cond
36+
(>= 0x7F ch) [ch 1]
37+
(= 0xC0 (bit-and ch 0xE0)) [(bit-and ch 31) 2 false]
38+
(= 0xE0 (bit-and ch 0xF0)) [(bit-and ch 15) 3 false]
39+
(= 0xF0 (bit-and ch 0xF8)) [(bit-and ch 7) 4 false]
40+
(= 0xF8 (bit-and ch 0xFC)) [(bit-and ch 3) 5 true] ; 111110xx: mask with 0xFC so 0xFC/0xFD leads fall through to the 6-byte branch
41+
(= 0xFC (bit-and ch 0xFE)) [(bit-and ch 1) 6 true]
42+
:else [ch 1 true])] ; unrecognised lead byte: n is not bound inside its own initializer, so carry the raw byte ch
4043
(loop [i (dec bytes)
4144
n n]
4245
(if (pos? i)
4346
(recur (dec i)
4447
(bit-or (bit-shift-left n 6)
4548
(bit-and (read-byte in) 0x3F)))
46-
(char n)))))
49+
(if error?
50+
(if bad-char
51+
bad-char
52+
(throw (str "Invalid UTF8 character decoded: " n)))
53+
(char n))))))
4754
IDisposable
4855
(-dispose! [this]
4956
(dispose! in)))
5057

51-
(defn utf8-input-stream [i]
52-
(->UTF8InputStream i))
58+
(defn utf8-input-stream
59+
"Creates a UTF8 decoder that reads characters from the given IByteInputStream. If a bad character is found
60+
an error will be thrown, unless an optional bad-character marker character is provided."
61+
([i]
62+
(->UTF8InputStream i nil))
63+
([i bad-char]
64+
(->UTF8InputStream i bad-char)))
5365

54-
(defn utf8-output-stream [o]
66+
(defn utf8-output-stream
67+
"Creates a UTF8 encoder that writes characters to the given IByteOutputStream."
68+
[o]
5569
(->UTF8OutputStream o))

0 commit comments

Comments
 (0)