Skip to content

Commit 6993adc

Browse files
committed
Merge pull request #261 from pixie-lang/utf8-streams
Utf8 streams
2 parents 57308e2 + 1eb6e0a commit 6993adc

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

pixie/io.pxi

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,27 @@
131131
(set-buffer-count! buffer idx)
132132
(write downstream buffer)))
133133

134+
;; Byte input stream that hands out bytes from `buffer` one at a time and
;; refills `buffer` from `upstream` whenever the read position reaches the
;; buffer's count.
;;   upstream - source handed to `read`; disposed together with this stream
;;   idx      - position in `buffer` of the next byte to return
;;   buffer   - storage that `read` is asked to fill from `upstream`
(deftype BufferedInputStream [upstream idx buffer]
  IByteInputStream
  (read-byte [this]
    ;; Every buffered byte has been consumed: rewind the position, then ask
    ;; upstream for up to a full buffer of fresh bytes.
    ;; NOTE(review): the result of `read` is ignored here, so what happens at
    ;; end of stream is not visible from this block -- confirm.
    (if (= idx (count buffer))
      (do
        (set-field! this :idx 0)
        (read upstream buffer (buffer-capacity buffer)))
      nil)
    ;; NOTE(review): this relies on `idx` reflecting the value just stored by
    ;; set-field! above -- confirm field-access semantics.
    (let [next-byte (nth buffer idx)]
      (set-field! this :idx (inc idx))
      next-byte))
  IDisposable
  (-dispose! [this]
    ;; Release the wrapped stream first, then the buffer.
    (dispose! upstream)
    (dispose! buffer)))
147+
134148
;; Wraps `downstream` in a BufferedOutputStream backed by a freshly created
;; buffer made with `(buffer size)`. The second constructor argument starts at
;; 0 (presumably the write position -- confirm against the deftype).
(defn buffered-output-stream [downstream size]
  (let [storage (buffer size)]
    (->BufferedOutputStream downstream 0 storage)))
136150

151+
;; Wraps `upstream` in a BufferedInputStream backed by a freshly created
;; buffer made with `(buffer size)`.
;; The buffer's count and the stream's starting index are both set to `size`.
;; NOTE(review): presumably this makes the very first read-byte see the buffer
;; as fully consumed and trigger a refill from upstream -- confirm that
;; `count` on a buffer reports the value given to set-buffer-count!.
(defn buffered-input-stream [upstream size]
  (let [storage (buffer size)]
    (set-buffer-count! storage size)
    (->BufferedInputStream upstream size storage)))
137155

138156
(defn throw-on-error [result]
139157
(when (neg? result)

pixie/streams/utf8.pxi

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
(ns pixie.streams.utf8
2+
(require pixie.streams :refer :all))
3+
4+
;; Protocol for streams that accept characters for UTF-8 output.
(defprotocol IUTF8OutputStream
  (write-char [this char]
    "Writes one character to the UTF-8 stream."))
6+
7+
;; Protocol for streams that produce characters from UTF-8 input.
(defprotocol IUTF8InputStream
  (read-char [this]
    "Reads one character from the UTF-8 stream."))
9+
10+
;; UTF-8 encoder: converts each character to its UTF-8 byte sequence and
;; writes those bytes, one at a time, to the wrapped byte stream `out`.
(deftype UTF8OutputStream [out]
  IUTF8OutputStream
  ;; Encodes `ch` (converted with `int`) as 1 to 4 bytes depending on its
  ;; magnitude. Codes above 0x1FFFFF fail the assertion in the :else branch.
  (write-char [this ch]
    (let [ch (int ch)]
      (cond
        ;; 1 byte:  0xxxxxxx
        (<= ch 0x7F) (write-byte out ch)
        ;; 2 bytes: 110xxxxx 10xxxxxx
        (<= ch 0x7FF) (do (write-byte out (bit-or 0xC0 (bit-shift-right ch 6)))
                          (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))
        ;; 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        (<= ch 0xFFFF) (do (write-byte out (bit-or 0xE0 (bit-shift-right ch 12)))
                           (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 6) 0x3F)))
                           (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))
        ;; 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        ;; The lead byte marker is 0xF0; 0xE0 would announce a 3-byte sequence
        ;; and make the output undecodable.
        (<= ch 0x1FFFFF) (do (write-byte out (bit-or 0xF0 (bit-shift-right ch 18)))
                             (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 12) 0x3F)))
                             (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 6) 0x3F)))
                             (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))
        :else (assert false (str "Cannot encode a UTF8 character of code " ch)))))
  IDisposable
  ;; Disposing the encoder disposes the wrapped byte stream.
  (-dispose! [this]
    (dispose! out)))
29+
30+
31+
;; UTF-8 decoder: reads bytes from the wrapped byte stream `in` and assembles
;; them into characters.
;;   in       - byte stream that read-byte pulls from
;;   bad-char - value returned in place of an invalid character; when it is
;;              nil/false an invalid character causes a throw instead
(deftype UTF8InputStream [in bad-char]
  IUTF8InputStream
  ;; Reads one lead byte, classifies it to learn the sequence length and the
  ;; payload bits it carries, then folds in 6 payload bits from each following
  ;; byte. Returns the decoded character, or `bad-char`/throws on error.
  (read-char [this]
    (let [ch (int (read-byte in))
          ;; [payload-bits-of-lead total-sequence-length invalid?]
          [n bytes error?] (cond
                             ;; 0xxxxxxx: single byte
                             (>= 0x7F ch) [ch 1 false]
                             ;; 110xxxxx: 2-byte sequence
                             (= 0xC0 (bit-and ch 0xE0)) [(bit-and ch 31) 2 false]
                             ;; 1110xxxx: 3-byte sequence
                             (= 0xE0 (bit-and ch 0xF0)) [(bit-and ch 15) 3 false]
                             ;; 11110xxx: 4-byte sequence
                             (= 0xF0 (bit-and ch 0xF8)) [(bit-and ch 7) 4 false]
                             ;; 111110xx: 5-byte form, consumed but reported
                             ;; as an error. The mask must be 0xFC so that
                             ;; 1111110x leads fall through to the next test.
                             (= 0xF8 (bit-and ch 0xFC)) [(bit-and ch 3) 5 true]
                             ;; 1111110x: 6-byte form, consumed but reported
                             ;; as an error.
                             (= 0xFC (bit-and ch 0xFE)) [(bit-and ch 1) 6 true]
                             ;; Anything else (a stray continuation byte, 0xFE
                             ;; or 0xFF) is a one-byte error carrying the raw
                             ;; byte value for the error message.
                             :else [ch 1 true])]
      ;; Consume the remaining (bytes - 1) bytes, appending the low 6 bits of
      ;; each. The 10xxxxxx prefix of those bytes is not checked.
      (loop [i (dec bytes)
             n n]
        (if (pos? i)
          (recur (dec i)
                 (bit-or (bit-shift-left n 6)
                         (bit-and (read-byte in) 0x3F)))
          (if error?
            (if bad-char
              bad-char
              (throw (str "Invalid UTF8 character decoded: " n)))
            (char n))))))
  IDisposable
  ;; Disposing the decoder disposes the wrapped byte stream.
  (-dispose! [this]
    (dispose! in)))
57+
58+
(defn utf8-input-stream
  "Builds a UTF8 decoder that reads characters from the IByteInputStream `i`.
  When an invalid character is decoded, `bad-char` is returned in its place if
  one was supplied; without it (or when it is nil) an error is thrown."
  ([i]
   (utf8-input-stream i nil))
  ([i bad-char]
   (->UTF8InputStream i bad-char)))
65+
66+
(defn utf8-output-stream
  "Builds a UTF8 encoder whose characters are written, as bytes, to the
  IByteOutputStream `o`."
  [o]
  (->UTF8OutputStream o))
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
(ns pixie.streams.test-utf8
2+
(require pixie.streams.utf8 :refer :all)
3+
(require pixie.io :as io)
4+
(require pixie.test :refer :all))
5+
6+
7+
;; Round-trip test: writes the characters with codes 0..31999 through a
;; buffered UTF8 output stream into a file, then reads the file back through a
;; buffered UTF8 input stream and checks each code comes back in order.
(deftest test-writing-ints
  (let [path "/tmp/pixie-utf-test.txt"]
    ;; Encode phase.
    (using [os (utf8-output-stream
                (io/buffered-output-stream (io/open-write path) 1024))]
      (dotimes [code 32000]
        (write-char os (char code))))
    ;; Decode phase: every character must match the code written at the same
    ;; position.
    (using [is (utf8-input-stream
                (io/buffered-input-stream (io/open-read path) 1024))]
      (dotimes [code 32000]
        (assert= code (int (read-char is)))))))

0 commit comments

Comments
 (0)