Skip to content

Commit 2491d2b

Browse files
committed
added basic utf8 encoder-decoder
1 parent 2c4012a commit 2491d2b

File tree

3 files changed

+90
-0
lines changed

3 files changed

+90
-0
lines changed

pixie/io.pxi

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,27 @@
131131
(set-buffer-count! buffer idx)
132132
(write downstream buffer)))
133133

134+
;; Byte-at-a-time reader that serves bytes out of `buffer`, refilling it from
;; `upstream` whenever every buffered byte has been handed out.
;;   upstream - stream passed to `read` when the buffer needs refilling
;;   idx      - position in `buffer` of the next byte to return
;;   buffer   - storage that `read` fills from `upstream`
(deftype BufferedInputStream [upstream idx buffer]
  IByteInputStream
  (read-byte [this]
    ;; Buffer exhausted: rewind the cursor and request a full buffer's worth.
    ;; NOTE(review): presumably `read` updates the buffer's count to the number
    ;; of bytes actually read; end-of-stream (nothing read) is not handled
    ;; here -- confirm against the upstream implementation.
    (if (= (count buffer) idx)
      (do
        (set-field! this :idx 0)
        (read upstream buffer (buffer-capacity buffer))))
    ;; Fetch the byte under the cursor, advance the cursor, return the byte.
    (let [current (nth buffer idx)
          following (inc idx)]
      (set-field! this :idx following)
      current))
  IDisposable
  ;; Releases the upstream stream first, then the buffer.
  (-dispose! [this]
    (dispose! upstream)
    (dispose! buffer)))
147+
134148
;; Wraps `downstream` in a BufferedOutputStream backed by a freshly created
;; buffer of `size`, with the write index starting at 0.
(defn buffered-output-stream [downstream size]
  (let [storage (buffer size)]
    (->BufferedOutputStream downstream 0 storage)))
136150

151+
;; Wraps `upstream` in a BufferedInputStream backed by a freshly created
;; buffer of `size`.
;; The buffer's count and the stream's read index are both initialised to
;; `size`, so the very first `read-byte` sees index = count and refills the
;; buffer from `upstream` before returning anything.
(defn buffered-input-stream [upstream size]
  (let [storage (buffer size)]
    (set-buffer-count! storage size)
    (->BufferedInputStream upstream size storage)))
137155

138156
(defn throw-on-error [result]
139157
(when (neg? result)

pixie/streams/utf8.pxi

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
(ns pixie.streams.utf8
2+
(require pixie.streams :refer :all))
3+
4+
;; Protocol for character sinks: `write-char` takes the stream and one
;; character to write. UTF8OutputStream in this namespace implements it.
(defprotocol IUTF8OutputStream
5+
(write-char [this char]))
6+
7+
;; Protocol for character sources: `read-char` takes the stream and yields
;; the next character. UTF8InputStream in this namespace implements it.
(defprotocol IUTF8InputStream
8+
(read-char [this]))
9+
10+
;; Character sink that encodes each character as UTF-8 and forwards the
;; resulting bytes, one at a time, to `out` via `write-byte`.
(deftype UTF8OutputStream [out]
  IUTF8OutputStream
  ;; Writes `ch` to `out` as a 1- to 4-byte UTF-8 sequence selected by the
  ;; magnitude of its integer code. Codes above 0x1FFFFF cannot be expressed
  ;; in four bytes and fail an assertion rather than being silently dropped.
  (write-char [this ch]
    (let [ch (int ch)]
      (cond
        ;; 1 byte: 0xxxxxxx
        (<= ch 0x7F)
        (write-byte out ch)

        ;; 2 bytes: 110xxxxx 10xxxxxx
        (<= ch 0x7FF)
        (do (write-byte out (bit-or 0xC0 (bit-shift-right ch 6)))
            (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))

        ;; 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        (<= ch 0xFFFF)
        (do (write-byte out (bit-or 0xE0 (bit-shift-right ch 12)))
            (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 6) 0x3F)))
            (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))

        ;; 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        ;; The lead marker must be 0xF0. With 0xE0 the lead byte looks like
        ;; the start of a 3-byte sequence, so a UTF-8 reader consumes too few
        ;; continuation bytes and decodes the wrong character.
        (<= ch 0x1FFFFF)
        (do (write-byte out (bit-or 0xF0 (bit-shift-right ch 18)))
            (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 12) 0x3F)))
            (write-byte out (bit-or 0x80 (bit-and (bit-shift-right ch 6) 0x3F)))
            (write-byte out (bit-or 0x80 (bit-and ch 0x3F))))

        :else
        (assert false (str "Code point out of UTF-8 range " ch)))))
  IDisposable
  ;; Disposing the encoder disposes the wrapped byte stream.
  (-dispose! [this]
    (dispose! out)))
28+
29+
30+
;; Character source that pulls bytes from `in` via `read-byte` and decodes
;; them as UTF-8.
(deftype UTF8InputStream [in]
  IUTF8InputStream
  ;; Reads one lead byte, derives the sequence length (1-4) and the payload
  ;; bits carried by the lead byte, then folds in the low six bits of each
  ;; following byte. A lead byte matching none of the four patterns fails an
  ;; assertion. Following bytes are not checked for the 10xxxxxx prefix.
  (read-char [this]
    (let [lead (int (read-byte in))
          [payload len] (cond
                          (<= lead 0x7F) [lead 1]
                          (= (bit-and lead 0xE0) 0xC0) [(bit-and lead 0x1F) 2]
                          (= (bit-and lead 0xF0) 0xE0) [(bit-and lead 0x0F) 3]
                          (= (bit-and lead 0xF8) 0xF0) [(bit-and lead 0x07) 4]
                          :else (assert false (str "Got bad code " lead)))]
      ;; `remaining` counts the continuation bytes still to be consumed.
      (loop [remaining (dec len)
             acc payload]
        (if (pos? remaining)
          (recur (dec remaining)
                 (bit-or (bit-shift-left acc 6)
                         (bit-and (read-byte in) 0x3F)))
          (char acc)))))
  IDisposable
  ;; Disposing the decoder disposes the wrapped byte stream.
  (-dispose! [this]
    (dispose! in)))
50+
51+
;; Wraps `i` in a UTF8InputStream. The wrapper calls `read-byte` and
;; `dispose!` on `i`, so `i` must support both.
(defn utf8-input-stream [i]
52+
(->UTF8InputStream i))
53+
54+
;; Wraps `o` in a UTF8OutputStream. The wrapper calls `write-byte` and
;; `dispose!` on `o`, so `o` must support both.
(defn utf8-output-stream [o]
55+
(->UTF8OutputStream o))
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
(ns pixie.streams.test-utf8
2+
(require pixie.streams.utf8 :refer :all)
3+
(require pixie.io :as io)
4+
(require pixie.test :refer :all))
5+
6+
7+
;; Round-trip test: writes the characters for codes 0..31999 through a
;; buffered UTF-8 output stream into a temp file, then reads them back through
;; a buffered UTF-8 input stream and checks each code matches.
;; All codes are below 0x10000, so only 1-, 2- and 3-byte sequences are
;; exercised; the 4-byte path is not covered.
(deftest test-writing-ints
  (let [path "/tmp/pixie-utf-test.txt"
        total 32000]
    (using [os (utf8-output-stream
                 (io/buffered-output-stream (io/open-write path) 1024))]
      (dotimes [code total]
        (write-char os (char code))))
    (using [is (utf8-input-stream
                 (io/buffered-input-stream (io/open-read path) 1024))]
      (dotimes [code total]
        (assert= code (int (read-char is)))))))

0 commit comments

Comments
 (0)