Skip to content

Commit 29f50f7

Browse files
authored
Merge pull request #32 from gsnewmark/fix-unicode-decoding
Fix issue #30 Unicode decoding in conversion to CharSequence
2 parents 7db6dc5 + 19606d3 commit 29f50f7

File tree

1 file changed

+28
-7
lines changed

1 file changed

+28
-7
lines changed

src/byte_streams/char_sequence.clj

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,22 @@
3333
(parse-result (.decode decoder in out false)))
3434

3535
;; Finishes a decode: calls `.decode` on the decoder with the end-of-input
;; flag set to true, then `.flush`, passing each result through `parse-result`.
;; `and` short-circuits, so `.flush` is only invoked when the parse-result of
;; the final `.decode` is truthy.
;; In this diff the removed ("-") single-arity form always decoded an empty
;; ByteBuffer. The added ("+") form takes an explicit `in` buffer (leftover
;; bytes to decode), and its two-argument arity delegates with an empty
;; ByteBuffer, which keeps the previous call shape working.
(defn flush
36-
[^CharsetDecoder decoder ^CharBuffer out]
37-
(and
38-
(parse-result (.decode decoder (ByteBuffer/allocate 0) out true))
39-
(parse-result (.flush decoder out))))
36+
([decoder out] (flush decoder (ByteBuffer/allocate 0) out))
37+
([^CharsetDecoder decoder ^ByteBuffer in ^CharBuffer out]
38+
(and
39+
(parse-result (.decode decoder in out true))
40+
(parse-result (.flush decoder out)))))
41+
42+
;; Returns the result of `.hasRemaining` on `byte-buffer`.
;; The :pre condition requires `byte-buffer` to be non-nil, so callers that may
;; hold nil must guard the call themselves.
(defn has-remaining-bytes? [^ByteBuffer byte-buffer]
43+
{:pre [(some? byte-buffer)]}
44+
(.hasRemaining byte-buffer))
45+
46+
;; Concatenates the remaining bytes of `l` and `r` into a freshly allocated
;; ByteBuffer: capacity is (.remaining l) + (.remaining r), `l` is put first,
;; then `r`, and the result is flipped before being returned.
;; Both arguments must be non-nil (enforced by :pre).
;; NOTE(review): java.nio's ByteBuffer.put(ByteBuffer) is documented to advance
;; the source buffer's position, so `l` and `r` are presumably left with no
;; remaining bytes after this call -- confirm no caller reuses them.
(defn merge-byte-buffers [^ByteBuffer l ^ByteBuffer r]
47+
{:pre [(some? l) (some? r)]}
48+
(-> (ByteBuffer/allocate (+ (.remaining l) (.remaining r)))
49+
(.put l)
50+
(.put r)
51+
.flip))
4052

4153
(defn lazy-char-buffer-sequence
4254
[^CharsetDecoder decoder
@@ -64,17 +76,26 @@
6476
(lazy-char-buffer-sequence decoder chunk-size extra-bytes close-fn byte-source))
6577

6678
(if-let [in (byte-source chunk-size)]
67-
(let [result (decode decoder in out)]
79+
(let [expanded-in (if (some-> extra-bytes has-remaining-bytes?)
80+
;; in case of underflow we need to pass a new buffer
81+
;; containing remaining bytes from the initial input
82+
;; along with some new bytes to the CharsetDecoder
83+
(merge-byte-buffers extra-bytes in)
84+
in)
85+
result (decode decoder expanded-in out)]
6886
(cons
6987
(.flip out)
7088
(lazy-char-buffer-sequence
7189
decoder
7290
chunk-size
73-
(when (= :overflow result) in)
91+
(when (has-remaining-bytes? expanded-in)
92+
expanded-in)
7493
close-fn
7594
byte-source)))
7695
(do
77-
(flush decoder out)
96+
(if (some? extra-bytes)
97+
(flush decoder extra-bytes out)
98+
(flush decoder out))
7899
(when close-fn (close-fn))
79100
(.flip out)))))))
80101

0 commit comments

Comments
 (0)