1717 * using UTF-8 ByteLists as both input and output.
1818 */
1919abstract class ByteListTranscoder {
20- protected final ThreadContext context ;
21-
2220 protected ByteList src ;
2321 protected int srcEnd ;
2422 /** Position where the last read character started */
2523 protected int charStart ;
2624 /** Position of the next character to read */
2725 protected int pos ;
2826
29- private OutputStream out ;
3027 /**
3128 * When a character that can be copied straight into the output is found,
3229 * its index is stored in this variable, and copying is delayed until
@@ -36,20 +33,15 @@ abstract class ByteListTranscoder {
3633 */
3734 private int quoteStart = -1 ;
3835
39- protected ByteListTranscoder (ThreadContext context ) {
40- this .context = context ;
41- }
42-
43- protected void init (ByteList src , OutputStream out ) {
44- this .init (src , 0 , src .length (), out );
36+ protected void init (ByteList src ) {
37+ this .init (src , 0 , src .length ());
4538 }
4639
47- protected void init (ByteList src , int start , int end , OutputStream out ) {
40+ protected void init (ByteList src , int start , int end ) {
4841 this .src = src ;
4942 this .pos = start ;
5043 this .charStart = start ;
5144 this .srcEnd = end ;
52- this .out = out ;
5345 }
5446
5547 /**
@@ -70,52 +62,57 @@ private char next() {
7062 * Reads a UTF-8 character from the input and returns its code point,
7163 * while advancing the input position.
7264 *
73- * <p>Raises an {@link #invalidUtf8()} exception if an invalid byte
65+ * <p>Raises an {@link #invalidUtf8(ThreadContext )} exception if an invalid byte
7466 * is found.
7567 */
76- protected int readUtf8Char () {
68+ protected int readUtf8Char (ThreadContext context ) {
7769 charStart = pos ;
7870 char head = next ();
7971 if (head <= 0x7f ) { // 0b0xxxxxxx (ASCII)
8072 return head ;
8173 }
8274 if (head <= 0xbf ) { // 0b10xxxxxx
83- throw invalidUtf8 (); // tail byte with no head
75+ throw invalidUtf8 (context ); // tail byte with no head
8476 }
8577 if (head <= 0xdf ) { // 0b110xxxxx
86- ensureMin (1 );
78+ ensureMin (context , 1 );
8779 int cp = ((head & 0x1f ) << 6 )
88- | nextPart ();
89- if (cp < 0x0080 ) throw invalidUtf8 ();
80+ | nextPart (context );
81+ if (cp < 0x0080 ) throw invalidUtf8 (context );
9082 return cp ;
9183 }
9284 if (head <= 0xef ) { // 0b1110xxxx
93- ensureMin (2 );
85+ ensureMin (context , 2 );
9486 int cp = ((head & 0x0f ) << 12 )
95- | (nextPart () << 6 )
96- | nextPart ();
97- if (cp < 0x0800 ) throw invalidUtf8 ();
87+ | (nextPart (context ) << 6 )
88+ | nextPart (context );
89+ if (cp < 0x0800 ) throw invalidUtf8 (context );
9890 return cp ;
9991 }
10092 if (head <= 0xf7 ) { // 0b11110xxx
101- ensureMin (3 );
93+ ensureMin (context , 3 );
10294 int cp = ((head & 0x07 ) << 18 )
103- | (nextPart () << 12 )
104- | (nextPart () << 6 )
105- | nextPart ();
106- if (!Character .isValidCodePoint (cp )) throw invalidUtf8 ();
95+ | (nextPart (context ) << 12 )
96+ | (nextPart (context ) << 6 )
97+ | nextPart (context );
98+ if (!Character .isValidCodePoint (cp )) throw invalidUtf8 (context );
10799 return cp ;
108100 }
109101 // 0b11111xxx?
110- throw invalidUtf8 ();
102+ throw invalidUtf8 (context );
103+ }
104+
105+ protected int readASCIIChar () {
106+ charStart = pos ;
107+ return next ();
111108 }
112109
113110 /**
114111 * Throws a GeneratorError if the input list doesn't have at least this
115112 * many bytes left.
116113 */
117- protected void ensureMin (int n ) {
118- if (pos + n > srcEnd ) throw incompleteUtf8 ();
114+ protected void ensureMin (ThreadContext context , int n ) {
115+ if (pos + n > srcEnd ) throw incompleteUtf8 (context );
119116 }
120117
121118 /**
@@ -124,10 +121,10 @@ protected void ensureMin(int n) {
124121 *
125122 * <p>Throws a GeneratorError if the byte is not a valid tail.
126123 */
127- private int nextPart () {
124+ private int nextPart (ThreadContext context ) {
128125 char c = next ();
129126 // tail bytes must be 0b10xxxxxx
130- if ((c & 0xc0 ) != 0x80 ) throw invalidUtf8 ();
127+ if ((c & 0xc0 ) != 0x80 ) throw invalidUtf8 (context );
131128 return c & 0x3f ;
132129 }
133130
@@ -147,23 +144,19 @@ protected void quoteStart() {
147144 */
148145 protected void quoteStop (int endPos ) throws IOException {
149146 if (quoteStart != -1 ) {
150- out . write (src .bytes (), quoteStart , endPos - quoteStart );
147+ append (src .unsafeBytes (), src . begin () + quoteStart , endPos - quoteStart );
151148 quoteStart = -1 ;
152149 }
153150 }
154151
155- protected void append (int b ) throws IOException {
156- out .write (b );
157- }
152+ protected abstract void append (int b ) throws IOException ;
158153
159- protected void append (byte [] origin , int start , int length ) throws IOException {
160- out .write (origin , start , length );
161- }
154+ protected abstract void append (byte [] origin , int start , int length ) throws IOException ;
162155
163156
164- protected abstract RaiseException invalidUtf8 ();
157+ protected abstract RaiseException invalidUtf8 (ThreadContext context );
165158
166- protected RaiseException incompleteUtf8 () {
167- return invalidUtf8 ();
159+ protected RaiseException incompleteUtf8 (ThreadContext context ) {
160+ return invalidUtf8 (context );
168161 }
169162}
0 commit comments