3737import java .nio .charset .CoderResult ;
3838
3939/**
40- * CDATAEncoder -- encoder for CDATA sections. CDATA sections are generally good for including large blocks of text that contain
41- * characters that normally require encoding (ampersand, quotes, less-than, etc...). The CDATA context however still does not
42- * allow invalid characters, and can be closed by the sequence "]]>". This encoder removes invalid XML characters, and encodes
43- * "]]>" (to "]]]]><![CDATA[>"). The result is that the data integrity is maintained, but the code receiving the output will
44- * have to handle multiple CDATA events. As an alternate approach, the caller could pre-encode "]]>" to something of their
45- * choosing (e.g. data.replaceAll("\\]\\]>", "]] >")), then use this encoder to remove any invalid XML characters.
40+ * CDATAEncoder -- encoder for CDATA sections. CDATA sections are generally good
41+ * for including large blocks of text that contain characters that normally
42+ * require encoding (ampersand, quotes, less-than, etc.). The CDATA context
43+ * however still does not allow invalid characters, and can be closed by the
44+ * sequence "]]>". This encoder removes invalid XML characters, and encodes
45+ * "]]>" (to "]]]]><![CDATA[>"). The result is that the data integrity is
46+ * maintained, but the code receiving the output will have to handle multiple
47+ * CDATA events. As an alternative approach, the caller could pre-encode "]]>" to
48+ * something of their choosing (e.g. data.replaceAll("\\]\\]>", "]] >")), then
49+ * use this encoder to remove any invalid XML characters.
4650 *
4751 * @author Jeff Ichnowski
4852 */
@@ -94,33 +98,31 @@ protected int firstEncodedOffset(String input, int off, int len) {
9498// // valid
9599 }
96100
97- } else {
98- if (i + 1 < n ) {
99- if (input .charAt (i + 1 ) != ']' ) {
100- // "]x" (next character is safe for this to be ']')
101- } else {
102- // "]]?"
103- // keep looping through ']'
104- for (; i + 2 < n && input .charAt (i + 2 ) == ']' ; ++i ) {
105- // valid
106- }
107- // at this point we've looped through a sequence
108- // of 2 or more "]", if the next character is ">"
109- // we need to encode "]]>".
110- if (i + 2 < n ) {
111- if (input .charAt (i + 2 ) == '>' ) {
112- return i ;
101+ } else if (i + 1 < n ) {
102+ if (input .charAt (i + 1 ) != ']' ) {
103+ // "]x" (next character is safe for this to be ']')
104+ } else {
105+ // "]]?"
106+ // keep looping through ']'
107+ for (; i + 2 < n && input .charAt (i + 2 ) == ']' ; ++i ) {
108+ // valid
109+ }
110+ // at this point we've looped through a sequence
111+ // of 2 or more "]", if the next character is ">"
112+ // we need to encode "]]>".
113+ if (i + 2 < n ) {
114+ if (input .charAt (i + 2 ) == '>' ) {
115+ return i ;
113116// } else {
114117// // valid
115- }
116-
117- } else {
118- return n ;
119118 }
119+
120+ } else {
121+ return n ;
120122 }
121- } else {
122- return n ;
123123 }
124+ } else {
125+ return n ;
124126 }
125127 } else if (ch < Character .MIN_HIGH_SURROGATE ) {
126128 if (ch <= Unicode .MAX_C1_CTRL_CHAR && ch != Unicode .NEL ) {
@@ -145,11 +147,12 @@ protected int firstEncodedOffset(String input, int off, int len) {
145147 // end of input, high without low = invalid
146148 return i ;
147149 }
148- } else if ( // low surrogate without preceding high surrogate
150+ } else if (// low surrogate without preceding high surrogate
149151 ch <= Character .MAX_LOW_SURROGATE
150- || // non characters
151- ch > '\ufffd'
152- || ('\ufdd0' <= ch && ch <= '\ufdef' )) {
152+ // or non-characters
153+ || ch > '\ufffd'
154+ || ('\ufdd0' <= ch && ch <= '\ufdef' ))
155+ {
153156 return i ;
154157// } else {
155158// // valid
@@ -180,63 +183,61 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
180183 } else {
181184 out [j ++] = XMLEncoder .INVALID_CHARACTER_REPLACEMENT ;
182185 }
183- } else {
184- if (i + 1 < n ) {
185- if (in [i + 1 ] != ']' ) {
186- // "]x" (next character is safe for this to be ']')
186+ } else if (i + 1 < n ) {
187+ if (in [i + 1 ] != ']' ) {
188+ // "]x" (next character is safe for this to be ']')
189+ if (j >= m ) {
190+ return overflow (input , i , output , j );
191+ }
192+ out [j ++] = ']' ;
193+ } else {
194+ // "]]?"
195+ // keep looping through ']'
196+ for (; i + 2 < n && in [i + 2 ] == ']' ; ++i ) {
187197 if (j >= m ) {
188198 return overflow (input , i , output , j );
189199 }
190200 out [j ++] = ']' ;
191- } else {
192- // "]]?"
193- // keep looping through ']'
194- for (; i + 2 < n && in [i + 2 ] == ']' ; ++i ) {
195- if (j >= m ) {
201+ }
202+ // at this point we've looped through a sequence
203+ // of 2 or more "]", if the next character is ">"
204+ // we need to encode "]]>".
205+ if (i + 2 < n ) {
206+ if (in [i + 2 ] == '>' ) {
207+ if (j + CDATA_END_ENCODED_LENGTH > m ) {
196208 return overflow (input , i , output , j );
197209 }
198- out [j ++] = ']' ;
199- }
200- // at this point we've looped through a sequence
201- // of 2 or more "]", if the next character is ">"
202- // we need to encode "]]>".
203- if (i + 2 < n ) {
204- if (in [i + 2 ] == '>' ) {
205- if (j + CDATA_END_ENCODED_LENGTH > m ) {
206- return overflow (input , i , output , j );
207- }
208- System .arraycopy (CDATA_END_ENCODED , 0 , out , j , CDATA_END_ENCODED_LENGTH );
209- j += CDATA_END_ENCODED_LENGTH ;
210- i += 2 ;
211- } else {
212- if (j >= m ) {
213- return overflow (input , i , output , j );
214- }
215- out [j ++] = ']' ;
216- }
217- } else if (endOfInput ) {
218- if (j + 2 > m ) {
210+ System .arraycopy (CDATA_END_ENCODED , 0 , out , j , CDATA_END_ENCODED_LENGTH );
211+ j += CDATA_END_ENCODED_LENGTH ;
212+ i += 2 ;
213+ } else {
214+ if (j >= m ) {
219215 return overflow (input , i , output , j );
220216 }
221217 out [j ++] = ']' ;
222- out [j ++] = ']' ;
223- i = n ;
224- break ;
225- } else {
226- break ;
227218 }
219+ } else if (endOfInput ) {
220+ if (j + 2 > m ) {
221+ return overflow (input , i , output , j );
222+ }
223+ out [j ++] = ']' ;
224+ out [j ++] = ']' ;
225+ i = n ;
226+ break ;
227+ } else {
228+ break ;
228229 }
229- } else if (endOfInput ) {
230- // seen "]", then end of input.
231- if (j >= m ) {
232- return overflow (input , i , output , j );
233- }
234- out [j ++] = ']' ;
235- i ++;
236- break ;
237- } else {
238- break ;
239230 }
231+ } else if (endOfInput ) {
232+ // seen "]", then end of input.
233+ if (j >= m ) {
234+ return overflow (input , i , output , j );
235+ }
236+ out [j ++] = ']' ;
237+ i ++;
238+ break ;
239+ } else {
240+ break ;
240241 }
241242 } else if (ch < Character .MIN_HIGH_SURROGATE ) {
242243 if (ch > Unicode .MAX_C1_CTRL_CHAR || ch == Unicode .NEL ) {
@@ -284,11 +285,12 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
284285 } else {
285286 break ;
286287 }
287- } else if ( // low surrogate without preceding high surrogate
288+ } else if (// low surrogate without preceding high surrogate
288289 ch <= Character .MAX_LOW_SURROGATE
289- || // non characters
290- ch > '\ufffd'
291- || ('\ufdd0' <= ch && ch <= '\ufdef' )) {
290+ // or non-characters
291+ || ch > '\ufffd'
292+ || ('\ufdd0' <= ch && ch <= '\ufdef' ))
293+ {
292294 if (j >= m ) {
293295 return overflow (input , i , output , j );
294296 }
@@ -299,9 +301,7 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
299301 }
300302 out [j ++] = ch ;
301303 }
302-
303304 }
304-
305305 return underflow (input , i , output , j );
306306 }
307307
0 commit comments