3131// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
3232// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
3333// OF THE POSSIBILITY OF SUCH DAMAGE.
34-
3534package org .owasp .encoder ;
3635
3736import java .nio .CharBuffer ;
3837import java .nio .charset .CoderResult ;
3938
4039/**
41- * CDATAEncoder -- encoder for CDATA sections. CDATA sections are generally
42- * good for including large blocks of text that contain characters that
43- * normally require encoding (ampersand, quotes, less-than, etc...). The
44- * CDATA context however still does not allow invalid characters, and can
45- * be closed by the sequence "]]>". This encoder removes invalid XML
46- * characters, and encodes "]]>" (to "]]>]]<![CDATA[>"). The result is
47- * that the data integrity is maintained, but the code receiving the output
48- * will have to handle multiple CDATA events with character events between.
49- * As an alternate approach, the caller could pre-encode "]]>" to something
50- * of their choosing (e.g. data.replaceAll("\\]\\]>", "]] >")), then use
51- * this encoder to remove any invalid XML characters.
40+ * CDATAEncoder -- encoder for CDATA sections. CDATA sections are generally good for including large blocks of text that contain
41+ * characters that normally require encoding (ampersand, quotes, less-than, etc...). The CDATA context however still does not
42+ * allow invalid characters, and can be closed by the sequence "]]>". This encoder removes invalid XML characters, and encodes
43+ * "]]>" (to "]]>]]<![CDATA[>"). The result is that the data integrity is maintained, but the code receiving the output will
44+ * have to handle multiple CDATA events with character events between. As an alternate approach, the caller could pre-encode "]]>"
45+ * to something of their choosing (e.g. data.replaceAll("\\]\\]>", "]] >")), then use this encoder to remove any invalid XML
46+ * characters.
5247 *
5348 * @author Jeff Ichnowski
5449 */
5550class CDATAEncoder extends Encoder {
5651
57- /** The encoding of @{code "]]>"}. */
58- private static final char [] CDATA_END_ENCODED =
59- "]]>]]<![CDATA[>" .toCharArray ();
52+ /**
53+ * The encoding of {@code "]]>"}.
54+ */
55+ private static final char [] CDATA_END_ENCODED
56+ = "]]>]]<![CDATA[>" .toCharArray ();
6057
61- /** Length of {@code "]]>]]<![CDATA[>"}. */
58+ /**
59+ * Length of {@code "]]>]]<![CDATA[>"}.
60+ */
6261 private static final int CDATA_END_ENCODED_LENGTH = 15 ;
6362
64- /** Length of {@code "]]>"}. */
63+ /**
64+ * Length of {@code "]]>"}.
65+ */
6566 private static final int CDATA_END_LENGTH = 3 ;
6667
6768 @ Override
@@ -83,35 +84,37 @@ protected int maxEncodedLength(int n) {
8384 @ Override
8485 protected int firstEncodedOffset (String input , int off , int len ) {
8586 final int n = off + len ;
86- int closeCount = 0 ;
87- for (int i = off ; i < n ; ++i ) {
87+ // int closeCount = 0; //unused...
88+ for (int i = off ; i < n ; ++i ) {
8889 char ch = input .charAt (i );
8990 if (ch <= Unicode .MAX_ASCII ) {
9091 if (ch != ']' ) {
91- if (ch >= ' ' || ch == '\n' || ch == '\r' || ch == '\t' ) {
92- // valid
93- } else {
92+ if (ch < ' ' && ch != '\n' && ch != '\r' && ch != '\t' ) {
9493 return i ;
94+ // } else {
95+ // // valid
9596 }
97+
9698 } else {
97- if (i + 1 < n ) {
98- if (input .charAt (i + 1 ) != ']' ) {
99+ if (i + 1 < n ) {
100+ if (input .charAt (i + 1 ) != ']' ) {
99101 // "]x" (next character is safe for this to be ']')
100102 } else {
101103 // "]]?"
102104 // keep looping through ']'
103- for ( ; i + 2 < n && input .charAt (i + 2 ) == ']' ; ++i ) {
105+ for (; i + 2 < n && input .charAt (i + 2 ) == ']' ; ++i ) {
104106 // valid
105107 }
106108 // at this point we've looped through a sequence
107109 // of 2 or more "]", if the next character is ">"
108110 // we need to encode "]]>".
109- if (i + 2 < n ) {
110- if (input .charAt (i + 2 ) == '>' ) {
111+ if (i + 2 < n ) {
112+ if (input .charAt (i + 2 ) == '>' ) {
111113 return i ;
112- } else {
113- // valid
114+ // } else {
115+ // // valid
114116 }
117+
115118 } else {
116119 return n ;
117120 }
@@ -121,15 +124,15 @@ protected int firstEncodedOffset(String input, int off, int len) {
121124 }
122125 }
123126 } else if (ch < Character .MIN_HIGH_SURROGATE ) {
124- if (ch > Unicode .MAX_C1_CTRL_CHAR || ch == Unicode .NEL ) {
125- // valid
126- } else {
127+ if (ch <= Unicode .MAX_C1_CTRL_CHAR && ch != Unicode .NEL ) {
127128 return i ;
129+ // } else {
130+ // // valid
128131 }
129132 } else if (ch <= Character .MAX_HIGH_SURROGATE ) {
130- if (i + 1 < n ) {
131- if (Character .isLowSurrogate (input .charAt (i + 1 ))) {
132- int cp = Character .toCodePoint (ch , input .charAt (i + 1 ));
133+ if (i + 1 < n ) {
134+ if (Character .isLowSurrogate (input .charAt (i + 1 ))) {
135+ int cp = Character .toCodePoint (ch , input .charAt (i + 1 ));
133136 if (Unicode .isNonCharacter (cp )) {
134137 return i ;
135138 } else {
@@ -143,16 +146,14 @@ protected int firstEncodedOffset(String input, int off, int len) {
143146 // end of input, high without low = invalid
144147 return i ;
145148 }
146- } else if (
147- // low surrogate without preceding high surrogate
148- ch <= Character .MAX_LOW_SURROGATE ||
149- // non characters
150- ch > '\ufffd' ||
151- ('\ufdd0' <= ch && ch <= '\ufdef' ))
152- {
149+ } else if ( // low surrogate without preceding high surrogate
150+ ch <= Character .MAX_LOW_SURROGATE
151+ || // non characters
152+ ch > '\ufffd'
153+ || ('\ufdd0' <= ch && ch <= '\ufdef' )) {
153154 return i ;
154- } else {
155- // valid
155+ // } else {
156+ // // valid
156157 }
157158
158159 }
@@ -168,7 +169,7 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
168169 int j = output .arrayOffset () + output .position ();
169170 final int m = output .arrayOffset () + output .limit ();
170171
171- for ( ; i < n ; ++i ) {
172+ for (; i < n ; ++i ) {
172173 char ch = in [i ];
173174 if (ch <= Unicode .MAX_ASCII ) {
174175 if (ch != ']' ) {
@@ -181,8 +182,8 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
181182 out [j ++] = XMLEncoder .INVALID_CHARACTER_REPLACEMENT ;
182183 }
183184 } else {
184- if (i + 1 < n ) {
185- if (in [i + 1 ] != ']' ) {
185+ if (i + 1 < n ) {
186+ if (in [i + 1 ] != ']' ) {
186187 // "]x" (next character is safe for this to be ']')
187188 if (j >= m ) {
188189 return overflow (input , i , output , j );
@@ -191,7 +192,7 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
191192 } else {
192193 // "]]?"
193194 // keep looping through ']'
194- for ( ; i + 2 < n && in [i + 2 ] == ']' ; ++i ) {
195+ for (; i + 2 < n && in [i + 2 ] == ']' ; ++i ) {
195196 if (j >= m ) {
196197 return overflow (input , i , output , j );
197198 }
@@ -200,9 +201,9 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
200201 // at this point we've looped through a sequence
201202 // of 2 or more "]", if the next character is ">"
202203 // we need to encode "]]>".
203- if (i + 2 < n ) {
204- if (in [i + 2 ] == '>' ) {
205- if (j + CDATA_END_ENCODED_LENGTH > m ) {
204+ if (i + 2 < n ) {
205+ if (in [i + 2 ] == '>' ) {
206+ if (j + CDATA_END_ENCODED_LENGTH > m ) {
206207 return overflow (input , i , output , j );
207208 }
208209 System .arraycopy (CDATA_END_ENCODED , 0 , out , j , CDATA_END_ENCODED_LENGTH );
@@ -215,7 +216,7 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
215216 out [j ++] = ']' ;
216217 }
217218 } else if (endOfInput ) {
218- if (j + 2 > m ) {
219+ if (j + 2 > m ) {
219220 return overflow (input , i , output , j );
220221 }
221222 out [j ++] = ']' ;
@@ -252,17 +253,17 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
252253 out [j ++] = XMLEncoder .INVALID_CHARACTER_REPLACEMENT ;
253254 }
254255 } else if (ch <= Character .MAX_HIGH_SURROGATE ) {
255- if (i + 1 < n ) {
256- if (Character .isLowSurrogate (in [i + 1 ])) {
257- int cp = Character .toCodePoint (ch , in [i + 1 ]);
256+ if (i + 1 < n ) {
257+ if (Character .isLowSurrogate (in [i + 1 ])) {
258+ int cp = Character .toCodePoint (ch , in [i + 1 ]);
258259 if (Unicode .isNonCharacter (cp )) {
259260 if (j >= m ) {
260261 return overflow (input , i , output , j );
261262 }
262263 out [j ++] = XMLEncoder .INVALID_CHARACTER_REPLACEMENT ;
263264 ++i ;
264265 } else {
265- if (j + 1 >= m ) {
266+ if (j + 1 >= m ) {
266267 return overflow (input , i , output , j );
267268 }
268269 out [j ++] = ch ;
@@ -284,13 +285,11 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
284285 } else {
285286 break ;
286287 }
287- } else if (
288- // low surrogate without preceding high surrogate
289- ch <= Character .MAX_LOW_SURROGATE ||
290- // non characters
291- ch > '\ufffd' ||
292- ('\ufdd0' <= ch && ch <= '\ufdef' ))
293- {
288+ } else if ( // low surrogate without preceding high surrogate
289+ ch <= Character .MAX_LOW_SURROGATE
290+ || // non characters
291+ ch > '\ufffd'
292+ || ('\ufdd0' <= ch && ch <= '\ufdef' )) {
294293 if (j >= m ) {
295294 return overflow (input , i , output , j );
296295 }
0 commit comments