Skip to content

Commit 5b5dd5f

Browse files
committed
XALANJ-2617: Fixed the serializer's handling of high-surrogate UTF-16 characters in attribute values as well
1 parent 8a735e5 commit 5b5dd5f

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

src/org/apache/xml/serializer/ToStream.java

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
* limitations under the License.
1717
*/
1818
/*
19-
* $Id$
19+
* $Id: ToStream.java 1225444 2011-12-29 05:52:39Z mrglavas $
2020
*/
2121
package org.apache.xml.serializer;
2222

@@ -2109,6 +2109,7 @@ public void writeAttrString(
21092109
}
21102110
string.getChars(0,len, m_attrBuff, 0);
21112111
final char[] stringChars = m_attrBuff;
2112+
int lastDirtyCharProcessed = -1;
21122113

21132114
for (int i = 0; i < len; i++)
21142115
{
@@ -2118,7 +2119,7 @@ public void writeAttrString(
21182119
// The character is supposed to be replaced by a String
21192120
// e.g. '&' --> "&amp;"
21202121
// e.g. '<' --> "&lt;"
2121-
accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
2122+
lastDirtyCharProcessed = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
21222123
}
21232124
else {
21242125
if (0x0 <= ch && ch <= 0x1F) {
@@ -2140,17 +2141,21 @@ public void writeAttrString(
21402141

21412142
case CharInfo.S_HORIZONAL_TAB:
21422143
writer.write("&#9;");
2144+
lastDirtyCharProcessed = i;
21432145
break;
21442146
case CharInfo.S_LINEFEED:
21452147
writer.write("&#10;");
2148+
lastDirtyCharProcessed = i;
21462149
break;
21472150
case CharInfo.S_CARRIAGERETURN:
21482151
writer.write("&#13;");
2152+
lastDirtyCharProcessed = i;
21492153
break;
21502154
default:
21512155
writer.write("&#");
21522156
writer.write(Integer.toString(ch));
21532157
writer.write(';');
2158+
lastDirtyCharProcessed = i;
21542159
break;
21552160

21562161
}
@@ -2159,23 +2164,31 @@ else if (ch < 0x7F) {
21592164
// Range 0x20 through 0x7E inclusive
21602165
// Normal ASCII chars
21612166
writer.write(ch);
2167+
lastDirtyCharProcessed = i;
21622168
}
21632169
else if (ch <= 0x9F){
21642170
// Range 0x7F through 0x9F inclusive
21652171
// More control characters
21662172
writer.write("&#");
21672173
writer.write(Integer.toString(ch));
21682174
writer.write(';');
2175+
lastDirtyCharProcessed = i;
21692176
}
21702177
else if (ch == CharInfo.S_LINE_SEPARATOR) {
21712178
// LINE SEPARATOR
21722179
writer.write("&#8232;");
2180+
lastDirtyCharProcessed = i;
2181+
}
2182+
else if (Encodings.isHighUTF16Surrogate(ch)) {
2183+
lastDirtyCharProcessed = processDirty(stringChars, len, i, ch, lastDirtyCharProcessed, false);
2184+
i = lastDirtyCharProcessed;
21732185
}
21742186
else if (m_encodingInfo.isInEncoding(ch)) {
21752187
// If the character is in the encoding, and
21762188
// not in the normal ASCII range, we also
21772189
// just write it out
21782190
writer.write(ch);
2191+
lastDirtyCharProcessed = i;
21792192
}
21802193
else {
21812194
// This is a fallback plan, we should never get here
@@ -2185,6 +2198,7 @@ else if (m_encodingInfo.isInEncoding(ch)) {
21852198
writer.write("&#");
21862199
writer.write(Integer.toString(ch));
21872200
writer.write(';');
2201+
lastDirtyCharProcessed = i;
21882202
}
21892203

21902204
}

0 commit comments

Comments
 (0)