|
40 | 40 | * Spec - The CONSTANT_Utf8_info Structure</a>
|
41 | 41 | */
|
42 | 42 | public sealed class ModifiedUTF8 permits Descriptor, Name {
|
| 43 | + private static final char REPLACEMENT_CHARACTER = 0xfffd; |
43 | 44 |
|
44 | 45 | /**
|
45 | 46 | * Type-safe cast from Symbol with ModifiedUTF8 bound to Symbol<ModifiedUTF8>.
|
@@ -146,6 +147,12 @@ public static String toJavaString(byte[] bytearr, int offset, int utflen) throws
|
146 | 147 | return toJavaString(ByteBuffer.wrap(bytearr, offset, utflen));
|
147 | 148 | }
|
148 | 149 |
|
| 150 | + /** |
| 151 | + * Creates a String from the contents of the buffer, interpreted as modified utf8. If the |
| 152 | + * contents are not valid modified utf8, {@link UTFDataFormatException} will be thrown. |
| 153 | + * |
| 154 | + * @throws UTFDataFormatException if the data is not valid modified utf8. |
| 155 | + */ |
149 | 156 | public static String toJavaString(ByteBuffer buffer) throws IOException {
|
150 | 157 | char[] chararr = new char[buffer.remaining()];
|
151 | 158 |
|
@@ -209,7 +216,87 @@ public static String toJavaString(ByteBuffer buffer) throws IOException {
|
209 | 216 | throw throwUTFDataFormatException(malformedInputMessage(buffer.position()));
|
210 | 217 | }
|
211 | 218 | }
|
212 |
| - // The number of chars produced may be less than utflen |
| 219 | + // The number of chars produced may be less than the size of the buffer |
| 220 | + return new String(chararr, 0, chararrCount); |
| 221 | + } |
| 222 | + |
| 223 | + /** |
| 224 | + * Creates a String from the contents of the buffer, interpreted as modified utf8. If some bytes |
| 225 | + * cannot be decoded, the Unicode replacement character (0xfffd) is used. |
| 226 | + */ |
| 227 | + public static String toJavaStringSafe(ByteBuffer buffer) { |
| 228 | + char[] chararr = new char[buffer.remaining()]; |
| 229 | + |
| 230 | + int c; |
| 231 | + int char2; |
| 232 | + int char3; |
| 233 | + int chararrCount = 0; |
| 234 | + |
| 235 | + while (buffer.hasRemaining()) { |
| 236 | + c = buffer.get() & 0xff; |
| 237 | + if (c > 127) { |
| 238 | + buffer.position(buffer.position() - 1); |
| 239 | + break; |
| 240 | + } |
| 241 | + chararr[chararrCount++] = (char) c; |
| 242 | + } |
| 243 | + |
| 244 | + while (buffer.hasRemaining()) { |
| 245 | + c = buffer.get() & 0xff; |
| 246 | + switch (c >> 4) { |
| 247 | + case 0: |
| 248 | + case 1: |
| 249 | + case 2: |
| 250 | + case 3: |
| 251 | + case 4: |
| 252 | + case 5: |
| 253 | + case 6: |
| 254 | + case 7: |
| 255 | + /* 0xxxxxxx */ |
| 256 | + chararr[chararrCount++] = (char) c; |
| 257 | + break; |
| 258 | + case 12: |
| 259 | + case 13: |
| 260 | + /* 110x xxxx 10xx xxxx */ |
| 261 | + if (!buffer.hasRemaining()) { |
| 262 | + if (chararrCount < chararr.length) { |
| 263 | + chararr[chararrCount] = REPLACEMENT_CHARACTER; |
| 264 | + break; |
| 265 | + } |
| 266 | + } |
| 267 | + char2 = buffer.get(); |
| 268 | + if ((char2 & 0xC0) != 0x80) { |
| 269 | + chararr[chararrCount++] = REPLACEMENT_CHARACTER; |
| 270 | + break; |
| 271 | + } |
| 272 | + chararr[chararrCount++] = (char) (((c & 0x1F) << 6) | |
| 273 | + (char2 & 0x3F)); |
| 274 | + break; |
| 275 | + case 14: |
| 276 | + /* 1110 xxxx 10xx xxxx 10xx xxxx */ |
| 277 | + if (buffer.remaining() < 2) { |
| 278 | + if (chararrCount < chararr.length) { |
| 279 | + chararr[chararrCount] = REPLACEMENT_CHARACTER; |
| 280 | + break; |
| 281 | + } |
| 282 | + } |
| 283 | + char2 = buffer.get(); |
| 284 | + char3 = buffer.get(); |
| 285 | + if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) { |
| 286 | + chararr[chararrCount++] = REPLACEMENT_CHARACTER; |
| 287 | + break; |
| 288 | + } |
| 289 | + chararr[chararrCount++] = (char) (((c & 0x0F) << 12) | |
| 290 | + ((char2 & 0x3F) << 6) | |
| 291 | + ((char3 & 0x3F) << 0)); |
| 292 | + break; |
| 293 | + default: |
| 294 | + /* 10xx xxxx, 1111 xxxx */ |
| 295 | + chararr[chararrCount++] = REPLACEMENT_CHARACTER; |
| 296 | + break; |
| 297 | + } |
| 298 | + } |
| 299 | + // The number of chars produced may be less than the size of the buffer |
213 | 300 | return new String(chararr, 0, chararrCount);
|
214 | 301 | }
|
215 | 302 |
|
|
0 commit comments