|
1 | 1 | package com.upokecenter.test; |
2 | 2 |
|
| 3 | +import java.util.*; |
3 | 4 | import java.io.*; |
4 | 5 |
|
5 | 6 | import org.junit.Assert; |
6 | 7 | import org.junit.Test; |
7 | 8 | import com.upokecenter.util.*; |
8 | 9 |
|
9 | 10 | public class DataUtilitiesTest { |
| 11 | + public static List<byte[]> GenerateIllegalUtf8Sequences() { |
| 12 | + ArrayList<byte[]> list = new ArrayList<byte[]>(); |
| 13 | + // Generate illegal single bytes |
| 14 | + for (int i = 0x80; i <= 0xff; ++i) { |
| 15 | + if (i < 0xc2 || i > 0xf4) { |
| 16 | + list.add(new byte[] { (byte)i, (byte)0x80 }); |
| 17 | + } |
| 18 | + list.add(new byte[] { (byte)i }); |
| 19 | + } |
| 20 | + list.add(new byte[] { (byte)0xe0, (byte)0xa0 }); |
| 21 | + list.add(new byte[] { (byte)0xe1, (byte)0x80 }); |
| 22 | + list.add(new byte[] { (byte)0xef, (byte)0x80 }); |
| 23 | + list.add(new byte[] { (byte)0xf0, (byte)0x90 }); |
| 24 | + list.add(new byte[] { (byte)0xf1, (byte)0x80 }); |
| 25 | + list.add(new byte[] { (byte)0xf3, (byte)0x80 }); |
| 26 | + list.add(new byte[] { (byte)0xf4, (byte)0x80 }); |
| 27 | + list.add(new byte[] { (byte)0xf0, (byte)0x90, (byte)0x80 }); |
| 28 | + list.add(new byte[] { (byte)0xf1, (byte)0x80, (byte)0x80 }); |
| 29 | + list.add(new byte[] { (byte)0xf3, (byte)0x80, (byte)0x80 }); |
| 30 | + list.add(new byte[] { (byte)0xf4, (byte)0x80, (byte)0x80 }); |
| 31 | + // Generate illegal multibyte sequences |
| 32 | + for (int i = 0x00; i <= 0xff; ++i) { |
| 33 | + if (i < 0x80 || i > 0xbf) { |
| 34 | + list.add(new byte[] { (byte)0xc2, (byte)i }); |
| 35 | + list.add(new byte[] { (byte)0xdf, (byte)i }); |
| 36 | + list.add(new byte[] { (byte)0xe1, (byte)i, (byte)0x80 }); |
| 37 | + list.add(new byte[] { (byte)0xef, (byte)i, (byte)0x80 }); |
| 38 | + list.add(new byte[] { (byte)0xf1, (byte)i, (byte)0x80, (byte)0x80 }); |
| 39 | + list.add(new byte[] { (byte)0xf3, (byte)i, (byte)0x80, (byte)0x80 }); |
| 40 | + list.add(new byte[] { (byte)0xe0, (byte)0xa0, (byte)i }); |
| 41 | + list.add(new byte[] { (byte)0xe1, (byte)0x80, (byte)i }); |
| 42 | + list.add(new byte[] { (byte)0xef, (byte)0x80, (byte)i }); |
| 43 | + list.add(new byte[] { (byte)0xf0, (byte)0x90, (byte)i, (byte)0x80 }); |
| 44 | + list.add(new byte[] { (byte)0xf1, (byte)0x80, (byte)i, (byte)0x80 }); |
| 45 | + list.add(new byte[] { (byte)0xf3, (byte)0x80, (byte)i, (byte)0x80 }); |
| 46 | + list.add(new byte[] { (byte)0xf4, (byte)0x80, (byte)i, (byte)0x80 }); |
| 47 | + list.add(new byte[] { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)i }); |
| 48 | + list.add(new byte[] { (byte)0xf1, (byte)0x80, (byte)0x80, (byte)i }); |
| 49 | + list.add(new byte[] { (byte)0xf3, (byte)0x80, (byte)0x80, (byte)i }); |
| 50 | + list.add(new byte[] { (byte)0xf4, (byte)0x80, (byte)0x80, (byte)i }); |
| 51 | + } |
| 52 | + if (i < 0xa0 || i > 0xbf) { |
| 53 | + list.add(new byte[] { (byte)0xe0, (byte)i, (byte)0x80 }); |
| 54 | + } |
| 55 | + if (i < 0x90 || i > 0xbf) { |
| 56 | + list.add(new byte[] { (byte)0xf0, (byte)i, (byte)0x80, (byte)0x80 }); |
| 57 | + } |
| 58 | + if (i < 0x80 || i > 0x8f) { |
| 59 | + list.add(new byte[] { (byte)0xf4, (byte)i, (byte)0x80, (byte)0x80 }); |
| 60 | + } |
| 61 | + } |
| 62 | + return list; |
| 63 | + } |
| 64 | + |
10 | 65 | @Test |
11 | 66 | public void TestCodePointAt() { |
12 | 67 | try { |
@@ -40,7 +95,7 @@ public void TestCodePointCompare() { |
40 | 95 | ((DataUtilities.CodePointCompare("abc", "abc")==0) ? 0 : ((DataUtilities.CodePointCompare("abc", "abc")< 0) ? -1 : 1))); |
41 | 96 | Assert.assertEquals( |
42 | 97 | 0, |
43 | | - ((DataUtilities.CodePointCompare("\ud800\udc00" , "\ud800\udc00" |
| 98 | + ((DataUtilities.CodePointCompare("\ud800\udc00" , "\ud800\udc00" |
44 | 99 | )==0) ? 0 : ((DataUtilities.CodePointCompare("\ud800\udc00" , "\ud800\udc00" |
45 | 100 | )< 0) ? -1 : 1))); |
46 | 101 | Assert.assertEquals( |
@@ -180,6 +235,31 @@ public void TestGetUtf8String() { |
180 | 235 | Assert.fail(ex.toString()); |
181 | 236 | throw new IllegalStateException("", ex); |
182 | 237 | } |
| 238 | + List<byte[]> illegalSeqs = GenerateIllegalUtf8Sequences(); |
| 239 | + for (byte[] seq : illegalSeqs) { |
| 240 | + try { |
| 241 | + DataUtilities.GetUtf8String(seq, false); |
| 242 | + Assert.fail("Should have failed"); |
| 243 | + } catch (IllegalArgumentException ex) { |
| 244 | + } catch (Exception ex) { |
| 245 | + Assert.fail(ex.toString()); |
| 246 | + throw new IllegalStateException("", ex); |
| 247 | + } |
| 248 | + String strret = DataUtilities.GetUtf8String(seq, true); |
| 249 | + if (!(strret.length() > 0))Assert.fail(); |
| 250 | + Assert.assertEquals('\ufffd', strret.charAt(0)); |
| 251 | + try { |
| 252 | + DataUtilities.GetUtf8String(seq, 0, seq.length, false); |
| 253 | + Assert.fail("Should have failed"); |
| 254 | + } catch (IllegalArgumentException ex) { |
| 255 | + } catch (Exception ex) { |
| 256 | + Assert.fail(ex.toString()); |
| 257 | + throw new IllegalStateException("", ex); |
| 258 | + } |
| 259 | + strret = DataUtilities.GetUtf8String(seq, 0, seq.length, true); |
| 260 | + if (!(strret.length() > 0))Assert.fail(); |
| 261 | + Assert.assertEquals('\ufffd', strret.charAt(0)); |
| 262 | + } |
183 | 263 | } |
184 | 264 | @Test |
185 | 265 | public void TestReadUtf8() { |
@@ -294,6 +374,42 @@ public void TestReadUtf8ToString() { |
294 | 374 | Assert.fail(ex.toString()); |
295 | 375 | throw new IllegalStateException("", ex); |
296 | 376 | } |
| 377 | + List<byte[]> illegalSeqs = GenerateIllegalUtf8Sequences(); |
| 378 | + for (byte[] seq : illegalSeqs) { |
| 379 | + java.io.ByteArrayInputStream ms = null; |
| 380 | +try { |
| 381 | +ms = new java.io.ByteArrayInputStream(seq); |
| 382 | + |
| 383 | + try { |
| 384 | + DataUtilities.ReadUtf8ToString(ms, -1, false); |
| 385 | + Assert.fail("Should have failed"); |
| 386 | + } catch (IOException ex) { |
| 387 | + } catch (Exception ex) { |
| 388 | + Assert.fail(ex.toString()); |
| 389 | + throw new IllegalStateException("", ex); |
| 390 | + } |
| 391 | +} |
| 392 | +finally { |
| 393 | +try { if (ms != null)ms.close(); } catch (java.io.IOException ex) {} |
| 394 | +} |
| 395 | + java.io.ByteArrayInputStream ms = null; |
| 396 | +try { |
| 397 | +ms = new java.io.ByteArrayInputStream(seq); |
| 398 | + |
| 399 | + String strret = null; |
| 400 | + try { |
| 401 | + strret = DataUtilities.ReadUtf8ToString(ms, -1, true); |
| 402 | + } catch (Exception ex) { |
| 403 | + Assert.fail(ex.toString()); |
| 404 | + throw new IllegalStateException("", ex); |
| 405 | + } |
| 406 | + if (!(strret.length() > 0))Assert.fail(); |
| 407 | + Assert.assertEquals('\ufffd', strret.charAt(0)); |
| 408 | +} |
| 409 | +finally { |
| 410 | +try { if (ms != null)ms.close(); } catch (java.io.IOException ex) {} |
| 411 | +} |
| 412 | + } |
297 | 413 | } |
298 | 414 | @Test |
299 | 415 | public void TestToLowerCaseAscii() { |
|
0 commit comments