Start next version

peteroupc · peteroupc · commit b7cd29bfe31f · 2014-10-03T10:00:04.000-04:00
diff --git a/pom.xml b/pom.xml
@@ -4,7 +4,7 @@
 	<groupId>com.upokecenter</groupId>
 	<artifactId>cbor</artifactId>
 	<packaging>jar</packaging>
-	<version>2.0.0</version>
+	<version>2.1.0-SNAPSHOT</version>
 	<name>CBOR</name>
 	<description>
     A Java implementation of Concise Binary Object Representation (CBOR), a general-purpose binary data format defined in RFC 7049. According to that RFC, CBOR's data model "is an extended version of the JSON data model", supporting many more types of data than JSON. This implementation was written by Peter O. and is released to the Public Domain under the CC0 Declaration.
diff --git a/src/main/java/com/upokecenter/cbor/StringOutput.java b/src/main/java/com/upokecenter/cbor/StringOutput.java
@@ -106,8 +106,8 @@ public void WriteCodePoint(int codePoint) throws java.io.IOException {
           }
         } else if (codePoint <= 0x10ffff) {
           this.builder.append((char)((((codePoint - 0x10000) >> 10) &
-                                      0x3ff) +0xd800));
-          this.builder.append((char)(((codePoint - 0x10000) & 0x3ff) +0xdc00));
+                                      0x3ff) + 0xd800));
+          this.builder.append((char)(((codePoint - 0x10000) & 0x3ff) + 0xdc00));
         }
       }
     }
diff --git a/src/test/java/com/upokecenter/test/DataUtilitiesTest.java b/src/test/java/com/upokecenter/test/DataUtilitiesTest.java
@@ -1,12 +1,67 @@
 package com.upokecenter.test;
 
+import java.util.*;
 import java.io.*;
 
 import org.junit.Assert;
 import org.junit.Test;
 import com.upokecenter.util.*;
 
   public class DataUtilitiesTest {
+    public static List<byte[]> GenerateIllegalUtf8Sequences() {  // Builds and returns the byte arrays the tests feed to the UTF-8 decoders as illegal input.
+      ArrayList<byte[]> list = new ArrayList<byte[]>();
+      // Lone bytes 0x80-0xff, plus out-of-range lead bytes followed by 0x80
+      for (int i = 0x80; i <= 0xff; ++i) {
+        if (i < 0xc2 || i > 0xf4) {  // i is outside 0xc2-0xf4: also try it followed by a trail byte 0x80
+          list.add(new byte[] { (byte)i, (byte)0x80  });
+        }
+        list.add(new byte[] { (byte)i  });  // every byte in 0x80-0xff on its own
+      }
+      list.add(new byte[] { (byte)0xe0, (byte)0xa0  });  // from here to 0xf4 0x80 0x80: truncated sequences, 0xe0-0xf4 leads with only one or two following bytes
+      list.add(new byte[] { (byte)0xe1, (byte)0x80  });
+      list.add(new byte[] { (byte)0xef, (byte)0x80  });
+      list.add(new byte[] { (byte)0xf0, (byte)0x90  });
+      list.add(new byte[] { (byte)0xf1, (byte)0x80  });
+      list.add(new byte[] { (byte)0xf3, (byte)0x80  });
+      list.add(new byte[] { (byte)0xf4, (byte)0x80  });
+      list.add(new byte[] { (byte)0xf0, (byte)0x90, (byte)0x80  });
+      list.add(new byte[] { (byte)0xf1, (byte)0x80, (byte)0x80  });
+      list.add(new byte[] { (byte)0xf3, (byte)0x80, (byte)0x80  });
+      list.add(new byte[] { (byte)0xf4, (byte)0x80, (byte)0x80  });
+      // Multibyte sequences of full length with one trail position replaced by byte i
+      for (int i = 0x00; i <= 0xff; ++i) {
+        if (i < 0x80 || i > 0xbf) {  // i is outside 0x80-0xbf: substitute it at each trail position in turn
+          list.add(new byte[] { (byte)0xc2, (byte)i  });
+          list.add(new byte[] { (byte)0xdf, (byte)i  });
+          list.add(new byte[] { (byte)0xe1, (byte)i, (byte)0x80  });
+          list.add(new byte[] { (byte)0xef, (byte)i, (byte)0x80  });
+          list.add(new byte[] { (byte)0xf1, (byte)i, (byte)0x80, (byte)0x80  });
+          list.add(new byte[] { (byte)0xf3, (byte)i, (byte)0x80, (byte)0x80  });
+          list.add(new byte[] { (byte)0xe0, (byte)0xa0, (byte)i  });
+          list.add(new byte[] { (byte)0xe1, (byte)0x80, (byte)i  });
+          list.add(new byte[] { (byte)0xef, (byte)0x80, (byte)i  });
+          list.add(new byte[] { (byte)0xf0, (byte)0x90, (byte)i, (byte)0x80  });
+          list.add(new byte[] { (byte)0xf1, (byte)0x80, (byte)i, (byte)0x80  });
+          list.add(new byte[] { (byte)0xf3, (byte)0x80, (byte)i, (byte)0x80  });
+          list.add(new byte[] { (byte)0xf4, (byte)0x80, (byte)i, (byte)0x80  });
+          list.add(new byte[] { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)i  });
+          list.add(new byte[] { (byte)0xf1, (byte)0x80, (byte)0x80, (byte)i  });
+          list.add(new byte[] { (byte)0xf3, (byte)0x80, (byte)0x80, (byte)i  });
+          list.add(new byte[] { (byte)0xf4, (byte)0x80, (byte)0x80, (byte)i  });
+        }
+        if (i < 0xa0 || i > 0xbf) {  // second byte after lead 0xe0 is outside 0xa0-0xbf
+          list.add(new byte[] { (byte)0xe0, (byte)i, (byte)0x80  });
+        }
+        if (i < 0x90 || i > 0xbf) {  // second byte after lead 0xf0 is outside 0x90-0xbf
+          list.add(new byte[] { (byte)0xf0, (byte)i, (byte)0x80, (byte)0x80  });
+        }
+        if (i < 0x80 || i > 0x8f) {  // second byte after lead 0xf4 is outside 0x80-0x8f
+          list.add(new byte[] { (byte)0xf4, (byte)i, (byte)0x80, (byte)0x80  });
+        }
+      }
+      return list;
+    }
+
     @Test
     public void TestCodePointAt() {
       try {
@@ -40,7 +95,7 @@ public void TestCodePointCompare() {
         ((DataUtilities.CodePointCompare("abc", "abc")==0) ? 0 : ((DataUtilities.CodePointCompare("abc", "abc")< 0) ? -1 : 1)));
       Assert.assertEquals(
         0,
-   ((DataUtilities.CodePointCompare("\ud800\udc00" , "\ud800\udc00"
+        ((DataUtilities.CodePointCompare("\ud800\udc00" , "\ud800\udc00"
 )==0) ? 0 : ((DataUtilities.CodePointCompare("\ud800\udc00" , "\ud800\udc00"
 )< 0) ? -1 : 1)));
       Assert.assertEquals(
@@ -180,6 +235,31 @@ public void TestGetUtf8String() {
         Assert.fail(ex.toString());
         throw new IllegalStateException("", ex);
       }
+      List<byte[]> illegalSeqs = GenerateIllegalUtf8Sequences();
+      for (byte[] seq : illegalSeqs) {
+        try {
+          DataUtilities.GetUtf8String(seq, false);
+          Assert.fail("Should have failed");
+        } catch (IllegalArgumentException ex) {
+        } catch (Exception ex) {
+          Assert.fail(ex.toString());
+          throw new IllegalStateException("", ex);
+        }
+        String strret = DataUtilities.GetUtf8String(seq, true);
+        if (!(strret.length() > 0))Assert.fail();
+        Assert.assertEquals('\ufffd', strret.charAt(0));
+        try {
+          DataUtilities.GetUtf8String(seq, 0, seq.length, false);
+          Assert.fail("Should have failed");
+        } catch (IllegalArgumentException ex) {
+        } catch (Exception ex) {
+          Assert.fail(ex.toString());
+          throw new IllegalStateException("", ex);
+        }
+        strret = DataUtilities.GetUtf8String(seq, 0, seq.length, true);
+        if (!(strret.length() > 0))Assert.fail();
+        Assert.assertEquals('\ufffd', strret.charAt(0));
+      }
     }
     @Test
     public void TestReadUtf8() {
@@ -294,6 +374,42 @@ public void TestReadUtf8ToString() {
         Assert.fail(ex.toString());
         throw new IllegalStateException("", ex);
       }
+      List<byte[]> illegalSeqs = GenerateIllegalUtf8Sequences();
+      for (byte[] seq : illegalSeqs) {
+        java.io.ByteArrayInputStream ms = null;
+try {
+ms = new java.io.ByteArrayInputStream(seq);
+
+          try {
+            DataUtilities.ReadUtf8ToString(ms, -1, false);
+            Assert.fail("Should have failed");
+          } catch (IOException ex) {
+          } catch (Exception ex) {
+            Assert.fail(ex.toString());
+            throw new IllegalStateException("", ex);
+          }
+}
+finally {
+try { if (ms != null)ms.close(); } catch (java.io.IOException ex) {}
+}
+        java.io.ByteArrayInputStream ms = null;
+try {
+ms = new java.io.ByteArrayInputStream(seq);
+
+          String strret = null;
+          try {
+            strret = DataUtilities.ReadUtf8ToString(ms, -1, true);
+          } catch (Exception ex) {
+            Assert.fail(ex.toString());
+            throw new IllegalStateException("", ex);
+          }
+          if (!(strret.length() > 0))Assert.fail();
+          Assert.assertEquals('\ufffd', strret.charAt(0));
+}
+finally {
+try { if (ms != null)ms.close(); } catch (java.io.IOException ex) {}
+}
+      }
     }
     @Test
     public void TestToLowerCaseAscii() {

Original file line number	Diff line number	Diff line change
`@@ -106,8 +106,8 @@ public void WriteCodePoint(int codePoint) throws java.io.IOException {`
`106`	`106`	`}`
`107`	`107`	`} else if (codePoint <= 0x10ffff) {`
`108`	`108`	`this.builder.append((char)((((codePoint - 0x10000) >> 10) &`
`109`		`- 0x3ff) +0xd800));`
`110`		`- this.builder.append((char)(((codePoint - 0x10000) & 0x3ff) +0xdc00));`
	`109`	`+ 0x3ff) + 0xd800));`
	`110`	`+ this.builder.append((char)(((codePoint - 0x10000) & 0x3ff) + 0xdc00));`
`111`	`111`	`}`
`112`	`112`	`}`
`113`	`113`	`}`