Resolve all MISRA violations (#19)

dan4thewin · web-flow · commit a25d12114f21 · 2020-09-11T19:04:28.000-04:00
diff --git a/lexicon.txt b/lexicon.txt
@@ -5,6 +5,7 @@ bf
 bmp
 buf
 bufferlength
+cbmc
 com
 cond
 corejson
@@ -18,6 +19,7 @@ ef
 endcode
 endcond
 endif
+enum
 enums
 fb
 fc
@@ -36,6 +38,7 @@ jsonmaxdepthexceeded
 jsonnotfound
 jsonnullparameter
 jsonpartial
+jsonstatus
 jsonsuccess
 keylength
 len
diff --git a/source/core_json.c b/source/core_json.c
@@ -36,10 +36,19 @@ typedef enum
     false = 0
 } bool_;
 
-#define isdigit_( x )    ( ( x >= '0' ) && ( x <= '9' ) )
-#define iscntrl_( x )    ( ( x >= '\0' ) && ( x < ' ' ) )
+/* A compromise to satisfy both MISRA and CBMC: a char is stored via .c and the same byte is read as unsigned via .u, with no cast. */
+typedef union
+{
+    char c;
+    uint8_t u;
+} char_;
+
+#define isdigit_( x )    ( ( ( x ) >= '0' ) && ( ( x ) <= '9' ) )
+#define iscntrl_( x )    ( ( ( x ) >= '\0' ) && ( ( x ) < ' ' ) )
 /* NB. This is whitespace as defined by the JSON standard (ECMA-404). */
-#define isspace_( x )    ( ( x == ' ' ) || ( x == '\t' ) || ( x == '\n' ) || ( x == '\r' ) )
+#define isspace_( x )                          \
+    ( ( ( x ) == ' ' ) || ( ( x ) == '\t' ) || \
+      ( ( x ) == '\n' ) || ( ( x ) == '\r' ) )
 
 /**
  * @brief Advance buffer index beyond whitespace.
@@ -112,7 +121,7 @@ static bool_ shortestUTF8( size_t length,
     bool_ ret = false;
     uint32_t min, max;
 
-    assert( ( length >= 2 ) && ( length <= 4 ) );
+    assert( ( length >= 2U ) && ( length <= 4U ) );
 
     switch( length )
     {
@@ -171,20 +180,20 @@ static bool_ skipUTF8MultiByte( const char * buf,
     bool_ ret = false;
     size_t i, bitCount, j;
     uint32_t value = 0;
-    uint8_t c;
+    char_ c;
 
     assert( ( buf != NULL ) && ( start != NULL ) && ( max > 0U ) );
 
     i = *start;
     assert( i < max );
-    assert( buf[ i ] < 0 );
+    assert( buf[ i ] < '\0' );
 
-    c = ( 0x80U | ( buf[ i ] & 0x7F ) );
+    c.c = buf[ i ];
 
-    if( ( c > 0xC1U ) && ( c < 0xF5U ) )
+    if( ( c.u > 0xC1U ) && ( c.u < 0xF5U ) )
     {
-        bitCount = countHighBits( c );
-        value = ( ( uint32_t ) c ) & ( ( ( uint32_t ) 1 << ( 7U - bitCount ) ) - 1U );
+        bitCount = countHighBits( c.u );
+        value = ( ( uint32_t ) c.u ) & ( ( ( uint32_t ) 1 << ( 7U - bitCount ) ) - 1U );
 
         /* The bit count is 1 greater than the number of bytes,
          * e.g., when j is 2, we skip one more byte. */
@@ -197,13 +206,15 @@ static bool_ skipUTF8MultiByte( const char * buf,
                 break;
             }
 
+            c.c = buf[ i ];
+
             /* Additional bytes must match 10xxxxxx. */
-            if( ( buf[ i ] >= 0 ) || ( ( buf[ i ] & 0x40 ) != 0 ) )
+            if( ( c.u & 0xC0U ) != 0x80U )
             {
                 break;
             }
 
-            value = ( value << 6U ) | ( buf[ i ] & 0x3F );
+            value = ( value << 6U ) | ( c.u & 0x3FU );
         }
 
         if( ( j == 0U ) && ( shortestUTF8( bitCount, value ) == true ) )
@@ -237,7 +248,7 @@ static bool_ skipUTF8( const char * buf,
     if( *start < max )
     {
         /* an ASCII byte */
-        if( buf[ *start ] >= 0 )
+        if( buf[ *start ] >= '\0' )
         {
             *start += 1U;
             ret = true;
@@ -256,69 +267,62 @@ static bool_ skipUTF8( const char * buf,
  *
  * @param[in] c  The character to convert.
  *
- * @return the integer value upon success or UINT8_MAX on failure.
+ * @return the integer value upon success or NOT_A_HEX_CHAR on failure.
  */
+#define NOT_A_HEX_CHAR    ( 0x10U )
 static uint8_t hexToInt( char c )
 {
-    uint8_t n;
+    char_ n;
+
+    n.c = c;
 
     if( ( c >= 'a' ) && ( c <= 'f' ) )
     {
-        n = 10U + ( uint8_t ) ( c - 'a' );
+        n.c -= 'a';
+        n.u += 10U;
     }
     else if( ( c >= 'A' ) && ( c <= 'F' ) )
     {
-        n = 10U + ( uint8_t ) ( c - 'A' );
+        n.c -= 'A';
+        n.u += 10U;
     }
     else if( isdigit_( c ) )
     {
-        n = ( uint8_t ) ( c - '0' );
+        n.c -= '0';
     }
     else
     {
-        n = UINT8_MAX;
+        n.u = NOT_A_HEX_CHAR;
     }
 
-    return n;
+    return n.u;
 }
 
 /**
- * @brief Advance buffer index beyond a \u Unicode escape sequence.
+ * @brief Advance buffer index beyond a single \u Unicode
+ * escape sequence and output the value.
  *
  * @param[in] buf  The buffer to parse.
  * @param[in,out] start  The index at which to begin.
  * @param[in] max  The size of the buffer.
- * @param[in] requireLowSurrogate  true when a low surrogate is required.
- *
- * Surrogate pairs are two escape sequences that together denote
- * a code point outside the Basic Multilingual Plane.  They must
- * occur as a pair with the first "high" value in [U+D800, U+DBFF],
- * and the second "low" value in [U+DC00, U+DFFF].
+ * @param[out] outValue  The value of the hex digits.
  *
  * @return true if a valid escape sequence was present;
  * false otherwise.
  *
  * @note For the sake of security, \u0000 is disallowed.
  */
-#define isHighSurrogate( x )    ( ( ( x ) >= 0xD800U ) && ( ( x ) <= 0xDBFFU ) )
-#define isLowSurrogate( x )     ( ( ( x ) >= 0xDC00U ) && ( ( x ) <= 0xDFFFU ) )
-
-/* MISRA Rule 17.2 prohibits recursion due to the
- * risk of exceeding available stack space.  In this
- * function, recursion is limited to exactly one level;
- * the recursive call sets the final argument to true
- * which satisfies the base case. */
-/* coverity[misra_c_2012_rule_17_2_violation] */
-static bool_ skipHexEscape( const char * buf,
-                            size_t * start,
-                            size_t max,
-                            bool_ requireLowSurrogate )
+static bool_ skipOneHexEscape( const char * buf,
+                               size_t * start,
+                               size_t max,
+                               uint16_t * outValue )
 {
     bool_ ret = false;
     size_t i, end;
     uint16_t value = 0;
 
     assert( ( buf != NULL ) && ( start != NULL ) && ( max > 0U ) );
+    assert( outValue != NULL );
 
     i = *start;
 #define HEX_ESCAPE_LENGTH    ( 6U )   /* e.g., \u1234 */
@@ -330,37 +334,75 @@ static bool_ skipHexEscape( const char * buf,
         {
             uint8_t n = hexToInt( buf[ i ] );
 
-            if( n == UINT8_MAX )
+            if( n == NOT_A_HEX_CHAR )
             {
                 break;
             }
 
             value = ( value << 4U ) | n;
         }
+    }
 
-        if( ( i == end ) && ( value > 0U ) )
+    if( ( i == end ) && ( value > 0U ) )
+    {
+        ret = true;
+        *outValue = value;
+        *start = i;
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Advance buffer index beyond one or a pair of \u Unicode escape sequences.
+ *
+ * @param[in] buf  The buffer to parse.
+ * @param[in,out] start  The index at which to begin.
+ * @param[in] max  The size of the buffer.
+ *
+ * Surrogate pairs are two escape sequences that together denote
+ * a code point outside the Basic Multilingual Plane.  They must
+ * occur as a pair with the first "high" value in [U+D800, U+DBFF],
+ * and the second "low" value in [U+DC00, U+DFFF].
+ *
+ * @return true if a valid escape sequence was present;
+ * false otherwise.
+ *
+ * @note For the sake of security, \u0000 is disallowed.
+ */
+#define isHighSurrogate( x )    ( ( ( x ) >= 0xD800U ) && ( ( x ) <= 0xDBFFU ) )
+#define isLowSurrogate( x )     ( ( ( x ) >= 0xDC00U ) && ( ( x ) <= 0xDFFFU ) )
+
+static bool_ skipHexEscape( const char * buf,
+                            size_t * start,
+                            size_t max )
+{
+    bool_ ret = false;
+    size_t i;
+    uint16_t value;
+
+    assert( ( buf != NULL ) && ( start != NULL ) && ( max > 0U ) );
+
+    i = *start;
+
+    if( skipOneHexEscape( buf, &i, max, &value ) == true )
+    {
+        if( isHighSurrogate( value ) )
         {
-            if( requireLowSurrogate == true )
-            {
-                if( isLowSurrogate( value ) )
-                {
-                    ret = true;
-                }
-            }
-            else if( isHighSurrogate( value ) )
-            {
-                /* low surrogate must follow */
-                ret = skipHexEscape( buf, &i, max, true );
-            }
-            else if( isLowSurrogate( value ) )
-            {
-                /* premature low surrogate */
-            }
-            else
+            if( ( skipOneHexEscape( buf, &i, max, &value ) == true ) &&
+                ( isLowSurrogate( value ) ) )
             {
                 ret = true;
             }
         }
+        else if( isLowSurrogate( value ) )
+        {
+            /* premature low surrogate */
+        }
+        else
+        {
+            ret = true;
+        }
     }
 
     if( ret == true )
@@ -404,7 +446,7 @@ static bool_ skipEscape( const char * buf,
                 break;
 
             case 'u':
-                ret = skipHexEscape( buf, &i, max, false );
+                ret = skipHexEscape( buf, &i, max );
                 break;
 
             case '"':
@@ -587,7 +629,7 @@ static bool_ skipAnyLiteral( const char * buf,
     bool_ ret = false;
 
 #define skipLit_( x ) \
-    ( skipLiteral( buf, start, max, x, ( sizeof( x ) - 1U ) ) == true )
+    ( skipLiteral( buf, start, max, ( x ), ( sizeof( x ) - 1U ) ) == true )
 
     if( skipLit_( "true" ) || skipLit_( "false" ) || skipLit_( "null" ) )
     {
@@ -1262,7 +1304,7 @@ static JSONStatus_t search( char * buf,
  */
 JSONStatus_t JSON_Search( char * buf,
                           size_t max,
-                          char * queryKey,
+                          const char * queryKey,
                           size_t queryKeyLength,
                           char separator,
                           char ** outValue,
diff --git a/source/include/core_json.h b/source/include/core_json.h
@@ -158,7 +158,7 @@ JSONStatus_t JSON_Validate( const char * buf,
 /* @[declare_json_search] */
 JSONStatus_t JSON_Search( char * buf,
                           size_t max,
-                          char * queryKey,
+                          const char * queryKey,
                           size_t queryKeyLength,
                           char separator,
                           char ** outValue,
diff --git a/test/cbmc/proofs/skipEscape/Makefile b/test/cbmc/proofs/skipEscape/Makefile
@@ -9,6 +9,7 @@ CBMC_MAX_BUFSIZE=14
 
 UNWINDSET += skipEscape.0:$(CBMC_MAX_BUFSIZE)
 UNWINDSET += skipHexEscape.0:$(CBMC_MAX_BUFSIZE)
+UNWINDSET += skipOneHexEscape.0:$(CBMC_MAX_BUFSIZE)
 
 include ../Makefile-json.common
 
diff --git a/test/cbmc/proofs/skipEscape/README.md b/test/cbmc/proofs/skipEscape/README.md
@@ -8,6 +8,7 @@ The proof runs in a few seconds and provides complete coverage of:
 * hexToInt()
 * skipEscape()
 * skipHexEscape()
+* skipOneHexEscape()
 
 To run the proof.
 * Add cbmc, goto-cc, goto-instrument, goto-analyzer, and cbmc-viewer
diff --git a/tools/coverity/misra.config b/tools/coverity/misra.config
@@ -0,0 +1,34 @@
+// MISRA C-2012 Rules
+
+{
+    version : "2.0",
+    standard : "c2012",
+    title: "Coverity MISRA Configuration",
+    deviations : [
+        {
+            deviation: "Directive 4.9",
+            category: "Advisory",
+            reason: "Allow inclusion of function-like macros."
+        },
+        {
+            deviation: "Rule 15.4",
+            category: "Advisory",
+            reason: "Allow more than one break statement to terminate a loop"
+        },
+        {
+            deviation: "Rule 19.2",
+            category: "Advisory",
+            reason: "Allow a union of a signed and an unsigned type of identical size."
+        },
+        {
+            deviation: "Rule 3.1",
+            category: "Required",
+            reason: "Allow nested comments. Documentation blocks contain comments for example code."
+        },
+        {
+            deviation: "Rule 20.12",
+            category: "Required",
+            reason: "Allow use of assert(), which uses a parameter in both expanded and raw forms."
+        },
+    ]
+}