pandas-dev · mroeschke · Dec 9, 2023 · Dec 4, 2023 · Dec 4, 2023 · Dec 4, 2023
diff --git a/meson.build b/meson.build
@@ -7,7 +7,8 @@ project(
     meson_version: '>=1.2.1',
     default_options: [
         'buildtype=release',
-        'c_std=c11'
+        'c_std=c11',
+        'warning_level=2',
     ]
 )
 

diff --git a/pandas/_libs/include/pandas/portable.h b/pandas/_libs/include/pandas/portable.h
@@ -23,3 +23,24 @@ The full license is in the LICENSE file, distributed with this software.
 #define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5))
 #define toupper_ascii(c) ((((unsigned)(c) - 'a') < 26) ? ((c) & 0x5f) : (c))
 #define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? ((c) | 0x20) : (c))
+
+/* Cast an intentionally unused variable or parameter to void. */
+#define UNUSED(x) ((void)(x))
+
+/*
+ * PD_FALLTHROUGH marks an intentional fallthrough between switch cases.
+ * __has_attribute is a compiler extension, so it is only evaluated after
+ * checking that it is defined; a preprocessor without it cannot parse
+ * `#if __has_attribute(...)`.
+ */
+#if defined(__has_attribute)
+#if __has_attribute(__fallthrough__)
+#define PD_FALLTHROUGH __attribute__((__fallthrough__))
+#endif
+#endif
+
+#ifndef PD_FALLTHROUGH
+#define PD_FALLTHROUGH                                                         \
+  do {                                                                         \
+  } while (0) /* fallthrough */
+#endif
diff --git a/pandas/_libs/src/datetime/pd_datetime.c b/pandas/_libs/src/datetime/pd_datetime.c
@@ -21,6 +21,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
 
 #include "datetime.h"
 #include "pandas/datetime/pd_datetime.h"
+#include "pandas/portable.h"
 
 static void pandas_datetime_destructor(PyObject *op) {
   void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME);
@@ -189,6 +190,7 @@ static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
 }
 
 static int pandas_datetime_exec(PyObject *module) {
+  UNUSED(module);
   PyDateTime_IMPORT;
   PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI));
   if (capi == NULL) {

diff --git a/pandas/_libs/src/parser/pd_parser.c b/pandas/_libs/src/parser/pd_parser.c
@@ -101,6 +101,7 @@ static void pandas_parser_destructor(PyObject *op) {
 }
 
 static int pandas_parser_exec(PyObject *module) {
+  UNUSED(module);
   PandasParser_CAPI *capi = PyMem_Malloc(sizeof(PandasParser_CAPI));
   if (capi == NULL) {
     PyErr_NoMemory();

diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -795,7 +795,7 @@ static int tokenize_bytes(parser_t *self, size_t line_limit,
         break;
       } else if (!isblank(c)) {
         self->state = START_FIELD;
-        // fall through to subsequent state
+        PD_FALLTHROUGH; // fall through to subsequent state
       } else {
         // if whitespace char, keep slurping
         break;
@@ -849,12 +849,12 @@ static int tokenize_bytes(parser_t *self, size_t line_limit,
           self->state = WHITESPACE_LINE;
           break;
         }
-        // fall through
       }
 
       // normal character - fall through
       // to handle as START_FIELD
       self->state = START_FIELD;
+      PD_FALLTHROUGH;
     }
     case START_FIELD:
       // expecting field
@@ -1130,10 +1130,10 @@ int parser_consume_rows(parser_t *self, size_t nrows) {
 
   /* if word_deletions == 0 (i.e. this case) then char_count must
    * be 0 too, as no data needs to be skipped */
-  const int64_t char_count = word_deletions >= 1
-                                 ? (self->word_starts[word_deletions - 1] +
-                                    strlen(self->words[word_deletions - 1]) + 1)
-                                 : 0;
+  const uint64_t char_count =
+      word_deletions >= 1 ? (self->word_starts[word_deletions - 1] +
+                             strlen(self->words[word_deletions - 1]) + 1)
+                          : 0;
 
   TRACE(("parser_consume_rows: Deleting %d words, %d chars\n", word_deletions,
          char_count));
@@ -1415,9 +1415,11 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
   int negative = 0;
   switch (*p) {
   case '-':
-    negative = 1; // Fall through to increment position.
+    negative = 1;
+    PD_FALLTHROUGH; // Fall through to increment position.
   case '+':
     p++;
+    break;
   }
 
   int exponent = 0;
@@ -1485,9 +1487,11 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
     negative = 0;
     switch (*++p) {
     case '-':
-      negative = 1; // Fall through to increment pos.
+      negative = 1;
+      PD_FALLTHROUGH; // Fall through to increment position.
     case '+':
       p++;
+      break;
     }
 
     // Process string of digits.
@@ -1595,9 +1599,11 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
   int negative = 0;
   switch (*p) {
   case '-':
-    negative = 1; // Fall through to increment position.
+    negative = 1;
+    PD_FALLTHROUGH; // Fall through to increment position.
   case '+':
     p++;
+    break;
   }
 
   double number = 0.;
@@ -1656,9 +1662,11 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
     negative = 0;
     switch (*++p) {
     case '-':
-      negative = 1; // Fall through to increment pos.
+      negative = 1;
+      PD_FALLTHROUGH; // Fall through to increment position.
     case '+':
       p++;
+      break;
     }
 
     // Process string of digits.
@@ -1764,6 +1772,7 @@ static char *_str_copy_decimal_str_c(const char *s, char **endpos, char decimal,
 
 double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
                   int skip_trailing, int *error, int *maybe_int) {
+  UNUSED(sci);
   // 'normalize' representation to C-locale; replace decimal with '.' and
   // remove thousands separator.
   char *endptr;
@@ -1975,7 +1984,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
         break;
       }
       if ((number < pre_max) ||
-          ((number == pre_max) && (d - '0' <= dig_pre_max))) {
+          ((number == pre_max) && ((uint64_t)(d - '0') <= dig_pre_max))) {
         number = number * 10 + (d - '0');
         d = *++p;
 
@@ -1987,7 +1996,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
   } else {
     while (isdigit_ascii(d)) {
       if ((number < pre_max) ||
-          ((number == pre_max) && (d - '0' <= dig_pre_max))) {
+          ((number == pre_max) && ((uint64_t)(d - '0') <= dig_pre_max))) {
         number = number * 10 + (d - '0');
         d = *++p;
 

diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c
@@ -35,6 +35,7 @@ This file implements string parsing and creation for NumPy datetime.
 #include <numpy/ndarraytypes.h>
 #include <numpy/npy_common.h>
 
+#include "pandas/portable.h"
 #include "pandas/vendored/numpy/datetime/np_datetime.h"
 #include "pandas/vendored/numpy/datetime/np_datetime_strings.h"
 
@@ -767,27 +768,38 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
   /*    return 4;*/
   case NPY_FR_as:
     len += 3; /* "###" */
+    PD_FALLTHROUGH;
   case NPY_FR_fs:
     len += 3; /* "###" */
+    PD_FALLTHROUGH;
   case NPY_FR_ps:
     len += 3; /* "###" */
+    PD_FALLTHROUGH;
   case NPY_FR_ns:
     len += 3; /* "###" */
+    PD_FALLTHROUGH;
   case NPY_FR_us:
     len += 3; /* "###" */
+    PD_FALLTHROUGH;
   case NPY_FR_ms:
     len += 4; /* ".###" */
+    PD_FALLTHROUGH;
   case NPY_FR_s:
     len += 3; /* ":##" */
+    PD_FALLTHROUGH;
   case NPY_FR_m:
     len += 3; /* ":##" */
+    PD_FALLTHROUGH;
   case NPY_FR_h:
     len += 3; /* "T##" */
+    PD_FALLTHROUGH;
   case NPY_FR_D:
   case NPY_FR_W:
     len += 3; /* "-##" */
+    PD_FALLTHROUGH;
   case NPY_FR_M:
     len += 3; /* "-##" */
+    PD_FALLTHROUGH;
   case NPY_FR_Y:
     len += 21; /* 64-bit year */
     break;

diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
@@ -40,6 +40,7 @@ Numeric decoder derived from TCL library
 
 // Licence at LICENSES/ULTRAJSON_LICENSE
 
+#include "pandas/portable.h"
 #include "pandas/vendored/ujson/lib/ultrajson.h"
 #include <locale.h>
 #include <math.h>
@@ -461,6 +462,7 @@ int Buffer_EscapeStringUnvalidated(JSONObjectEncoder *enc, const char *io,
     {
       if (enc->encodeHTMLChars) {
         // Fall through to \u00XX case below.
+        PD_FALLTHROUGH;
       } else {
         // Same as default case below.
         (*of++) = (*io);
@@ -645,6 +647,7 @@ int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc,
     case 29: {
       if (enc->encodeHTMLChars) {
         // Fall through to \u00XX case 30 below.
+        PD_FALLTHROUGH;
       } else {
         // Same as case 1 above.
         *(of++) = (*io++);

diff --git a/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c b/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c
@@ -41,6 +41,7 @@ Numeric decoder derived from TCL library
 #define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY
 #define NO_IMPORT_ARRAY
 #define PY_SSIZE_T_CLEAN
+#include "pandas/portable.h"
 #include "pandas/vendored/ujson/lib/ultrajson.h"
 #include <Python.h>
 #include <numpy/arrayobject.h>
@@ -77,61 +78,94 @@ void Npy_releaseContext(NpyArrContext *npyarr) {
 }
 
 static int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) {
+  UNUSED(prv);
   int ret = PyDict_SetItem(obj, name, value);
   Py_DECREF((PyObject *)name);
   Py_DECREF((PyObject *)value);
   return ret == 0 ? 1 : 0;
 }
 
 static int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) {
+  UNUSED(prv);
   int ret = PyList_Append(obj, value);
   Py_DECREF((PyObject *)value);
   return ret == 0 ? 1 : 0;
 }
 
 static JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) {
+  UNUSED(prv);
   return PyUnicode_FromWideChar(start, (end - start));
 }
 
-static JSOBJ Object_newTrue(void *prv) { Py_RETURN_TRUE; }
+static JSOBJ Object_newTrue(void *prv) {
+  UNUSED(prv);
+  Py_RETURN_TRUE;
+}
 
-static JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; }
+static JSOBJ Object_newFalse(void *prv) {
+  UNUSED(prv);
+  Py_RETURN_FALSE;
+}
 
-static JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; }
+static JSOBJ Object_newNull(void *prv) {
+  UNUSED(prv);
+  Py_RETURN_NONE;
+}
 
 static JSOBJ Object_newPosInf(void *prv) {
+  UNUSED(prv);
   return PyFloat_FromDouble(Py_HUGE_VAL);
 }
 
 static JSOBJ Object_newNegInf(void *prv) {
+  UNUSED(prv);
   return PyFloat_FromDouble(-Py_HUGE_VAL);
 }
 
-static JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); }
+static JSOBJ Object_newObject(void *prv, void *decoder) {
+  UNUSED(prv);
+  UNUSED(decoder);
+  return PyDict_New();
+}
 
-static JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; }
+static JSOBJ Object_endObject(void *prv, JSOBJ obj) {
+  UNUSED(prv);
+  return obj;
+}
 
-static JSOBJ Object_newArray(void *prv, void *decoder) { return PyList_New(0); }
+static JSOBJ Object_newArray(void *prv, void *decoder) {
+  UNUSED(prv);
+  UNUSED(decoder);
+  return PyList_New(0);
+}
 
-static JSOBJ Object_endArray(void *prv, JSOBJ obj) { return obj; }
+static JSOBJ Object_endArray(void *prv, JSOBJ obj) {
+  UNUSED(prv);
+  return obj;
+}
 
 static JSOBJ Object_newInteger(void *prv, JSINT32 value) {
-  return PyLong_FromLong((long)value);
+  UNUSED(prv);
+  return PyLong_FromLong(value);
 }
 
 static JSOBJ Object_newLong(void *prv, JSINT64 value) {
+  UNUSED(prv);
   return PyLong_FromLongLong(value);
 }
 
 static JSOBJ Object_newUnsignedLong(void *prv, JSUINT64 value) {
+  UNUSED(prv);
   return PyLong_FromUnsignedLongLong(value);
 }
 
 static JSOBJ Object_newDouble(void *prv, double value) {
+  UNUSED(prv);
   return PyFloat_FromDouble(value);
 }
 
 static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) {
+  UNUSED(prv);
   PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder;
   if (obj != decoder->npyarr_addr) {
     Py_XDECREF(((PyObject *)obj));
@@ -141,6 +175,7 @@ static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) {
 static char *g_kwlist[] = {"obj", "precise_float", "labelled", "dtype", NULL};
 
 PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
+  UNUSED(self);
   PyObject *ret;
   PyObject *sarg;
   PyObject *arg;
@@ -151,13 +186,28 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
   int labelled = 0;
 
   JSONObjectDecoder dec = {
-      Object_newString,  Object_objectAddKey,  Object_arrayAddItem,
-      Object_newTrue,    Object_newFalse,      Object_newNull,
-      Object_newPosInf,  Object_newNegInf,     Object_newObject,
-      Object_endObject,  Object_newArray,      Object_endArray,
-      Object_newInteger, Object_newLong,       Object_newUnsignedLong,
-      Object_newDouble,  Object_releaseObject, PyObject_Malloc,
-      PyObject_Free,     PyObject_Realloc};
+      .newString = Object_newString,
+      .objectAddKey = Object_objectAddKey,
+      .arrayAddItem = Object_arrayAddItem,
+      .newTrue = Object_newTrue,
+      .newFalse = Object_newFalse,
+      .newNull = Object_newNull,
+      .newPosInf = Object_newPosInf,
+      .newNegInf = Object_newNegInf,
+      .newObject = Object_newObject,
+      .endObject = Object_endObject,
+      .newArray = Object_newArray,
+      .endArray = Object_endArray,
+      .newInt = Object_newInteger,
+      .newLong = Object_newLong,
+      .newUnsignedLong = Object_newUnsignedLong,
+      .newDouble = Object_newDouble,
+      .releaseObject = Object_releaseObject,
+      .malloc = PyObject_Malloc,
+      .free = PyObject_Free,
+      .realloc = PyObject_Realloc,
+      .errorStr = NULL,
+  };
 
   dec.preciseFloat = 0;
   dec.prv = NULL;