pp_{add,subtract,multiply}: use __builtin_{add,sub,mul}_overflow if available

t-a-k · khwilliamson · commit 0f2b1e648156 · 2025-08-19T13:11:33.000-06:00
This will hopefully make the code faster and smaller, and allow more cases to be handled as "simple common cases". Note that this change uses the HAS_BUILTIN_{ADD,SUB,MUL}_OVERFLOW macros, which are already defined in config.h but do not appear to have been used by existing code. t/op/64bitint.t: Add tests to exercise the "simple common cases". Note that these tests should pass even before this change. Thanks to @tonycoz for advice on making this patch work better on LLP64 platforms, especially Win32 x86-64.
diff --git a/inline.h b/inline.h
@@ -3413,6 +3413,192 @@ S_lossless_NV_to_IV(const NV nv, IV *ivp)
     return FALSE;
 }
 
+/*
+ * S_iv_{add,sub,mul}_may_overflow(a, b, p) conceptually compute "a <op> b"
+ * (where <op> is +, -, or *) in infinite precision and, if the result
+ * is not (or may not be) representable as an IV, return true.
+ * Otherwise (no overflow), they store the result in *p and return false.
+ * These functions allow false positives (hence the "may" in their names)
+ * to speed up simple common cases.
+ */
+
+/* Define IV_*_OVERFLOW_IS_EXPENSIVE below to a nonzero value
+ * if strict overflow checks are too expensive
+ * (for example, on CPUs that have no hardware overflow detection flags).
+ * If these macros have a nonzero value, or overflow-checking compiler
+ * intrinsics are not available, the good old heuristics (with some false
+ * positives) will be used.  */
+#  ifndef IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
+#    define IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE 0
+#  endif
+#  ifndef IV_MUL_OVERFLOW_IS_EXPENSIVE
+/* A strict overflow check for IV multiplication is generally expensive
+ * when IV is a multi-word integer.
+ * We assume that PTRSIZE matches the platform word size; LONGSIZE might not
+ * match on LLP64 platforms such as Win32 x86-64.  */
+#    define IV_MUL_OVERFLOW_IS_EXPENSIVE (IVSIZE > PTRSIZE)
+#  endif
+
+#  if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
+/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
+#    define S_iv_add_may_overflow(il, ir, result) ckd_add(result, il, ir)
+#  elif defined(HAS_BUILTIN_ADD_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
+#    define S_iv_add_may_overflow __builtin_add_overflow
+#  else
+PERL_STATIC_INLINE bool
+S_iv_add_may_overflow (IV il, IV ir, IV *const result)
+{
+    /* topl and topr hold only 2 bits */
+    PERL_UINT_FAST8_T const topl = ((UV)il) >> (UVSIZE * 8 - 2);
+    PERL_UINT_FAST8_T const topr = ((UV)ir) >> (UVSIZE * 8 - 2);
+
+    /* if both are in a range that can't under/overflow, do a simple integer
+     * add: it's safe if the top two bits of each number are 00 or 11 */
+    if (!( ((topl+1) | (topr+1)) & 2)) {
+        *result = il + ir;
+        return false;
+    }
+    return true;                   /* addition may overflow */
+}
+#  endif
+
+/*
+ * S_uv_{add,sub,mul}_overflow(a, b, p) are similar, but the results are UVs
+ * and they must perform strict overflow checks (no false positives).
+ */
+
+#  if defined(I_STDCKDINT)
+/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
+#    define S_uv_add_overflow(auv, buv, result) ckd_add(result, auv, buv)
+#  elif defined(HAS_BUILTIN_ADD_OVERFLOW)
+#    define S_uv_add_overflow __builtin_add_overflow
+#  else
+PERL_STATIC_INLINE bool
+S_uv_add_overflow (UV auv, UV buv, UV *const result)
+{
+    /* (auv + buv) < auv means that the addition wrapped around,
+       i.e. overflowed.  Note that unsigned integer overflow is well-defined
+       in standard C to wrap around, in contrast to signed integer overflow
+       whose behaviour is undefined.  */
+    return (*result = auv + buv) < auv;
+}
+#  endif
+
+#  if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
+/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
+#    define S_iv_sub_may_overflow(il, ir, result) ckd_sub(result, il, ir)
+#  elif defined(HAS_BUILTIN_SUB_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
+#    define S_iv_sub_may_overflow __builtin_sub_overflow
+#  else
+PERL_STATIC_INLINE bool
+S_iv_sub_may_overflow (IV il, IV ir, IV *const result)
+{
+    PERL_UINT_FAST8_T const topl = ((UV)il) >> (UVSIZE * 8 - 2);
+    PERL_UINT_FAST8_T const topr = ((UV)ir) >> (UVSIZE * 8 - 2);
+
+    /* if both are in a range that can't under/overflow, do a simple integer
+     * subtract: it's safe if the top two bits of each number are 00 or 11 */
+    if (!( ((topl+1) | (topr+1)) & 2)) {
+        *result = il - ir;
+        return false;
+    }
+    return true;                   /* subtraction may overflow */
+}
+#  endif
+
+#  if defined(I_STDCKDINT)
+/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
+#    define S_uv_sub_overflow(auv, buv, result) ckd_sub(result, auv, buv)
+#  elif defined(HAS_BUILTIN_SUB_OVERFLOW)
+#    define S_uv_sub_overflow __builtin_sub_overflow
+#  else
+PERL_STATIC_INLINE bool
+S_uv_sub_overflow (UV auv, UV buv, UV *const result)
+{
+    return (*result = auv - buv) > auv;
+}
+#  endif
+
+#  if defined(I_STDCKDINT) && !IV_MUL_OVERFLOW_IS_EXPENSIVE
+/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
+#    define S_iv_mul_may_overflow(il, ir, result) ckd_mul(result, il, ir)
+#  elif defined(HAS_BUILTIN_MUL_OVERFLOW) && !IV_MUL_OVERFLOW_IS_EXPENSIVE
+#    define S_iv_mul_may_overflow __builtin_mul_overflow
+#  else
+PERL_STATIC_INLINE bool
+S_iv_mul_may_overflow (IV il, IV ir, IV *const result)
+{
+    UV const topl = ((UV)il) >> (UVSIZE * 4 - 1);
+    UV const topr = ((UV)ir) >> (UVSIZE * 4 - 1);
+
+    /* if both are in a range that can't under/overflow, do a simple integer
+     * multiply: if the top halves(*) of both numbers are 00...00  or 11...11,
+     * then it's safe.
+     * (*) for 32 bits, the "top half" is the top 17 bits;
+     *     for 64 bits, it's the top 33 bits */
+    if (!(
+              ((topl+1) | (topr+1))
+            & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
+    )) {
+        *result = il * ir;
+        return false;
+    }
+    return true;                   /* multiplication may overflow */
+}
+#  endif
+
+#  if defined(I_STDCKDINT)
+/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
+#    define S_uv_mul_overflow(auv, buv, result) ckd_mul(result, auv, buv)
+#  elif defined(HAS_BUILTIN_MUL_OVERFLOW)
+#    define S_uv_mul_overflow   __builtin_mul_overflow
+#  else
+PERL_STATIC_INLINE bool
+S_uv_mul_overflow (UV auv, UV buv, UV *const result)
+{
+    const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
+    const UV botmask = ~topmask;
+
+#    if UVSIZE > LONGSIZE && UVSIZE <= 2 * LONGSIZE
+    /* If UV is a double-word integer, declare these variables as single-word
+       integers to help the compiler avoid double-word multiplication.  */
+    unsigned long alow, ahigh, blow, bhigh;
+#    else
+    UV alow, ahigh, blow, bhigh;
+#    endif
+
+    /* If this does sign extension on unsigned it's time for plan B  */
+    ahigh = auv >> (4 * sizeof (UV));
+    alow  = auv & botmask;
+    bhigh = buv >> (4 * sizeof (UV));
+    blow  = buv & botmask;
+
+    if (ahigh && bhigh)
+        /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
+           which is overflow.  */
+        return true;
+
+    UV product_middle = 0;
+    if (ahigh || bhigh) {
+        /* One operand is large, 1 small */
+        /* Either ahigh or bhigh is zero here, so the addition below
+           can't overflow.  */
+        product_middle = (UV)ahigh * blow + (UV)alow * bhigh;
+        if (product_middle & topmask)
+            return true;
+        /* OK, product_middle won't lose bits when we shift it.  */
+        product_middle <<= 4 * sizeof (UV);
+    }
+    /* else: eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
+       so the unsigned multiply cannot overflow.  */
+
+    /* The (UV) cast below is necessary to force the multiplication to produce
+       a UV result, as alow and blow might be narrower than UV.  */
+    UV product_low = (UV)alow * blow;
+    return S_uv_add_overflow(product_middle, product_low, result);
+}
+#  endif
+
 #endif
 
 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
@@ -110,6 +110,12 @@ There may well be none in a stable release.
 
 =item *
 
+Simple (non-overflowing) addition (C<+>), subtraction (C<->) and
+multiplication (C<*>) of IVs are slightly sped up, as long as
+sufficient underlying C compiler support is available.
+
+=item *
+
 XXX
 
 =back
diff --git a/pp.c b/pp.c
@@ -1336,23 +1336,12 @@ PP(pp_multiply)
         U32 flags = (svl->sv_flags & svr->sv_flags);
         if (flags & SVf_IOK) {
             /* both args are simple IVs */
-            UV topl, topr;
+            IV result;
             il = SvIVX(svl);
             ir = SvIVX(svr);
           do_iv:
-            topl = ((UV)il) >> (UVSIZE * 4 - 1);
-            topr = ((UV)ir) >> (UVSIZE * 4 - 1);
-
-            /* if both are in a range that can't under/overflow, do a
-             * simple integer multiply: if the top halves(*) of both numbers
-             * are 00...00  or 11...11, then it's safe.
-             * (*) for 32-bits, the "top half" is the top 17 bits,
-             *     for 64-bits, its 33 bits */
-            if (!(
-                      ((topl+1) | (topr+1))
-                    & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
-            )) {
-                TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
+            if (!S_iv_mul_may_overflow(il, ir, &result)) {
+                TARGi(result, 0); /* args not GMG, so can't be tainted */
                 goto ret;
             }
             goto generic;
@@ -1388,12 +1377,9 @@ PP(pp_multiply)
         if (SvIV_please_nomg(svl)) {
             bool auvok = SvUOK(svl);
             bool buvok = SvUOK(svr);
-            const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
-            const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
             UV alow;
-            UV ahigh;
             UV blow;
-            UV bhigh;
+            UV product;
 
             if (auvok) {
                 alow = SvUVX(svl);
@@ -1420,19 +1406,7 @@ PP(pp_multiply)
                 }
             }
 
-            /* If this does sign extension on unsigned it's time for plan B  */
-            ahigh = alow >> (4 * sizeof (UV));
-            alow &= botmask;
-            bhigh = blow >> (4 * sizeof (UV));
-            blow &= botmask;
-            if (ahigh && bhigh) {
-                NOOP;
-                /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
-                   which is overflow. Drop to NVs below.  */
-            } else if (!ahigh && !bhigh) {
-                /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
-                   so the unsigned multiply cannot overflow.  */
-                const UV product = alow * blow;
+            if (!S_uv_mul_overflow(alow, blow, &product)) {
                 if (auvok == buvok) {
                     /* -ve * -ve or +ve * +ve gives a +ve result.  */
                     TARGu(product, 1);
@@ -1442,42 +1416,6 @@ PP(pp_multiply)
                     TARGi(NEGATE_2IV(product), 1);
                     goto ret;
                 } /* else drop to NVs below. */
-            } else {
-                /* One operand is large, 1 small */
-                UV product_middle;
-                if (bhigh) {
-                    /* swap the operands */
-                    ahigh = bhigh;
-                    bhigh = blow; /* bhigh now the temp var for the swap */
-                    blow = alow;
-                    alow = bhigh;
-                }
-                /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
-                   multiplies can't overflow. shift can, add can, -ve can.  */
-                product_middle = ahigh * blow;
-                if (!(product_middle & topmask)) {
-                    /* OK, (ahigh * blow) won't lose bits when we shift it.  */
-                    UV product_low;
-                    product_middle <<= (4 * sizeof (UV));
-                    product_low = alow * blow;
-
-                    /* as for pp_add, UV + something mustn't get smaller.
-                       IIRC ANSI mandates this wrapping *behaviour* for
-                       unsigned whatever the actual representation*/
-                    product_low += product_middle;
-                    if (product_low >= product_middle) {
-                        /* didn't overflow */
-                        if (auvok == buvok) {
-                            /* -ve * -ve or +ve * +ve gives a +ve result.  */
-                            TARGu(product_low, 1);
-                            goto ret;
-                        } else if (product_low <= ABS_IV_MIN) {
-                            /* -ve result, which could overflow an IV  */
-                            TARGi(NEGATE_2IV(product_low), 1);
-                            goto ret;
-                        } /* else drop to NVs below. */
-                    }
-                } /* product_middle too large */
-            } /* ahigh && bhigh */
+            } /* !S_uv_mul_overflow(alow, blow, &product) */
         } /* SvIOK(svl) */
     } /* SvIOK(svr) */
@@ -1929,18 +1867,12 @@ PP(pp_subtract)
         U32 flags = (svl->sv_flags & svr->sv_flags);
         if (flags & SVf_IOK) {
             /* both args are simple IVs */
-            UV topl, topr;
+            IV result;
             il = SvIVX(svl);
             ir = SvIVX(svr);
           do_iv:
-            topl = ((UV)il) >> (UVSIZE * 8 - 2);
-            topr = ((UV)ir) >> (UVSIZE * 8 - 2);
-
-            /* if both are in a range that can't under/overflow, do a
-             * simple integer subtract: if the top of both numbers
-             * are 00  or 11, then it's safe */
-            if (!( ((topl+1) | (topr+1)) & 2)) {
-                TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
+            if (!S_iv_sub_may_overflow(il, ir, &result)) {
+                TARGi(result, 0); /* args not GMG, so can't be tainted */
                 goto ret;
             }
             goto generic;
diff --git a/pp_hot.c b/pp_hot.c
@@ -1827,18 +1827,12 @@ PP(pp_add)
         U32 flags = (svl->sv_flags & svr->sv_flags);
         if (flags & SVf_IOK) {
             /* both args are simple IVs */
-            UV topl, topr;
+            IV result;
             il = SvIVX(svl);
             ir = SvIVX(svr);
           do_iv:
-            topl = ((UV)il) >> (UVSIZE * 8 - 2);
-            topr = ((UV)ir) >> (UVSIZE * 8 - 2);
-
-            /* if both are in a range that can't under/overflow, do a
-             * simple integer add: if the top of both numbers
-             * are 00  or 11, then it's safe */
-            if (!( ((topl+1) | (topr+1)) & 2)) {
-                TARGi(il + ir, 0); /* args not GMG, so can't be tainted */
+            if (!S_iv_add_may_overflow(il, ir, &result)) {
+                TARGi(result, 0); /* args not GMG, so can't be tainted */
                 goto ret;
             }
             goto generic;
diff --git a/t/op/64bitint.t b/t/op/64bitint.t
@@ -469,4 +469,30 @@ cmp_ok  0x3ffffffffffffffe % -0xc000000000000000, '==', -0x8000000000000002, 'mo
 cmp_ok  0x3fffffffffffffff % -0xc000000000000000, '==', -0x8000000000000001, 'modulo is (IV_MIN-1)';
 cmp_ok  0x4000000000000000 % -0xc000000000000000, '==', -0x8000000000000000, 'modulo is IV_MIN';
 
+# Arithmetic close to IV overflow
+
+# These used to be handled by the generic (slower) code, but now take the
+# fast path (as "simple common cases").  Either way, these tests should pass.
+$q = 9223372036854775800;
+cmp_ok 5 + $q, '==', 9223372036854775805, "5 + $q";
+cmp_ok $q - -5, '==', 9223372036854775805, "$q - -5";
+$q = 1111111111111111111;
+cmp_ok $q * 5, '==', 5555555555555555555, "$q * 5";
+
+# IV <op> IV -> UV/NV promotion
+
+$q = 7777777777777777777;
+$r = 2222222222222222223;
+# Note 10000000000000000000 can be represented accurately in both
+# IEEE double (binary64; 0x1.158e460913dp+63) and decimal format (1e+19)
+cmp_ok $q + $r, '==', 10000000000000000000, 'IV + IV promotes to UV';
+cmp_ok -$q + -$r, '==', -10000000000000000000, 'IV + IV promotes to NV';
+cmp_ok $q - -$r, '==', 10000000000000000000, 'IV - IV promotes to UV';
+cmp_ok -$q - $r, '==', -10000000000000000000, 'IV - IV promotes to NV';
+$q = 3000000000;
+$r = 4000000000;
+cmp_ok $q * $r, '==', 12000000000000000000, 'IV * IV promotes to UV';
+cmp_ok $q * -$r, '==', -12000000000000000000, 'IV * IV promotes to UV then NV';
+cmp_ok +($q * 2) * $r, '==', 24000000000000000000, 'IV * IV promotes to NV';
+
 done_testing();