From a082766d34f4a85a8e614bed82b45bed597b3177 Mon Sep 17 00:00:00 2001
From: Brandt Bucher <brandtbucher@microsoft.com>
Date: Wed, 11 Dec 2024 14:27:16 -0800
Subject: [PATCH 1/6] Add failing regression tests

---
 Lib/test/test_capi/test_opt.py | 44 ++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 4cf9b66170c055..9827cf0b6b6fce 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -1,4 +1,5 @@
 import contextlib
+import itertools
 import sys
 import textwrap
 import unittest
@@ -1511,6 +1512,49 @@ def test_jit_error_pops(self):
         with self.assertRaises(TypeError):
             {item for item in items}
 
+    def test_power_type_depends_on_input_values(self):
+        template = textwrap.dedent("""
+            import _testinternalcapi
+
+            L, R, X, Y = {l}, {r}, {x}, {y}
+
+            def check(actual: complex, expected: complex) -> None:
+                assert actual == expected, (actual, expected)
+                assert type(actual) is type(expected), (actual, expected)
+
+            def f(l: complex, r: complex) -> None:
+                expected_local_local = pow(l, r) + pow(l, r)
+                expected_const_local = pow(L, r) + pow(L, r)
+                expected_local_const = pow(l, R) + pow(l, R)
+                expected_const_const = pow(L, R) + pow(L, R)
+                for _ in range(_testinternalcapi.TIER2_THRESHOLD):
+                    # Narrow types:
+                    l + l, r + r
+                    # The powers produce results, and the addition is unguarded:
+                    check(l ** r + l ** r, expected_local_local)
+                    check(L ** r + L ** r, expected_const_local)
+                    check(l ** R + l ** R, expected_local_const)
+                    check(L ** R + L ** R, expected_const_const)
+
+            # JIT for one pair of values...
+            f(L, R)
+            # ...then run with another:
+            f(X, Y)
+        """)
+        interesting = [
+            (1, 1),  # int ** int -> int
+            (1, -1),  # int ** int -> float
+            (1, 1.0),  # int ** float -> float
+            (-1, 0.1),  # int ** float -> complex
+            (1.0, 1),  # float ** int -> float
+            (1.0, 1.0),  # float ** float -> float
+            (-1.0, 0.1),  # float ** float -> complex
+        ]
+        for (l, r), (x, y) in itertools.product(interesting, repeat=2):
+            s = template.format(l=l, r=r, x=x, y=y)
+            with self.subTest(l=l, r=r, x=x, y=y):
+                script_helper.assert_python_ok("-c", s)
+
 
 def global_identity(x):
     return x

From 43d8a3d6e6dc8404b63e5debde9f243581055ce8 Mon Sep 17 00:00:00 2001
From: Brandt Bucher <brandtbucher@microsoft.com>
Date: Wed, 11 Dec 2024 14:27:47 -0800
Subject: [PATCH 2/6] Add some new asserts to specialized math

---
 Python/bytecodes.c         | 16 ++++++++++++++++
 Python/executor_cases.c.h  | 16 ++++++++++++++++
 Python/generated_cases.c.h | 16 ++++++++++++++++
 3 files changed, 48 insertions(+)

diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 3d280941b35244..4241b22132b530 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -511,6 +511,8 @@ dummy_func(
         pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyLong_CheckExact(left_o));
+            assert(PyLong_CheckExact(right_o));
 
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
@@ -524,6 +526,8 @@ dummy_func(
         pure op(_BINARY_OP_ADD_INT, (left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyLong_CheckExact(left_o));
+            assert(PyLong_CheckExact(right_o));
 
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
@@ -537,6 +541,8 @@ dummy_func(
         pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyLong_CheckExact(left_o));
+            assert(PyLong_CheckExact(right_o));
 
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
@@ -574,6 +580,8 @@ dummy_func(
         pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
 
             STAT_INC(BINARY_OP, hit);
             double dres =
@@ -588,6 +596,8 @@ dummy_func(
         pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
 
             STAT_INC(BINARY_OP, hit);
             double dres =
@@ -602,6 +612,8 @@ dummy_func(
         pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
 
             STAT_INC(BINARY_OP, hit);
             double dres =
@@ -631,6 +643,8 @@ dummy_func(
         pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyUnicode_CheckExact(left_o));
+            assert(PyUnicode_CheckExact(right_o));
 
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = PyUnicode_Concat(left_o, right_o);
@@ -653,6 +667,8 @@ dummy_func(
         op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyUnicode_CheckExact(left_o));
+            assert(PyUnicode_CheckExact(right_o));
 
             int next_oparg;
         #if TIER_ONE
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 987ff2e6419669..7b8bd3d5d44047 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -624,6 +624,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyLong_CheckExact(left_o));
+            assert(PyLong_CheckExact(right_o));
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
             PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free);
@@ -644,6 +646,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyLong_CheckExact(left_o));
+            assert(PyLong_CheckExact(right_o));
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
             PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free);
@@ -664,6 +668,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyLong_CheckExact(left_o));
+            assert(PyLong_CheckExact(right_o));
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
             PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free);
@@ -724,6 +730,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
             STAT_INC(BINARY_OP, hit);
             double dres =
             ((PyFloatObject *)left_o)->ob_fval *
@@ -745,6 +753,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
             STAT_INC(BINARY_OP, hit);
             double dres =
             ((PyFloatObject *)left_o)->ob_fval +
@@ -766,6 +776,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyFloat_CheckExact(left_o));
+            assert(PyFloat_CheckExact(right_o));
             STAT_INC(BINARY_OP, hit);
             double dres =
             ((PyFloatObject *)left_o)->ob_fval -
@@ -805,6 +817,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyUnicode_CheckExact(left_o));
+            assert(PyUnicode_CheckExact(right_o));
             STAT_INC(BINARY_OP, hit);
             PyObject *res_o = PyUnicode_Concat(left_o, right_o);
             PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
@@ -824,6 +838,8 @@
             left = stack_pointer[-2];
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+            assert(PyUnicode_CheckExact(left_o));
+            assert(PyUnicode_CheckExact(right_o));
             int next_oparg;
             #if TIER_ONE
             assert(next_instr->op.code == STORE_FAST);
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 33f32aba1e5145..72e60688d99a5e 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -80,6 +80,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
                 STAT_INC(BINARY_OP, hit);
                 double dres =
                 ((PyFloatObject *)left_o)->ob_fval +
@@ -116,6 +118,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
                 STAT_INC(BINARY_OP, hit);
                 PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
                 PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free);
@@ -151,6 +155,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyUnicode_CheckExact(left_o));
+                assert(PyUnicode_CheckExact(right_o));
                 STAT_INC(BINARY_OP, hit);
                 PyObject *res_o = PyUnicode_Concat(left_o, right_o);
                 PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
@@ -185,6 +191,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyUnicode_CheckExact(left_o));
+                assert(PyUnicode_CheckExact(right_o));
                 int next_oparg;
                 #if TIER_ONE
                 assert(next_instr->op.code == STORE_FAST);
@@ -247,6 +255,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
                 STAT_INC(BINARY_OP, hit);
                 double dres =
                 ((PyFloatObject *)left_o)->ob_fval *
@@ -283,6 +293,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
                 STAT_INC(BINARY_OP, hit);
                 PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
                 PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free);
@@ -318,6 +330,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyFloat_CheckExact(left_o));
+                assert(PyFloat_CheckExact(right_o));
                 STAT_INC(BINARY_OP, hit);
                 double dres =
                 ((PyFloatObject *)left_o)->ob_fval -
@@ -354,6 +368,8 @@
             {
                 PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
                 PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
+                assert(PyLong_CheckExact(left_o));
+                assert(PyLong_CheckExact(right_o));
                 STAT_INC(BINARY_OP, hit);
                 PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
                 PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free);

From 5e6088e644d098e20a11ebbd6a0b63dfb642b63c Mon Sep 17 00:00:00 2001
From: Brandt Bucher <brandtbucher@microsoft.com>
Date: Wed, 11 Dec 2024 14:28:23 -0800
Subject: [PATCH 3/6] Fix the optimizer's understanding of exponentiation

---
 Python/optimizer_bytecodes.c      | 81 +++++++++++++++++++++++-----
 Python/optimizer_cases.c.h        | 87 ++++++++++++++++++++++++++-----
 Tools/cases_generator/analyzer.py |  2 +
 3 files changed, 146 insertions(+), 24 deletions(-)

diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 42bdbd9ca8d0cd..b10c333987c42b 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -167,24 +167,81 @@ dummy_func(void) {
     }
 
     op(_BINARY_OP, (left, right -- res)) {
-        PyTypeObject *ltype = sym_get_type(left);
-        PyTypeObject *rtype = sym_get_type(right);
-        if (ltype != NULL && (ltype == &PyLong_Type || ltype == &PyFloat_Type) &&
-            rtype != NULL && (rtype == &PyLong_Type || rtype == &PyFloat_Type))
-        {
-            if (oparg != NB_TRUE_DIVIDE && oparg != NB_INPLACE_TRUE_DIVIDE &&
-                ltype == &PyLong_Type && rtype == &PyLong_Type) {
-                /* If both inputs are ints and the op is not division the result is an int */
-                res = sym_new_type(ctx, &PyLong_Type);
+        bool lhs_int = sym_matches_type(left, &PyLong_Type);
+        bool rhs_int = sym_matches_type(right, &PyLong_Type);
+        bool lhs_float = sym_matches_type(left, &PyFloat_Type);
+        bool rhs_float = sym_matches_type(right, &PyFloat_Type);
+        if ((!lhs_int && !lhs_float) || (!rhs_int && !rhs_float)) {
+            res = sym_new_unknown(ctx);
+            goto binary_op_done;
+        }
+        if (oparg == NB_POWER || oparg == NB_INPLACE_POWER) {
+            // This one's fun: the *type* of the result depends on the *values*
+            // being exponentiated. But exponents with one constant part are
+            // reasonably common, so it's probably worth trying to be precise:
+            PyObject *lhs_const = sym_get_const(left);
+            PyObject *rhs_const = sym_get_const(right);
+            if (lhs_int && rhs_int) {
+                if (rhs_const == NULL) {
+                    // Unknown RHS means either int or float:
+                    res = sym_new_unknown(ctx);
+                    goto binary_op_done;
+                }
+                if (!_PyLong_IsNegative((PyLongObject *)rhs_const)) {
+                    // Non-negative RHS means int:
+                    res = sym_new_type(ctx, &PyLong_Type);
+                    goto binary_op_done;
+                }
+                // Negative RHS uses float_pow...
             }
-            else {
-                /* For any other op combining ints/floats the result is a float */
+            // Negative LHS *and* non-integral RHS means complex. So we need to
+            // disprove at least one to prove a float result:
+            if (rhs_int) {
+                // Integral RHS means float:
                 res = sym_new_type(ctx, &PyFloat_Type);
+                goto binary_op_done;
+            }
+            if (rhs_const) {
+                double rhs_double = PyFloat_AS_DOUBLE(rhs_const);
+                if (rhs_double == floor(rhs_double)) {
+                    // Integral RHS means float:
+                    res = sym_new_type(ctx, &PyFloat_Type);
+                    goto binary_op_done;
+                }
+            }
+            if (lhs_const) {
+                if (lhs_int) {
+                    if (!_PyLong_IsNegative((PyLongObject *)lhs_const)) {
+                        // Non-negative LHS means float:
+                        res = sym_new_type(ctx, &PyFloat_Type);
+                        goto binary_op_done;
+                    }
+                }
+                else if (0.0 <= PyFloat_AS_DOUBLE(lhs_const)) {
+                    // Non-negative LHS means float:
+                    res = sym_new_type(ctx, &PyFloat_Type);
+                    goto binary_op_done;
+                }
+                if (rhs_const) {
+                    // If we have two constants and failed to disprove that it's
+                    // complex, then it's complex:
+                    res = sym_new_type(ctx, &PyComplex_Type);
+                    goto binary_op_done;
+                }
             }
+            // Couldn't prove anything. It's either float or complex:
+            res = sym_new_unknown(ctx);
+        }
+        else if (oparg == NB_TRUE_DIVIDE || oparg == NB_INPLACE_TRUE_DIVIDE) {
+            res = sym_new_type(ctx, &PyFloat_Type);
+        }
+        else if (lhs_int && rhs_int) {
+            res = sym_new_type(ctx, &PyLong_Type);
         }
         else {
-            res = sym_new_unknown(ctx);
+            res = sym_new_type(ctx, &PyFloat_Type);
         }
+binary_op_done:
     }
 
     op(_BINARY_OP_ADD_INT, (left, right -- res)) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index f77a5aa35bdf82..f3e99682551831 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -2276,24 +2276,87 @@
             _Py_UopsSymbol *res;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            PyTypeObject *ltype = sym_get_type(left);
-            PyTypeObject *rtype = sym_get_type(right);
-            if (ltype != NULL && (ltype == &PyLong_Type || ltype == &PyFloat_Type) &&
-                rtype != NULL && (rtype == &PyLong_Type || rtype == &PyFloat_Type))
-            {
-                if (oparg != NB_TRUE_DIVIDE && oparg != NB_INPLACE_TRUE_DIVIDE &&
-                    ltype == &PyLong_Type && rtype == &PyLong_Type) {
-                    /* If both inputs are ints and the op is not division the result is an int */
-                    res = sym_new_type(ctx, &PyLong_Type);
+            bool lhs_int = sym_matches_type(left, &PyLong_Type);
+            bool rhs_int = sym_matches_type(right, &PyLong_Type);
+            bool lhs_float = sym_matches_type(left, &PyFloat_Type);
+            bool rhs_float = sym_matches_type(right, &PyFloat_Type);
+            if ((!lhs_int && !lhs_float) || (!rhs_int && !rhs_float)) {
+                res = sym_new_unknown(ctx);
+                goto binary_op_done;
+            }
+            if (oparg == NB_POWER || oparg == NB_INPLACE_POWER) {
+                // This one's fun: the *type* of the result depends on the *values*
+                // being exponentiated. But exponents with one constant part are
+                // reasonably common, so it's probably worth trying to be precise:
+                PyObject *lhs_const = sym_get_const(left);
+                PyObject *rhs_const = sym_get_const(right);
+                if (lhs_int && rhs_int) {
+                    if (rhs_const == NULL) {
+                        // Unknown RHS means either int or float:
+                        res = sym_new_unknown(ctx);
+                        goto binary_op_done;
+                    }
+                    if (!_PyLong_IsNegative((PyLongObject *)rhs_const)) {
+                        // Non-negative RHS means int:
+                        res = sym_new_type(ctx, &PyLong_Type);
+                        goto binary_op_done;
+                    }
+                    // Negative RHS uses float_pow...
                 }
-                else {
-                    /* For any other op combining ints/floats the result is a float */
+                // Negative LHS *and* non-integral RHS means complex. So we need to
+                // disprove at least one to prove a float result:
+                if (rhs_int) {
+                    // Integral RHS means float:
                     res = sym_new_type(ctx, &PyFloat_Type);
+                    goto binary_op_done;
+                }
+                if (rhs_const) {
+                    double rhs_double = PyFloat_AS_DOUBLE(rhs_const);
+                    if (rhs_double == floor(rhs_double)) {
+                        // Integral RHS means float:
+                        res = sym_new_type(ctx, &PyFloat_Type);
+                        goto binary_op_done;
+                    }
+                }
+                if (lhs_const) {
+                    if (lhs_int) {
+                        if (!_PyLong_IsNegative((PyLongObject *)lhs_const)) {
+                            // Non-negative LHS means float:
+                            res = sym_new_type(ctx, &PyFloat_Type);
+                            goto binary_op_done;
+                        }
+                    }
+                    else {
+                        if (0.0 <= PyFloat_AS_DOUBLE(lhs_const)) {
+                            // Non-negative LHS means float:
+                            res = sym_new_type(ctx, &PyFloat_Type);
+                            goto binary_op_done;
+                        }
+                    }
+                    if (rhs_const) {
+                        // If we have two constants and failed to disprove that it's
+                        // complex, then it's complex:
+                        res = sym_new_type(ctx, &PyComplex_Type);
+                        goto binary_op_done;
+                    }
                 }
+                // Couldn't prove anything. It's either float or complex:
+                res = sym_new_unknown(ctx);
             }
             else {
-                res = sym_new_unknown(ctx);
+                if (oparg == NB_TRUE_DIVIDE || oparg == NB_INPLACE_TRUE_DIVIDE) {
+                    res = sym_new_type(ctx, &PyFloat_Type);
+                }
+                else {
+                    if (lhs_int && rhs_int) {
+                        res = sym_new_type(ctx, &PyLong_Type);
+                    }
+                    else {
+                        res = sym_new_type(ctx, &PyFloat_Type);
+                    }
+                }
             }
+            binary_op_done:
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index eca851e6de87ae..9c8b53e234a6a5 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -596,6 +596,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool:
     "_PyLong_CompactValue",
     "_PyLong_DigitCount",
     "_PyLong_IsCompact",
+    "_PyLong_IsNegative",
     "_PyLong_IsNonNegativeCompact",
     "_PyLong_IsZero",
     "_PyLong_Multiply",
@@ -634,6 +635,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool:
     "advance_backoff_counter",
     "assert",
     "backoff_counter_triggers",
+    "floor",
     "initial_temperature_backoff_counter",
     "maybe_lltrace_resume_frame",
     "restart_backoff_counter",

From 222b2c87d3857c1a12fb89d9afeb5bc8cb482f51 Mon Sep 17 00:00:00 2001
From: Brandt Bucher <brandtbucher@microsoft.com>
Date: Wed, 11 Dec 2024 14:32:40 -0800
Subject: [PATCH 4/6] Add NEWS entry (blurb add)

---
 .../2024-12-11-14-32-22.gh-issue-127809.0W8khe.rst              | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-11-14-32-22.gh-issue-127809.0W8khe.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-11-14-32-22.gh-issue-127809.0W8khe.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-11-14-32-22.gh-issue-127809.0W8khe.rst
new file mode 100644
index 00000000000000..19c8cc6e99c8c5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-11-14-32-22.gh-issue-127809.0W8khe.rst
@@ -0,0 +1,2 @@
+Fix an issue where the experimental JIT may infer an incorrect result type
+for exponentiation (``**`` and ``**=``), leading to bugs or crashes.

From 2833dd4a1b397f54fae1be76d2d63d084d8d7076 Mon Sep 17 00:00:00 2001
From: Brandt Bucher <brandtbucher@microsoft.com>
Date: Fri, 20 Dec 2024 15:33:31 -0800
Subject: [PATCH 5/6] Don't bother with more complex cases

---
 Lib/test/test_capi/test_opt.py |   6 +-
 Python/optimizer_bytecodes.c   |  86 ++++++++++----------------
 Python/optimizer_cases.c.h     | 106 ++++++++++++++-------------------
 3 files changed, 78 insertions(+), 120 deletions(-)

diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 9827cf0b6b6fce..d84702411afe41 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -1544,11 +1544,11 @@ def f(l: complex, r: complex) -> None:
         interesting = [
             (1, 1),  # int ** int -> int
             (1, -1),  # int ** int -> float
-            (1, 1.0),  # int ** float -> float
-            (-1, 0.1),  # int ** float -> complex
             (1.0, 1),  # float ** int -> float
+            (1, 1.0),  # int ** float -> float
+            (-1, 0.5),  # int ** float -> complex
             (1.0, 1.0),  # float ** float -> float
-            (-1.0, 0.1),  # float ** float -> complex
+            (-1.0, 0.5),  # float ** float -> complex
         ]
         for (l, r), (x, y) in itertools.product(interesting, repeat=2):
             s = template.format(l=l, r=r, x=x, y=y)
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index b10c333987c42b..dd24614d71a0dd 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -171,66 +171,43 @@ dummy_func(void) {
         bool rhs_int = sym_matches_type(right, &PyLong_Type);
         bool lhs_float = sym_matches_type(left, &PyFloat_Type);
         bool rhs_float = sym_matches_type(right, &PyFloat_Type);
-        if ((!lhs_int && !lhs_float) || (!rhs_int && !rhs_float)) {
+        if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) {
+            // There's something other than an int or float involved:
             res = sym_new_unknown(ctx);
-            goto binary_op_done;
-        }
-        if (oparg == NB_POWER || oparg == NB_INPLACE_POWER) {
-            // This one's fun: the *type* of the result depends on the *values*
-            // being exponentiated. But exponents with one constant part are
-            // reasonably common, so it's probably worth trying to be precise:
-            PyObject *lhs_const = sym_get_const(left);
-            PyObject *rhs_const = sym_get_const(right);
-            if (lhs_int && rhs_int) {
-                if (rhs_const == NULL) {
-                    // Unknown RHS means either int or float:
-                    res = sym_new_unknown(ctx);
-                    goto binary_op_done;
-                }
-                if (!_PyLong_IsNegative((PyLongObject *)rhs_const)) {
-                    // Non-negative RHS means int:
-                    res = sym_new_type(ctx, &PyLong_Type);
-                    goto binary_op_done;
-                }
-                // Negative RHS uses float_pow...
+        }
+        else if (oparg == NB_POWER || oparg == NB_INPLACE_POWER) {
+            // This one's fun... the *type* of the result depends on the
+            // *values* being exponentiated. However, exponents with one
+            // constant part are reasonably common, so it's probably worth
+            // trying to infer some simple cases:
+            // - A: 1 ** 1 -> 1 (int ** int -> int)
+            // - B: 1 ** -1 -> 1.0 (int ** int -> float)
+            // - C: 1.0 ** 1 -> 1.0 (float ** int -> float)
+            // - D: 1 ** 1.0 -> 1.0 (int ** float -> float)
+            // - E: -1 ** 0.5 ~> 1j (int ** float -> complex)
+            // - F: 1.0 ** 1.0 -> 1.0 (float ** float -> float)
+            // - G: -1.0 ** 0.5 ~> 1j (float ** float -> complex)
+            if (rhs_float) {
+                // Case D, E, F, or G... can't know without the sign of the LHS
+                // or whether the RHS is whole, which isn't worth the effort:
+                res = sym_new_unknown(ctx);
             }
-            // Negative LHS *and* non-integral RHS means complex. So we need to
-            // disprove at least one to prove a float result:
-            if (rhs_int) {
-                // Integral RHS means float:
+            else if (lhs_float) {
+                // Case C:
                 res = sym_new_type(ctx, &PyFloat_Type);
-                goto binary_op_done;
             }
-            if (rhs_const) {
-                double rhs_double = PyFloat_AS_DOUBLE(rhs_const);
-                if (rhs_double == floor(rhs_double)) {
-                    // Integral RHS means float:
-                    res = sym_new_type(ctx, &PyFloat_Type);
-                    goto binary_op_done;
-                }
+            else if (!sym_is_const(right)) {
+                // Case A or B... can't know without the sign of the RHS:
+                res = sym_new_unknown(ctx);
             }
-            if (lhs_const) {
-                if (lhs_int) {
-                    if (!_PyLong_IsNegative((PyLongObject *)lhs_const)) {
-                        // Non-negative LHS means float:
-                        res = sym_new_type(ctx, &PyFloat_Type);
-                        goto binary_op_done;
-                    }
-                }
-                else if (0.0 <= PyFloat_AS_DOUBLE(lhs_const)) {
-                    // Non-negative LHS means float:
-                    res = sym_new_type(ctx, &PyFloat_Type);
-                    goto binary_op_done;
-                }
-                if (rhs_const) {
-                    // If we have two constants and failed to disprove that it's
-                    // complex, then it's complex:
-                    res = sym_new_type(ctx, &PyComplex_Type);
-                    goto binary_op_done;
-                }
+            else if (_PyLong_IsNegative((PyLongObject *)sym_get_const(right))) {
+                // Case B:
+                res = sym_new_type(ctx, &PyFloat_Type);
+            }
+            else {
+                // Case A:
+                res = sym_new_type(ctx, &PyLong_Type);
             }
-            // Couldn't prove anything. It's either float or complex:
-            res = sym_new_unknown(ctx);
         }
         else if (oparg == NB_TRUE_DIVIDE || oparg == NB_INPLACE_TRUE_DIVIDE) {
             res = sym_new_type(ctx, &PyFloat_Type);
@@ -241,7 +218,6 @@ dummy_func(void) {
         else {
             res = sym_new_type(ctx, &PyFloat_Type);
         }
-binary_op_done:
     }
 
     op(_BINARY_OP_ADD_INT, (left, right -- res)) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index f3e99682551831..f4252984fe2a16 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -2280,83 +2280,65 @@
             bool rhs_int = sym_matches_type(right, &PyLong_Type);
             bool lhs_float = sym_matches_type(left, &PyFloat_Type);
             bool rhs_float = sym_matches_type(right, &PyFloat_Type);
-            if ((!lhs_int && !lhs_float) || (!rhs_int && !rhs_float)) {
+            if (!((lhs_int || lhs_float) && (rhs_int || rhs_float))) {
+                // There's something other than an int or float involved:
                 res = sym_new_unknown(ctx);
-                goto binary_op_done;
             }
-            if (oparg == NB_POWER || oparg == NB_INPLACE_POWER) {
-                // This one's fun: the *type* of the result depends on the *values*
-                // being exponentiated. But exponents with one constant part are
-                // reasonably common, so it's probably worth trying to be precise:
-                PyObject *lhs_const = sym_get_const(left);
-                PyObject *rhs_const = sym_get_const(right);
-                if (lhs_int && rhs_int) {
-                    if (rhs_const == NULL) {
-                        // Unknown RHS means either int or float:
+            else {
+                if (oparg == NB_POWER || oparg == NB_INPLACE_POWER) {
+                    // This one's fun... the *type* of the result depends on the
+                    // *values* being exponentiated. However, exponents with one
+                    // constant part are reasonably common, so it's probably worth
+                    // trying to infer some simple cases:
+                    // - A: 1 ** 1 -> 1 (int ** int -> int)
+                    // - B: 1 ** -1 -> 1.0 (int ** int -> float)
+                    // - C: 1.0 ** 1 -> 1.0 (float ** int -> float)
+                    // - D: 1 ** 1.0 -> 1.0 (int ** float -> float)
+                    // - E: -1 ** 0.5 ~> 1j (int ** float -> complex)
+                    // - F: 1.0 ** 1.0 -> 1.0 (float ** float -> float)
+                    // - G: -1.0 ** 0.5 ~> 1j (float ** float -> complex)
+                    if (rhs_float) {
+                        // Case D, E, F, or G... can't know without the sign of the LHS
+                        // or whether the RHS is whole, which isn't worth the effort:
                         res = sym_new_unknown(ctx);
-                        goto binary_op_done;
-                    }
-                    if (!_PyLong_IsNegative((PyLongObject *)rhs_const)) {
-                        // Non-negative RHS means int:
-                        res = sym_new_type(ctx, &PyLong_Type);
-                        goto binary_op_done;
-                    }
-                    // Negative RHS uses float_pow...
-                }
-                // Negative LHS *and* non-integral RHS means complex. So we need to
-                // disprove at least one to prove a float result:
-                if (rhs_int) {
-                    // Integral RHS means float:
-                    res = sym_new_type(ctx, &PyFloat_Type);
-                    goto binary_op_done;
-                }
-                if (rhs_const) {
-                    double rhs_double = PyFloat_AS_DOUBLE(rhs_const);
-                    if (rhs_double == floor(rhs_double)) {
-                        // Integral RHS means float:
-                        res = sym_new_type(ctx, &PyFloat_Type);
-                        goto binary_op_done;
-                    }
-                }
-                if (lhs_const) {
-                    if (lhs_int) {
-                        if (!_PyLong_IsNegative((PyLongObject *)lhs_const)) {
-                            // Non-negative LHS means float:
-                            res = sym_new_type(ctx, &PyFloat_Type);
-                            goto binary_op_done;
-                        }
                     }
                     else {
-                        if (0.0 <= PyFloat_AS_DOUBLE(lhs_const)) {
-                            // Non-negative LHS means float:
+                        if (lhs_float) {
+                            // Case C:
                             res = sym_new_type(ctx, &PyFloat_Type);
-                            goto binary_op_done;
+                        }
+                        else {
+                            if (!sym_is_const(right)) {
+                                // Case A or B... can't know without the sign of the RHS:
+                                res = sym_new_unknown(ctx);
+                            }
+                            else {
+                                if (_PyLong_IsNegative((PyLongObject *)sym_get_const(right))) {
+                                    // Case B:
+                                    res = sym_new_type(ctx, &PyFloat_Type);
+                                }
+                                else {
+                                    // Case A:
+                                    res = sym_new_type(ctx, &PyLong_Type);
+                                }
+                            }
                         }
                     }
-                    if (rhs_const) {
-                        // If we have two constants and failed to disprove that it's
-                        // complex, then it's complex:
-                        res = sym_new_type(ctx, &PyComplex_Type);
-                        goto binary_op_done;
-                    }
-                }
-                // Couldn't prove anything. It's either float or complex:
-                res = sym_new_unknown(ctx);
-            }
-            else {
-                if (oparg == NB_TRUE_DIVIDE || oparg == NB_INPLACE_TRUE_DIVIDE) {
-                    res = sym_new_type(ctx, &PyFloat_Type);
                 }
                 else {
-                    if (lhs_int && rhs_int) {
-                        res = sym_new_type(ctx, &PyLong_Type);
+                    if (oparg == NB_TRUE_DIVIDE || oparg == NB_INPLACE_TRUE_DIVIDE) {
+                        res = sym_new_type(ctx, &PyFloat_Type);
                     }
                     else {
-                        res = sym_new_type(ctx, &PyFloat_Type);
+                        if (lhs_int && rhs_int) {
+                            res = sym_new_type(ctx, &PyLong_Type);
+                        }
+                        else {
+                            res = sym_new_type(ctx, &PyFloat_Type);
+                        }
                     }
                 }
             }
-            binary_op_done:
             stack_pointer[-2] = res;
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());

From bef3cc82e6d21af5f1ca575cfb2aad0689865b79 Mon Sep 17 00:00:00 2001
From: Brandt Bucher <brandtbucher@microsoft.com>
Date: Fri, 20 Dec 2024 15:41:37 -0800
Subject: [PATCH 6/6] Revert unneeded change

---
 Tools/cases_generator/analyzer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 9c8b53e234a6a5..57943840d5b5f7 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -635,7 +635,6 @@ def has_error_without_pop(op: parser.InstDef) -> bool:
     "advance_backoff_counter",
     "assert",
     "backoff_counter_triggers",
-    "floor",
     "initial_temperature_backoff_counter",
     "maybe_lltrace_resume_frame",
     "restart_backoff_counter",