Skip to content
Closed
16 changes: 8 additions & 8 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ typedef struct _PyExecutorObject {
size_t jit_size;
void *jit_code;
void *jit_side_entry;
PyObject *refs;
_PyExitData exits[1];
} _PyExecutorObject;

Expand Down Expand Up @@ -144,7 +145,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);

int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);
_PyBloomFilter *dependencies, PyObject *new_refs);

extern PyTypeObject _PyCounterExecutor_Type;
extern PyTypeObject _PyCounterOptimizer_Type;
Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,8 @@ def testfunc(n):
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertNotIn("_GUARD_BOTH_INT", uops)
self.assertIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_BINARY_OP_ADD_INT", uops)
self.assertIn("_LOAD_CONST_INLINE_BORROW", uops)
# Try again, but between the runs, set the global to a float.
# This should result in no executor the second time.
ns = {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve constant propagation and folding in JIT-compiled code.
16 changes: 8 additions & 8 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,11 @@ dummy_func(
}

macro(BINARY_OP_MULTIPLY_INT) =
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_MULTIPLY_INT;
NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_MULTIPLY_INT;
macro(BINARY_OP_ADD_INT) =
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_ADD_INT;
NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_ADD_INT;
macro(BINARY_OP_SUBTRACT_INT) =
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;
NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;

op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
Expand Down Expand Up @@ -558,11 +558,11 @@ dummy_func(
}

macro(BINARY_OP_MULTIPLY_FLOAT) =
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_MULTIPLY_FLOAT;
NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_MULTIPLY_FLOAT;
macro(BINARY_OP_ADD_FLOAT) =
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_ADD_FLOAT;
NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_ADD_FLOAT;
macro(BINARY_OP_SUBTRACT_FLOAT) =
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;
NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;

op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
Expand All @@ -585,7 +585,7 @@ dummy_func(
}

macro(BINARY_OP_ADD_UNICODE) =
_GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_ADD_UNICODE;
NOP + _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_ADD_UNICODE;

// This is a subtle one. It's a super-instruction for
// BINARY_OP_ADD_UNICODE followed by STORE_FAST
Expand Down Expand Up @@ -634,7 +634,7 @@ dummy_func(
}

macro(BINARY_OP_INPLACE_ADD_UNICODE) =
_GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_INPLACE_ADD_UNICODE;
NOP + NOP + _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_INPLACE_ADD_UNICODE;

family(BINARY_SUBSCR, INLINE_CACHE_ENTRIES_BINARY_SUBSCR) = {
BINARY_SUBSCR_DICT,
Expand Down
27 changes: 27 additions & 0 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 38 additions & 1 deletion Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ uop_dealloc(_PyExecutorObject *self) {
_PyObject_GC_UNTRACK(self);
assert(self->vm_data.code == NULL);
unlink_executor(self);
Py_CLEAR(self->refs);
#ifdef _Py_JIT
_PyJIT_Free(self);
#endif
Expand Down Expand Up @@ -360,6 +361,7 @@ static int
executor_traverse(PyObject *o, visitproc visit, void *arg)
{
_PyExecutorObject *executor = (_PyExecutorObject *)o;
Py_VISIT(executor->refs);
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_VISIT(executor->exits[i].executor);
}
Expand Down Expand Up @@ -1066,6 +1068,7 @@ allocate_executor(int exit_count, int length)
res->trace = (_PyUOpInstruction *)(res->exits + exit_count);
res->code_size = length;
res->exit_count = exit_count;
res->refs = NULL;
return res;
}

Expand Down Expand Up @@ -1247,12 +1250,19 @@ uop_optimize(
}
assert(length < UOP_MAX_TRACE_LENGTH);
OPT_STAT_INC(traces_created);
// These are any references that were created during optimization, and need
// to be kept alive until we build the executor's refs tuple:
PyObject *new_refs = PyList_New(0);
if (new_refs == NULL) {
return -1;
}
char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
length = _Py_uop_analyze_and_optimize(frame, buffer,
length,
curr_stackentries, &dependencies);
curr_stackentries, &dependencies, new_refs);
if (length <= 0) {
Py_DECREF(new_refs);
return length;
}
}
Expand All @@ -1274,13 +1284,39 @@ uop_optimize(
assert(_PyOpcode_uop_name[buffer[pc].opcode]);
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
}
// We *might* want to de-duplicate these. In addition to making sure we do
// so in a way that preserves "equal" constants with different types (see
// _PyCode_ConstantKey), we *also* need to be careful to compare unknown
// objects by identity, since we don't want to invoke arbitrary code in a
// __hash__/__eq__ implementation. It might be more trouble than it's worth:
int refs_needed = 0;
for (int i = 0; i < length; i++) {
if (buffer[i].opcode == _LOAD_CONST_INLINE) {
refs_needed++;
}
}
PyObject *refs = PyTuple_New(refs_needed);
if (refs == NULL) {
Py_DECREF(new_refs);
return -1;
}
int j = 0;
for (int i = 0; i < length; i++) {
if (buffer[i].opcode == _LOAD_CONST_INLINE) {
PyTuple_SET_ITEM(refs, j++, Py_NewRef(buffer[i].operand));
}
}
Py_DECREF(new_refs);
assert(j == refs_needed);
OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
length = prepare_for_execution(buffer, length);
assert(length <= UOP_MAX_TRACE_LENGTH);
_PyExecutorObject *executor = make_executor_from_uops(buffer, length, &dependencies);
if (executor == NULL) {
Py_DECREF(refs);
return -1;
}
executor->refs = refs;
assert(length <= UOP_MAX_TRACE_LENGTH);
*exec_ptr = executor;
return 1;
Expand Down Expand Up @@ -1584,6 +1620,7 @@ executor_clear(_PyExecutorObject *executor)
* free the executor unless we hold a strong reference to it
*/
Py_INCREF(executor);
Py_CLEAR(executor->refs);
for (uint32_t i = 0; i < executor->exit_count; i++) {
executor->exits[i].temperature = initial_unreachable_backoff_counter();
Py_CLEAR(executor->exits[i].executor);
Expand Down
36 changes: 27 additions & 9 deletions Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,20 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,

#define GETLOCAL(idx) ((ctx->frame->locals[idx]))

#define REPLACE_OP(INST, OP, ARG, OPERAND) \
INST->opcode = OP; \
INST->oparg = ARG; \
INST->operand = OPERAND;
/*
 * Overwrite the uop pointed to by INST in place: set its opcode to OP, its
 * oparg to ARG, and its operand to OPERAND.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion is a
 * single statement and can be used safely in an unbraced if/else.
 *
 * Note: INST is expanded three times, so it must not have side effects.
 */
#define REPLACE_OP(INST, OP, ARG, OPERAND) \
do { \
(INST)->opcode = (OP); \
(INST)->oparg = (ARG); \
(INST)->operand = (OPERAND); \
} while (0)

/*
 * Rewrite the uop pointed to by INST into an inline constant load of CONST.
 *
 * If _Py_IsImmortal() is true for the object, the uop becomes
 * _LOAD_CONST_INLINE_BORROW; otherwise it becomes _LOAD_CONST_INLINE. In both
 * cases the oparg is set to 0 and the operand to the object's address.
 *
 * CONST is evaluated exactly once. The temporaries use trailing-underscore
 * names so they cannot capture a caller variable that appears in the INST or
 * CONST argument (for example a caller local named `o` or `opcode`), which
 * would otherwise expand to a self-initialization such as `PyObject *o = (o);`.
 */
#define REPLACE_OP_WITH_LOAD_CONST(INST, CONST)                          \
    do {                                                                 \
        PyObject *const_obj_ = (CONST);                                  \
        int load_opcode_ = _Py_IsImmortal(const_obj_)                    \
                               ? _LOAD_CONST_INLINE_BORROW               \
                               : _LOAD_CONST_INLINE;                     \
        REPLACE_OP((INST), load_opcode_, 0, (uintptr_t)const_obj_);      \
    } while (0)

/* Shortened forms for convenience, used in optimizer_bytecodes.c */
#define sym_is_not_null _Py_uop_sym_is_not_null
Expand Down Expand Up @@ -392,7 +402,8 @@ optimize_uops(
_PyUOpInstruction *trace,
int trace_len,
int curr_stacklen,
_PyBloomFilter *dependencies
_PyBloomFilter *dependencies,
PyObject *new_refs
)
{

Expand Down Expand Up @@ -524,6 +535,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
last_set_ip = pc;
break;
case _POP_TOP:
case _POP_TOP_LOAD_CONST_INLINE_BORROW:
{
_PyUOpInstruction *last = &buffer[pc-1];
while (last->opcode == _NOP) {
Expand All @@ -535,9 +547,14 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
last->opcode == _COPY
) {
last->opcode = _NOP;
buffer[pc].opcode = _NOP;
if (buffer[pc].opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) {
buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW;
}
else {
buffer[pc].opcode = _NOP;
}
}
if (last->opcode == _REPLACE_WITH_TRUE) {
if (last->opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) {
last->opcode = _NOP;
}
break;
Expand Down Expand Up @@ -580,7 +597,8 @@ _Py_uop_analyze_and_optimize(
_PyUOpInstruction *buffer,
int length,
int curr_stacklen,
_PyBloomFilter *dependencies
_PyBloomFilter *dependencies,
PyObject *new_refs
)
{
OPT_STAT_INC(optimizer_attempts);
Expand All @@ -592,7 +610,7 @@ _Py_uop_analyze_and_optimize(

length = optimize_uops(
_PyFrame_GetCode(frame), buffer,
length, curr_stacklen, dependencies);
length, curr_stacklen, dependencies, new_refs);

if (length <= 0) {
return length;
Expand Down
Loading
Loading