Skip to content

Commit 4a4a31f

Browse files
Support underflow and yield value in the optimizer
1 parent 7e7b240 commit 4a4a31f

File tree

6 files changed, with 106 additions and 107 deletions.

Include/internal/pycore_optimizer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ struct _Py_UOpsAbstractFrame {
250250
int stack_len;
251251
int locals_len;
252252
PyFunctionObject *func;
253+
PyCodeObject *code;
253254

254255
JitOptRef *stack_pointer;
255256
JitOptRef *stack;
@@ -325,7 +326,7 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
325326
int curr_stackentries,
326327
JitOptRef *args,
327328
int arg_len);
328-
extern int _Py_uop_frame_pop(JitOptContext *ctx);
329+
extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries);
329330

330331
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
331332

Python/optimizer.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,9 @@ _PyJit_translate_single_bytecode_to_trace(
872872
else {
873873
operand = 0;
874874
}
875+
ADD_TO_TRACE(uop, oparg, operand, target);
876+
trace[trace_length - 1].operand1 = ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
877+
break;
875878
}
876879
if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
877880
assert(i + 1 == nuops);

Python/optimizer_analysis.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ static
267267
PyCodeObject *
268268
get_current_code_object(JitOptContext *ctx)
269269
{
270-
return (PyCodeObject *)ctx->frame->func->func_code;
270+
return (PyCodeObject *)ctx->frame->code;
271271
}
272272

273273
static PyObject *
@@ -298,10 +298,6 @@ optimize_uops(
298298
JitOptContext context;
299299
JitOptContext *ctx = &context;
300300
uint32_t opcode = UINT16_MAX;
301-
int curr_space = 0;
302-
int max_space = 0;
303-
_PyUOpInstruction *first_valid_check_stack = NULL;
304-
_PyUOpInstruction *corresponding_check_stack = NULL;
305301

306302
// Make sure that watchers are set up
307303
PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -368,14 +364,6 @@ optimize_uops(
368364
/* Either reached the end or cannot optimize further, but there
369365
* would be no benefit in retrying later */
370366
_Py_uop_abstractcontext_fini(ctx);
371-
if (first_valid_check_stack != NULL) {
372-
assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
373-
assert(max_space > 0);
374-
assert(max_space <= INT_MAX);
375-
assert(max_space <= INT32_MAX);
376-
first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
377-
first_valid_check_stack->operand0 = max_space;
378-
}
379367
return trace_len;
380368

381369
error:

Python/optimizer_bytecodes.c

Lines changed: 35 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -773,50 +773,55 @@ dummy_func(void) {
773773
JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
774774
DEAD(retval);
775775
SAVE_STACK();
776-
PyCodeObject *co = get_current_code_object(ctx);
777776
ctx->frame->stack_pointer = stack_pointer;
778-
if (frame_pop(ctx)) {
777+
PyCodeObject *returning_code = get_code_with_logging(this_instr);
778+
if (returning_code == NULL) {
779+
ctx->done = true;
780+
break;
781+
}
782+
int returning_stacklevel = this_instr->operand1;
783+
if (frame_pop(ctx, returning_code, returning_stacklevel)) {
779784
break;
780785
}
781786
stack_pointer = ctx->frame->stack_pointer;
782787

783-
/* Stack space handling */
784-
assert(corresponding_check_stack == NULL);
785-
assert(co != NULL);
786-
int framesize = co->co_framesize;
787-
assert(framesize > 0);
788-
assert(framesize <= curr_space);
789-
curr_space -= framesize;
790-
791788
RELOAD_STACK();
792789
res = temp;
793790
}
794791

795792
op(_RETURN_GENERATOR, ( -- res)) {
796793
SYNC_SP();
797-
PyCodeObject *co = get_current_code_object(ctx);
798794
ctx->frame->stack_pointer = stack_pointer;
799-
frame_pop(ctx);
795+
PyCodeObject *returning_code = get_code_with_logging(this_instr);
796+
if (returning_code == NULL) {
797+
ctx->done = true;
798+
break;
799+
}
800+
int returning_stacklevel = this_instr->operand1;
801+
if (frame_pop(ctx, returning_code, returning_stacklevel)) {
802+
break;
803+
}
800804
stack_pointer = ctx->frame->stack_pointer;
801805
res = sym_new_unknown(ctx);
802-
/* Stack space handling */
803-
assert(corresponding_check_stack == NULL);
804-
assert(co != NULL);
805-
int framesize = co->co_framesize;
806-
assert(framesize > 0);
807-
assert(framesize <= curr_space);
808-
curr_space -= framesize;
809-
}
810-
811-
op(_YIELD_VALUE, (unused -- value)) {
812-
// TODO (gh-139109): handle this properly in a future optimization.
813-
// A possibility to handle underflows is to just restore the current frame information
814-
// from whatever is stored in the trace we record at that point of time.
815-
// E.g. we record at this YIELD_VALUE, func_obj=x , stack_level=4
816-
// We can restore it to there.
817-
value = sym_new_unknown(ctx);
818-
ctx->done = true;
819-
ctx->out_of_space = true;
806+
}
807+
808+
op(_YIELD_VALUE, (retval -- value)) {
809+
// Mimics PyStackRef_MakeHeapSafe in the interpreter.
810+
JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
811+
DEAD(retval);
812+
SAVE_STACK();
813+
PyCodeObject *returning_code = get_code_with_logging(this_instr);
814+
if (returning_code == NULL) {
815+
ctx->done = true;
816+
break;
817+
}
818+
int returning_stacklevel = this_instr->operand1;
819+
if (frame_pop(ctx, returning_code, returning_stacklevel)) {
820+
break;
821+
}
822+
stack_pointer = ctx->frame->stack_pointer;
823+
RELOAD_STACK();
824+
value = temp;
820825
}
821826

822827
op(_GET_ITER, (iterable -- iter, index_or_null)) {
@@ -843,8 +848,6 @@ dummy_func(void) {
843848
}
844849

845850
op(_CHECK_STACK_SPACE, (unused, unused, unused[oparg] -- unused, unused, unused[oparg])) {
846-
assert(corresponding_check_stack == NULL);
847-
corresponding_check_stack = this_instr;
848851
}
849852

850853
op (_CHECK_STACK_SPACE_OPERAND, (framesize/2 -- )) {
@@ -870,24 +873,6 @@ dummy_func(void) {
870873
PyCodeObject *co = (PyCodeObject *)func->func_code;
871874
assert(PyFunction_Check(func));
872875
ctx->frame->func = func;
873-
/* Stack space handling */
874-
int framesize = co->co_framesize;
875-
assert(framesize > 0);
876-
curr_space += framesize;
877-
if (curr_space < 0 || curr_space > INT32_MAX) {
878-
// won't fit in signed 32-bit int
879-
ctx->done = true;
880-
break;
881-
}
882-
max_space = curr_space > max_space ? curr_space : max_space;
883-
if (first_valid_check_stack == NULL) {
884-
first_valid_check_stack = corresponding_check_stack;
885-
}
886-
else if (corresponding_check_stack) {
887-
// delete all but the first valid _CHECK_STACK_SPACE
888-
corresponding_check_stack->opcode = _NOP;
889-
}
890-
corresponding_check_stack = NULL;
891876
}
892877

893878
op(_UNPACK_SEQUENCE, (seq -- values[oparg], top[0])) {

Python/optimizer_cases.c.h

Lines changed: 35 additions & 37 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer_symbols.c

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,7 @@ _Py_uop_frame_new(
824824
}
825825
_Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
826826

827+
frame->code = co;
827828
frame->stack_len = co->co_stacksize;
828829
frame->locals_len = co->co_nlocalsplus;
829830

@@ -905,18 +906,41 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx)
905906
}
906907

907908
int
908-
_Py_uop_frame_pop(JitOptContext *ctx)
909+
_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
909910
{
910911
_Py_UOpsAbstractFrame *frame = ctx->frame;
911912
ctx->n_consumed = frame->locals;
913+
912914
ctx->curr_frame_depth--;
913-
// TODO gh-139109: Handle trace recording underflow
914-
if (ctx->curr_frame_depth == 0) {
915-
ctx->done = true;
916-
ctx->out_of_space = true;
915+
916+
if (ctx->curr_frame_depth >= 1) {
917+
ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
918+
919+
// We returned to the correct code. Nothing to do here.
920+
if (co == ctx->frame->code) {
921+
return 0;
922+
}
923+
// Else: the code we recorded doesn't match the code we *think* we're
924+
// returning to. We could be tracing anything, so we can't just return to the
925+
// old frame. We have to restore what the tracer recorded
926+
// as the traced next frame.
927+
// Remove the current frame, and later swap it out with the right one.
928+
else {
929+
ctx->curr_frame_depth--;
930+
}
931+
}
932+
// Else: trace stack underflow.
933+
934+
// This handles swapping out frames.
935+
assert(curr_stackentries >= 1);
936+
// Subtract 1 from stackentries, as the return value is pushed onto the stack after this.
937+
_Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, curr_stackentries - 1, NULL, 0);
938+
if (new_frame == NULL) {
917939
return 1;
918940
}
919-
ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
941+
942+
ctx->curr_frame_depth++;
943+
ctx->frame = new_frame;
920944

921945
return 0;
922946
}

0 commit comments

Comments
 (0)