Skip to content
Merged
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
d716faa
Experiment with borrowing load_fast
mpage Feb 5, 2025
3736923
Checkpoint poc
mpage Feb 5, 2025
7a14254
Fix pyframe copy
mpage Feb 6, 2025
b1607aa
Strengthen refs when frame is copied
mpage Feb 6, 2025
e765735
Cleanup
mpage Feb 6, 2025
291ace9
Consider all instructions when computing mutations
mpage Feb 6, 2025
17d6dd6
Add a super instruction
mpage Feb 7, 2025
0a74052
Don't optimize during quickening
mpage Feb 12, 2025
afbfd88
Use abstract interpretation
mpage Feb 11, 2025
696c630
Fix test_generators
mpage Feb 12, 2025
483ac7a
Optimize returns
mpage Feb 13, 2025
259d5db
Remove unused arg
mpage Feb 13, 2025
aeafa98
Make sure we convert borrowed refs on frame
mpage Feb 15, 2025
85f9a64
Don't test with malformed bytecode
mpage Feb 15, 2025
b6ab2f7
Make sure we convert borrowed refs to func/code when copying generato…
mpage Feb 15, 2025
fd1ad3d
Add support for disassembling LOAD_FAST_BORROW_LOAD_FAST_BORROW
mpage Feb 15, 2025
eee2195
Make sure exc_obj is always defined
mpage Feb 15, 2025
d75ec9a
Make sure we store new stackrefs for frame executable/funcobj
mpage Feb 19, 2025
66f5351
Remove refcount check
mpage Feb 19, 2025
7ef6a0b
Don't hardcode initial refcount in refcount tests
mpage Feb 19, 2025
2af2bbc
Remove invalid bytecode from `test_peepholer`
mpage Feb 19, 2025
bf19b7d
Fix invalid bytecode in `test_peepholer.DirectCfgOptimizerTests.test_…
mpage Feb 19, 2025
a9bca03
Fix tests that checked for `LOAD_FAST` instructions that are now opti…
mpage Feb 20, 2025
293c317
Update disassembly in test_dis to match new bytecode
mpage Feb 20, 2025
a12ccd9
Fix refleak in _BINARY_OP_INPLACE_ADD_UNICODE
mpage Feb 21, 2025
1ef26c5
Create new references to fast locals overwritten via f_locals
mpage Feb 21, 2025
1eb9226
Implement two missing opcodes in the static analysis
mpage Feb 21, 2025
7291c49
Use g_block_list when resetting stack depth
mpage Feb 21, 2025
90bf8df
Avoid reallocating state for each basic block
mpage Feb 21, 2025
9bfa922
Generators
mpage Feb 21, 2025
bf6222b
Move optimize after all other passes have run
mpage Feb 24, 2025
dd97d0c
Don't promote borrowed references in STORE_FAST
mpage Feb 24, 2025
6680709
Track reasons for not being able to optimize instructions
mpage Feb 24, 2025
6568fd9
Rename PyStackRef_DupDeferred
mpage Feb 24, 2025
6fde7b0
Rename _PyStackRef_StealIfUnborrowed
mpage Feb 25, 2025
de13810
Avoid extra copies in take_ownership
mpage Feb 25, 2025
fdeae7d
Make the default build work
mpage Feb 25, 2025
1aed281
Add docs for new opcodes
mpage Feb 25, 2025
c332912
Fix flag array size computation
mpage Feb 26, 2025
76a75a7
Add a high level comment explaining our approach
mpage Feb 26, 2025
dd6426f
Bump magic number after merge (was bumped on main)
mpage Feb 28, 2025
bf68eb9
Add more tests
mpage Feb 28, 2025
474a587
Update commented out assertion
mpage Feb 28, 2025
4b3aacf
Add NEWS entry
mpage Feb 28, 2025
e692037
Remove debug print
mpage Feb 28, 2025
2ecfe08
Merge branch 'main' into load-fast-borrow-absinterp
mpage Feb 28, 2025
f98d91d
Fix doctest
mpage Feb 28, 2025
725dc8e
Fix JIT tests
mpage Mar 1, 2025
03d35b2
Fix missed doctest
mpage Mar 1, 2025
39ff3f0
Fix narrowing
mpage Mar 1, 2025
f012a9f
Formatting
mpage Mar 1, 2025
b4b7f73
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 13, 2025
8ea82b5
Implement PyStackRef_{Is,Make}HeapSafe
mpage Mar 13, 2025
9bec9f5
Add missing error handling
mpage Mar 13, 2025
902ae84
Simplify frees
mpage Mar 13, 2025
b0ea38f
Get rid of `PyStackRef_IsBorrowed`
mpage Mar 13, 2025
1f5cfcd
Use PyStackRef_Borrow as the new API
mpage Mar 14, 2025
d1e8e45
Make the default build work
mpage Mar 14, 2025
00c95cc
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 17, 2025
cc01a30
Regen frozenmain
mpage Mar 17, 2025
6c5faab
Add a workaround for failing tests rather than change marshal.c
mpage Mar 17, 2025
32bd0c6
Update dis.rst to reflect support for LOAD_FAST_BORROW in the default…
mpage Mar 17, 2025
fdb8a82
Exclude immortal objects when keeping overwritten locals alive
mpage Mar 17, 2025
a962017
Use a tuple to store overwritten fast locals
mpage Mar 17, 2025
6c2f07d
Fix off-by-one error
mpage Mar 17, 2025
85b0b00
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 20, 2025
5ff2dea
Fix post-merge issues
mpage Mar 20, 2025
0c1e67f
English is hard
mpage Mar 20, 2025
ae2ec65
Improve readability of test cases
mpage Mar 20, 2025
03c474e
Elaborate in the blurb
mpage Mar 20, 2025
60665c9
Remove parameter to calculate stackdepth
mpage Mar 20, 2025
ac8940b
Update comment
mpage Mar 20, 2025
f12573f
Test optimize_load_fast as part of OptimizeCfg
mpage Mar 21, 2025
44f7ffc
Remove test with invalid bytecode
mpage Mar 21, 2025
818e94e
Add helper macro for pushing refs
mpage Mar 21, 2025
c30e1e9
Handle opcodes that leave at least one input on the stack
mpage Mar 22, 2025
112cee6
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 23, 2025
80fc5aa
Avoid having stackref only visible from the c stack
mpage Mar 23, 2025
492cce1
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 24, 2025
2e38f0d
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 31, 2025
2c55722
Merge branch 'main' into load-fast-borrow-absinterp
mpage Apr 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions Doc/library/dis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ the following command can be used to display the disassembly of
2 RESUME 0
<BLANKLINE>
3 LOAD_GLOBAL 1 (len + NULL)
LOAD_FAST 0 (alist)
LOAD_FAST_BORROW 0 (alist)
CALL 1
RETURN_VALUE

Expand Down Expand Up @@ -215,7 +215,7 @@ Example:
...
RESUME
LOAD_GLOBAL
LOAD_FAST
LOAD_FAST_BORROW
CALL
RETURN_VALUE

Expand Down Expand Up @@ -1402,13 +1402,29 @@ iterations of the loop.
This opcode is now only used in situations where the local variable is
guaranteed to be initialized. It cannot raise :exc:`UnboundLocalError`.

.. opcode:: LOAD_FAST_BORROW (var_num)

Pushes a borrowed reference to the local ``co_varnames[var_num]`` onto the stack
in free-threaded builds. In default builds this is identical to ``LOAD_FAST``.

.. versionadded:: 3.14

.. opcode:: LOAD_FAST_LOAD_FAST (var_nums)

Pushes references to ``co_varnames[var_nums >> 4]`` and
``co_varnames[var_nums & 15]`` onto the stack.

.. versionadded:: 3.13


.. opcode:: LOAD_FAST_BORROW_LOAD_FAST_BORROW (var_nums)

Pushes borrowed references to ``co_varnames[var_nums >> 4]`` and
``co_varnames[var_nums & 15]`` onto the stack in free-threaded builds. This is
identical to ``LOAD_FAST_LOAD_FAST`` in default builds.

.. versionadded:: 3.14

.. opcode:: LOAD_FAST_CHECK (var_num)

Pushes a reference to the local ``co_varnames[var_num]`` onto the stack,
Expand Down Expand Up @@ -2023,4 +2039,3 @@ instructions:

.. deprecated:: 3.13
All jumps are now relative. This list is empty.

3 changes: 3 additions & 0 deletions Include/internal/pycore_flowgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ PyAPI_FUNC(PyObject*) _PyCompile_OptimizeCfg(
PyObject *consts,
int nlocals);

// Export for '_testinternalcapi' shared extension
PyAPI_FUNC(PyObject*) _PyCompile_OptimizeLoadFast(PyObject *instructions);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't tend to expose individual peephole optimisations, we test them through OptimizeCFG (see DirectCfgOptimizerTests). Is there a reason why you can't do that for this optimization?

Copy link
Contributor Author

@mpage mpage Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can add a call to optimize_load_fast into _PyCompile_OptimizeCFG. I wasn't sure if _PyCompile_OptimizeCFG was intended to only test _PyCfg_OptimizeCodeUnit. We want this pass to be called as late as possible in compilation, which is why it is called in _PyCfg_OptimizedCfgToInstructionSequence instead of _PyCfg_OptimizeCodeUnit.


#ifdef __cplusplus
}
#endif
Expand Down
25 changes: 22 additions & 3 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ struct _frame {
PyEval_GetLocals requires a borrowed reference so the actual reference
is stored here */
PyObject *f_locals_cache;
/* A list containing strong references to fast locals that were overwritten
* via f_locals. Borrowed references to these locals may exist in frames
* closer to the top of the stack. The references in this list act as
* "support" for the borrowed references, ensuring that they remain valid.
*/
PyObject *f_overwritten_fast_locals;
/* The frame data, if this frame object owns the frame */
PyObject *_f_frame_data[1];
};
Expand Down Expand Up @@ -146,16 +152,29 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code)
return code->co_framesize - FRAME_SPECIALS_SIZE;
}

static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest)
static inline void
_PyFrame_CopyToHeap(_PyInterpreterFrame *src, _PyInterpreterFrame *dest)
{
*dest = *src;
assert(src->stackpointer != NULL);
int stacktop = (int)(src->stackpointer - src->localsplus);
assert(stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus);
dest->stackpointer = dest->localsplus + stacktop;
for (int i = 1; i < stacktop; i++) {
dest->localsplus[i] = src->localsplus[i];
// The destination frame may outlive any references that were providing
// "support" for borrowed references in the source frame. Convert any
// borrowed references that were copied into dest into strong references.
for (int i = 0; i < stacktop; i++) {
dest->localsplus[i] =
_PyStackRef_NewIfBorrowedOrSteal(src->localsplus[i]);
}
dest->f_executable = _PyStackRef_NewIfBorrowedOrSteal(dest->f_executable);
dest->f_funcobj = _PyStackRef_NewIfBorrowedOrSteal(dest->f_funcobj);
}

static inline void
_PyFrame_CopyToNewGen(_PyInterpreterFrame *src, _PyInterpreterFrame *dest)
{
_PyFrame_CopyToHeap(src, dest);
// Don't leave a dangling pointer to the old frame when creating generators
// and coroutines:
dest->previous = NULL;
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_magic_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ PC/launcher.c must also be updated.

*/

#define PYC_MAGIC_NUMBER 3617
#define PYC_MAGIC_NUMBER 3618
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
(little-endian) and then appending b'\r\n'. */
#define PYC_MAGIC_NUMBER_TOKEN \
Expand Down
26 changes: 24 additions & 2 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 42 additions & 3 deletions Include/internal/pycore_stackref.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ PyStackRef_IsNone(_PyStackRef ref)
return _Py_stackref_get_object(ref) == Py_None;
}

static inline bool
PyStackRef_IsBorrowed(_PyStackRef ref)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can't tell if a reference is borrowed from the reference itself. That can only be determined from the context.
If A lends a reference to B, A's reference is unchanged and B's reference is indistinguishable from any other reference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good example of why we need consistent, unambiguous terminology. The purpose of this function is to identify stackrefs with embedded refcounts (using your terms) that do not support deferred reclamation and are mortal. These references need to be converted to non-embedded references if they escape to the heap.

{
return false;
}

static inline PyObject *
_PyStackRef_AsPyObjectBorrow(_PyStackRef ref, const char *filename, int linenumber)
{
Expand Down Expand Up @@ -146,6 +152,12 @@ PyStackRef_CLOSE(_PyStackRef ref)
Py_DECREF(obj);
}

static inline _PyStackRef
_PyStackRef_NewIfBorrowedOrSteal(_PyStackRef ref)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name makes no sense to me. What does this do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is PyStackRef_ToNonEmbedded.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other than handling of null, this looks equivalent to the existing PyStackRef_AsStrongReference. We should use that instead.

Copy link
Contributor Author

@mpage mpage Mar 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other than handling of null, this looks equivalent to the existing PyStackRef_AsStrongReference.

I think it's subtly different. It will only create a strong reference if the stackref is tagged as deferred and the referenced object is not immortal and does not support deferred reference counting. PyStackRef_AsStrongReference always creates a strong reference if the stackref is tagged as deferred. The distinction probably doesn't matter when it's used for copying interpreter frames that escape into frame objects. However, it's also used for copying frames into new generators, and the unconditional creation of strong references may cause bottlenecks when it's performed on the code or function object.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid using "strong reference" except in contrast to "weak reference"?
All PyStackRef references are strong references in the sense that an object they refer to cannot be reclaimed while they exist. Unlike weak references.

{
return ref;
}

static inline _PyStackRef
_PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber)
{
Expand Down Expand Up @@ -213,6 +225,27 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj)
}
# define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj))

// Report whether `stackref` is a non-NULL, deferred-tagged stackref whose
// target object is neither immortal nor using deferred reference counting.
// Returns false for NULL stackrefs and for stackrefs without the deferred tag.
static inline bool
PyStackRef_IsBorrowed(_PyStackRef stackref)
{
    if (PyStackRef_IsNull(stackref)) {
        return false;
    }
    if (!PyStackRef_IsDeferred(stackref)) {
        return false;
    }
    PyObject *target = PyStackRef_AsPyObjectBorrow(stackref);
    if (_Py_IsImmortal(target)) {
        return false;
    }
    return !_PyObject_HasDeferredRefcount(target);
}


// If `stackref` is borrowed (as decided by PyStackRef_IsBorrowed), return a
// new stackref built from a fresh reference to the same object (Py_NewRef)
// tagged with Py_TAG_PTR. Otherwise return `stackref` unchanged.
static inline _PyStackRef
_PyStackRef_NewIfBorrowedOrSteal(_PyStackRef stackref)
{
    if (!PyStackRef_IsBorrowed(stackref)) {
        return stackref;
    }
    PyObject *target = PyStackRef_AsPyObjectBorrow(stackref);
    uintptr_t owned_bits = (uintptr_t)(Py_NewRef(target)) | Py_TAG_PTR;
    return (_PyStackRef){ .bits = owned_bits };
}

static inline _PyStackRef
PyStackRef_FromPyObjectNew(PyObject *obj)
{
Expand Down Expand Up @@ -253,15 +286,18 @@ PyStackRef_DUP(_PyStackRef stackref)
{
assert(!PyStackRef_IsNull(stackref));
if (PyStackRef_IsDeferred(stackref)) {
assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) ||
_PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref))
);
return stackref;
}
Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref));
return stackref;
}

// Return a copy of `stackref` with the Py_TAG_DEFERRED bit set; all other
// bits are carried over unchanged.
static inline _PyStackRef
PyStackRef_AsDeferred(_PyStackRef stackref)
{
    _PyStackRef tagged = stackref;
    tagged.bits |= Py_TAG_DEFERRED;
    return tagged;
}

// Convert a possibly deferred reference to a strong reference.
static inline _PyStackRef
PyStackRef_AsStrongReference(_PyStackRef stackref)
Expand All @@ -280,6 +316,7 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 };
#define PyStackRef_True ((_PyStackRef){.bits = (uintptr_t)&_Py_TrueStruct })
#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) })
#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) })
#define PyStackRef_IsBorrowed(stackref) false

#define PyStackRef_AsPyObjectBorrow(stackref) ((PyObject *)(stackref).bits)

Expand All @@ -295,6 +332,8 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 };

#define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref)))

// Default-build variant: expands to its argument unchanged. The expansion is
// parenthesized so that an argument containing a low-precedence operator
// (e.g. a conditional expression) cannot re-associate with the surrounding
// expression at the call site.
#define _PyStackRef_NewIfBorrowedOrSteal(stackref) (stackref)

#define PyStackRef_CLOSE_SPECIALIZED(stackref, dealloc) _Py_DECREF_SPECIALIZED(PyStackRef_AsPyObjectBorrow(stackref), dealloc)

#endif // Py_GIL_DISABLED
Expand Down
Loading
Loading