Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ PyAPI_FUNC(PyObject **) _PyObjectArray_FromStackRefArray(_PyStackRef *input, Py_
PyAPI_FUNC(void) _PyObjectArray_Free(PyObject **array, PyObject **scratch);

PyAPI_FUNC(PyObject *) _PyEval_GetANext(PyObject *aiter);
PyAPI_FUNC(PyObject *) _PyEval_LoadGlobal(PyObject *globals, PyObject *builtins, PyObject *name);
PyAPI_FUNC(void) _PyEval_LoadGlobalStackRef(PyObject *globals, PyObject *builtins, PyObject *name, _PyStackRef *writeto);
PyAPI_FUNC(PyObject *) _PyEval_GetAwaitable(PyObject *iterable, int oparg);
PyAPI_FUNC(PyObject *) _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject *name);

Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {

#include "pycore_object.h" // PyManagedDictPointer
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_ACQUIRE
#include "pycore_stackref.h" // _PyStackRef

// Unsafe flavor of PyDict_GetItemWithError(): no error checking
extern PyObject* _PyDict_GetItemWithError(PyObject *dp, PyObject *key);
Expand Down Expand Up @@ -100,10 +101,12 @@ extern void _PyDictKeys_DecRef(PyDictKeysObject *keys);
*/
extern Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr);
extern Py_ssize_t _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr);
extern Py_ssize_t _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr);

extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *);
extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
PyAPI_FUNC(void) _PyDict_LoadGlobalStackRef(PyDictObject *, PyDictObject *, PyObject *, _PyStackRef *);

/* Consumes references to key and value */
PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value);
Expand Down
98 changes: 98 additions & 0 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,70 @@ _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb
return ix;
}

Py_ssize_t
_Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr)
{
PyDictKeysObject *dk;
DictKeysKind kind;
Py_ssize_t ix;

ensure_shared_on_read(mp);

dk = _Py_atomic_load_ptr(&mp->ma_keys);
kind = dk->dk_kind;

if (kind != DICT_KEYS_GENERAL) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is complicated and duplicates logic elsewhere. I think it might be worth only supporting a a single case or two. For example, unicode-only and non-split dict. Fallback to _Py_dict_lookup_threadsafe + PyStackRef_FromPyObjectSteal for other cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it is worth changing anything. LOAD_GLOBAL is specialized incredibly well, so this only applies to a rare slow path.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not specializing at all in the free-threaded build so LOAD_GLOBAL is a scaling bottleneck now that fewer types are immortalized.

I don't think we want to wait for specialization to be made thread-safe, but we can treat this as a temporary measure and structure the code to reflect that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this case to fall back to _Py_dict_lookup_threadsafe

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks backwards to me. The common case should be DICT_KEYS_UNICODE for globals dictionaries, but this only handles DICT_KEYS_GENERAL efficiently.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is still too complicated. We only need to handle the common case of unicode keys. Globals are almost never split dictionaries.

I think this should look something like the following. You can refactor it further by extracting a _Py_TryXGetStackRef() that's like _Py_TryXGetRef but returns a _PyStackRef.

    PyDictKeysObject *dk = _Py_atomic_load_ptr(&mp->ma_keys);
    if (dk->dk_kind == DICT_KEYS_UNICODE && PyUnicode_CheckExact(key)) {
        Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
        if (ix == DKIX_EMPTY) {
            *value_addr = PyStackRef_NULL;
            return ix;
        }
        else if (ix >= 0) {
            PyObject **addr_of_value = &DK_UNICODE_ENTRIES(dk)[ix].me_value;
            PyObject *value = _Py_atomic_load_ptr(addr_of_value);
            if (value == NULL) {
                *value_addr = PyStackRef_NULL;
                return DKIX_EMPTY;
            }
            if (_Py_IsImmortal(value) || _PyObject_HasDeferredRefcount(value)) {
                *value_addr =  (_PyStackRef){ .bits = (uintptr_t)value | Py_TAG_DEFERRED };
                return ix;
            }
            if (_Py_TryIncrefCompare(addr_of_value, value)) {
                *value_addr = PyStackRef_FromPyObjectSteal(value);
                return ix;
            }
        }
    }

    PyObject *obj;
    Py_ssize_t ix = _Py_dict_lookup_threadsafe(mp, key, hash, &obj);
    if (ix >= 0 && obj != NULL) {
        *value_addr = PyStackRef_FromPyObjectSteal(obj);
    }
    else {
        *value_addr = PyStackRef_NULL;
    }
    return ix;

PyObject *value;
ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value);
assert (ix >= 0 || value == NULL);
*value_addr = PyStackRef_FromPyObjectSteal(value);
}
else {
ix = dictkeys_generic_lookup_threadsafe(mp, dk, key, hash);
if (ix == DKIX_KEY_CHANGED) {
goto read_failed;
}
if (ix >= 0) {
PyObject **addr_of_value = &(DK_ENTRIES(dk)[ix].me_value);
PyObject *value = _Py_atomic_load_ptr(addr_of_value);
if (value == NULL) {
*value_addr = PyStackRef_NULL;
}
else if (_Py_IsImmortal(value) ||
_PyObject_HasDeferredRefcount(value)) {
*value_addr = PyStackRef_FromPyObjectNew(value);
}
else {
*value_addr = PyStackRef_FromPyObjectSteal(
_Py_TryXGetRef(addr_of_value));
}
if (PyStackRef_IsNull(*value_addr)) {
goto read_failed;
}
if (dk != _Py_atomic_load_ptr(&mp->ma_keys)) {
goto read_failed;
}
}
else {
*value_addr = PyStackRef_NULL;
}
}

return ix;

PyObject *value;
read_failed:
// In addition to the normal races of the dict being modified the _Py_TryXGetRef
// can all fail if they don't yet have a shared ref count. That can happen here
// or in the *_lookup_* helper. In that case we need to take the lock to avoid
// mutation and do a normal incref which will make them shared.
Py_BEGIN_CRITICAL_SECTION(mp);
ix = _Py_dict_lookup(mp, key, hash, &value);
*value_addr = value == NULL ? PyStackRef_NULL : PyStackRef_FromPyObjectNew(value);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't you use the _Py_dict_lookup_threadsafe_stackref helper here since you acquired the lock? This would save the PyObject *value declaration (and you could consider inlining the helper actually)

Py_END_CRITICAL_SECTION();
return ix;
}

#else // Py_GIL_DISABLED

Py_ssize_t
Expand All @@ -1506,6 +1570,15 @@ _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb
return ix;
}

Py_ssize_t
_Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr)
{
PyObject *val;
Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &val);
*value_addr = val == NULL ? PyStackRef_NULL : PyStackRef_FromPyObjectNew(val);
return ix;
}

#endif

int
Expand Down Expand Up @@ -2418,6 +2491,31 @@ _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
return value;
}

void
_PyDict_LoadGlobalStackRef(PyDictObject *globals, PyDictObject *builtins, PyObject *key, _PyStackRef *res)
{
Py_ssize_t ix;
Py_hash_t hash;

hash = _PyObject_HashFast(key);
if (hash == -1) {
*res = PyStackRef_NULL;
}

/* namespace 1: globals */
ix = _Py_dict_lookup_threadsafe_stackref(globals, key, hash, res);
if (ix == DKIX_ERROR) {
*res = PyStackRef_NULL;
}
if (ix != DKIX_EMPTY && !PyStackRef_IsNull(*res)) {
return;
}

/* namespace 2: builtins */
ix = _Py_dict_lookup_threadsafe_stackref(builtins, key, hash, res);
assert(ix >= 0 || PyStackRef_IsNull(*res));
}

/* Consumes references to key and value */
static int
setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value)
Expand Down
9 changes: 4 additions & 5 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1448,8 +1448,8 @@ dummy_func(
&& PyDict_CheckExact(BUILTINS()))
{
v_o = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(),
(PyDictObject *)BUILTINS(),
name);
(PyDictObject *)BUILTINS(),
name);
if (v_o == NULL) {
if (!_PyErr_Occurred(tstate)) {
/* _PyDict_LoadGlobal() returns NULL without raising
Expand Down Expand Up @@ -1507,10 +1507,9 @@ dummy_func(

op(_LOAD_GLOBAL, ( -- res, null if (oparg & 1))) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
PyObject *res_o = _PyEval_LoadGlobal(GLOBALS(), BUILTINS(), name);
ERROR_IF(res_o == NULL, error);
_PyEval_LoadGlobalStackRef(GLOBALS(), BUILTINS(), name, STACK_ENTRY(res));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two suggestions:

  1. Make _PyEval_LoadGlobalStackRef and _PyDict_LoadGlobalStackRef return int error codes
  2. Use &STACK_ENTRY(res) because it makes it more clear that the value is a pointer and because it means the STACK_ENTRY() transformation preserves the type.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will apply 2. but not 1. The reasoning is to keep consistency with _PyEval_LoadGlobal (non-stackref version).

ERROR_IF(PyStackRef_IsNull(res), error);
null = PyStackRef_NULL;
res = PyStackRef_FromPyObjectSteal(res_o);
}

macro(LOAD_GLOBAL) =
Expand Down
20 changes: 11 additions & 9 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -3072,15 +3072,14 @@ _PyEval_GetANext(PyObject *aiter)
return awaitable;
}

PyObject *
_PyEval_LoadGlobal(PyObject *globals, PyObject *builtins, PyObject *name)
void
_PyEval_LoadGlobalStackRef(PyObject *globals, PyObject *builtins, PyObject *name, _PyStackRef *writeto)
{
PyObject *res;
if (PyDict_CheckExact(globals) && PyDict_CheckExact(builtins)) {
res = _PyDict_LoadGlobal((PyDictObject *)globals,
_PyDict_LoadGlobalStackRef((PyDictObject *)globals,
(PyDictObject *)builtins,
name);
if (res == NULL && !PyErr_Occurred()) {
name, writeto);
if (PyStackRef_IsNull(*writeto) && !PyErr_Occurred()) {
/* _PyDict_LoadGlobal() returns NULL without raising
* an exception if the key doesn't exist */
_PyEval_FormatExcCheckArg(PyThreadState_GET(), PyExc_NameError,
Expand All @@ -3090,22 +3089,25 @@ _PyEval_LoadGlobal(PyObject *globals, PyObject *builtins, PyObject *name)
else {
/* Slow-path if globals or builtins is not a dict */
/* namespace 1: globals */
PyObject *res;
if (PyMapping_GetOptionalItem(globals, name, &res) < 0) {
return NULL;
*writeto = PyStackRef_NULL;
return;
}
if (res == NULL) {
/* namespace 2: builtins */
if (PyMapping_GetOptionalItem(builtins, name, &res) < 0) {
return NULL;
*writeto = PyStackRef_NULL;
return;
}
if (res == NULL) {
_PyEval_FormatExcCheckArg(
PyThreadState_GET(), PyExc_NameError,
NAME_ERROR_MSG, name);
}
}
*writeto = PyStackRef_FromPyObjectSteal(res);
}
return res;
}

PyObject *
Expand Down
6 changes: 3 additions & 3 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 30 additions & 2 deletions Tools/cases_generator/generators_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def write_header(
)


def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str) -> None:
def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str, *, allow_unbalanced_parens: bool = False) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we want balanced parentheses? What is the construct that doesn't balance parentheses?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically it's when we replace something in the middle of the line but still want the rest of the line (and all it's parantheses)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give me an example?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E.g. we replace the before foo(STACK_ENTRY(x));, with foo(stack_pointer[-1]);, we want the replacement function to write until the semicolon ;, ignoring all balanced brackets from the replacement point till that semicolon.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of adding allow_unbalanced_parens can we push the logic into the caller (stack_entry)? Like replace up to the matching ) and then consume/emit tokens up to the end of the statement?

parens = 0
for tkn in tkn_iter:
if tkn.kind == end and parens == 0:
if tkn.kind == end and (parens == 0 or allow_unbalanced_parens):
return
if tkn.kind == "LPAREN":
parens += 1
Expand All @@ -77,6 +77,7 @@ def __init__(self, out: CWriter):
"DECREF_INPUTS": self.decref_inputs,
"SYNC_SP": self.sync_sp,
"PyStackRef_FromPyObjectNew": self.py_stack_ref_from_py_object_new,
"STACK_ENTRY": self.stack_entry,
}
self.out = out

Expand Down Expand Up @@ -211,6 +212,33 @@ def py_stack_ref_from_py_object_new(
# unused portions of the stack to NULL.
stack.flush_single_var(self.out, target, uop.stack.outputs)

def stack_entry(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that the stack calculations are correct here, and I don't think it will be able to handle code that writes to multiple results, like UNPACK....

If we go back to using arrays, we can remove this.

self,
tkn: Token,
tkn_iter: Iterator[Token],
uop: Uop,
stack: Stack,
inst: Instruction | None,
) -> None:
emit_to(self.out, tkn_iter, "LPAREN")
target = next(tkn_iter)
size = "0"
for output in uop.stack.inputs:
size += f" - {output.size or 1}"
for output in uop.stack.outputs:
if output.name == target.text:
self.out.emit(f" &stack_pointer[{size}]")
break
size += f" + {output.size or 1}"
else:
raise analysis_error("STACK_ENTRY operand is not a stack output", target)

next(tkn_iter) # Consume )
emit_to(self.out, tkn_iter, "SEMI", allow_unbalanced_parens=True)
self.emit(";\n")
# Update the variable
self.out.emit(f"{target.text} = stack_pointer[{size}];\n")

def emit_tokens(
self,
uop: Uop,
Expand Down