Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Include/internal/pycore_typeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp);
PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version);
PyTypeObject *_PyType_LookupByVersion(unsigned int version);

// Returns 0 on success or caller-specific error on failure.
typedef int (*_py_validate_type)(PyTypeObject *);
// Returns 0 on success, -1 if no type version could be assigned, or the error returned by validate
extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version);

#ifdef __cplusplus
}
#endif
Expand Down
66 changes: 66 additions & 0 deletions Lib/test/test_opcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,72 @@ def g():
self.assert_specialized(g, "CONTAINS_OP_SET")
self.assert_no_opcode(g, "CONTAINS_OP")

@cpython_only
@requires_specialization_ft
def test_to_bool(self):
def to_bool_bool():
true_cnt, false_cnt = 0, 0
elems = [e % 2 == 0 for e in range(100)]
for e in elems:
if e:
true_cnt += 1
else:
false_cnt += 1
self.assertEqual(true_cnt, 50)
self.assertEqual(false_cnt, 50)

to_bool_bool()
self.assert_specialized(to_bool_bool, "TO_BOOL_BOOL")
self.assert_no_opcode(to_bool_bool, "TO_BOOL")

def to_bool_int():
count = 0
for i in range(100):
if i:
count += 1
else:
count -= 1
self.assertEqual(count, 98)

to_bool_int()
self.assert_specialized(to_bool_int, "TO_BOOL_INT")
self.assert_no_opcode(to_bool_int, "TO_BOOL")

def to_bool_list():
count = 0
elems = [1, 2, 3]
while elems:
count += elems.pop()
self.assertEqual(elems, [])
self.assertEqual(count, 6)

to_bool_list()
self.assert_specialized(to_bool_list, "TO_BOOL_LIST")
self.assert_no_opcode(to_bool_list, "TO_BOOL")

def to_bool_none():
count = 0
elems = [None, None, None, None]
for e in elems:
if not e:
count += 1
self.assertEqual(count, len(elems))

to_bool_none()
self.assert_specialized(to_bool_none, "TO_BOOL_NONE")
self.assert_no_opcode(to_bool_none, "TO_BOOL")

def to_bool_str():
count = 0
elems = ["", "foo", ""]
for e in elems:
if e:
count += 1
self.assertEqual(count, 1)

to_bool_str()
self.assert_specialized(to_bool_str, "TO_BOOL_STR")
self.assert_no_opcode(to_bool_str, "TO_BOOL")


if __name__ == "__main__":
Expand Down
18 changes: 18 additions & 0 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5645,6 +5645,24 @@ _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags)
END_TYPE_LOCK();
}

int
_PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version)
{
int err;
BEGIN_TYPE_LOCK();
err = validate(ty);
if (!err) {
if(assign_version_tag(_PyInterpreterState_GET(), ty)) {
*tp_version = ty->tp_version_tag;
}
else {
err = -1;
}
}
END_TYPE_LOCK();
return err;
}

static void
set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags)
{
Expand Down
6 changes: 3 additions & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,15 +391,15 @@ dummy_func(
};

specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
OPCODE_DEFERRED_INC(TO_BOOL);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}

op(_TO_BOOL, (value -- res)) {
Expand Down Expand Up @@ -435,7 +435,7 @@ dummy_func(
PyObject *value_o = PyStackRef_AsPyObjectBorrow(value);
EXIT_IF(!PyList_CheckExact(value_o));
STAT_INC(TO_BOOL, hit);
res = Py_SIZE(value_o) ? PyStackRef_True : PyStackRef_False;
res = PyList_GET_SIZE(value_o) ? PyStackRef_True : PyStackRef_False;
DECREF_INPUTS();
}

Expand Down
2 changes: 1 addition & 1 deletion Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

145 changes: 73 additions & 72 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -2663,101 +2663,102 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr)
cache->counter = adaptive_counter_cooldown();
}

static int
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be surrounded by #ifdef Py_STATS. We don't want to be calling this unless stats are enabled.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mpage
I have same concerns with this change, this change was first introduced at #126414, do we need to make it as dummy function for the default build?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the performance overhead of calling this will matter, but if it does, we could replace it with a dummy function that returns a special sentinel value indicating that stats are disabled.

Copy link
Contributor

@mpage mpage Nov 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both clang (18.18) and GCC (11.5.0) eliminate the call to to_bool_fail_kind if stats are disabled because the result of the call is unused.

to_bool_fail_kind(PyObject *value)
{
if (PyByteArray_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_BYTEARRAY;
}
if (PyBytes_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_BYTES;
}
if (PyDict_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_DICT;
}
if (PyFloat_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_FLOAT;
}
if (PyMemoryView_Check(value)) {
return SPEC_FAIL_TO_BOOL_MEMORY_VIEW;
}
if (PyAnySet_CheckExact(value)) {
return SPEC_FAIL_TO_BOOL_SET;
}
if (PyTuple_CheckExact(value)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not for this PR to address, but why is the tuple type handled here? I see no specialization for tuple (or did I miss something?). Tuple is immutable, so it should be relatively easy to add

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, we added specializaiton based on metric that is measured at https://github.com/faster-cpython/benchmarking-public.
I am not sure how many tuple cases are actually existed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that tuple case is easy to add but that would be based on how many boolean operations are existed for the tuple at the real world workload.

return SPEC_FAIL_TO_BOOL_TUPLE;
}
return SPEC_FAIL_OTHER;
}

static int
check_type_always_true(PyTypeObject *ty)
{
PyNumberMethods *nb = ty->tp_as_number;
if (nb && nb->nb_bool) {
return SPEC_FAIL_TO_BOOL_NUMBER;
}
PyMappingMethods *mp = ty->tp_as_mapping;
if (mp && mp->mp_length) {
return SPEC_FAIL_TO_BOOL_MAPPING;
}
PySequenceMethods *sq = ty->tp_as_sequence;
if (sq && sq->sq_length) {
return SPEC_FAIL_TO_BOOL_SEQUENCE;
}
return 0;
}


void
_Py_Specialize_ToBool(_PyStackRef value_o, _Py_CODEUNIT *instr)
{
assert(ENABLE_SPECIALIZATION);
assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[TO_BOOL] == INLINE_CACHE_ENTRIES_TO_BOOL);
_PyToBoolCache *cache = (_PyToBoolCache *)(instr + 1);
PyObject *value = PyStackRef_AsPyObjectBorrow(value_o);
if (PyBool_Check(value)) {
instr->op.code = TO_BOOL_BOOL;
goto success;
specialize(instr, TO_BOOL_BOOL);
return;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we leave the gotos, and have one call to specialize at the end?
This seems very repetitive and adds the overhead of several calls.

Copy link
Contributor

@mpage mpage Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is any more repetitive that what was previously here and is easier to reason about. It's also the same number of function calls (1) which will be inlined.

Copy link
Contributor

@mpage mpage Nov 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To elaborate a bit, it is the same amount of repetition in either case. The only difference is what is being repeated. The previous approach repeats the assignment to the opcode and the goto:

if (PyBool_Check(value)) {
    instr->op.code = TO_BOOL_BOOL;
    goto success;
}

while the new approach repeats the call to specialize and the return:

if (PyBool_Check(value)) {
    specialize(instr, TO_BOOL_BOOL);
    return;
}

The benefit of the latter approach is that the control flow is easier to follow.

Finally, we make one call to specialize and that call is inlined. As noted below, the generated code for the two versions of _Py_Specialize_ToBool is nearly identical.

}
if (PyLong_CheckExact(value)) {
instr->op.code = TO_BOOL_INT;
goto success;
specialize(instr, TO_BOOL_INT);
return;
}
if (PyList_CheckExact(value)) {
instr->op.code = TO_BOOL_LIST;
goto success;
specialize(instr, TO_BOOL_LIST);
return;
}
if (Py_IsNone(value)) {
instr->op.code = TO_BOOL_NONE;
goto success;
specialize(instr, TO_BOOL_NONE);
return;
}
if (PyUnicode_CheckExact(value)) {
instr->op.code = TO_BOOL_STR;
goto success;
specialize(instr, TO_BOOL_STR);
return;
}
if (PyType_HasFeature(Py_TYPE(value), Py_TPFLAGS_HEAPTYPE)) {
PyNumberMethods *nb = Py_TYPE(value)->tp_as_number;
if (nb && nb->nb_bool) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_NUMBER);
goto failure;
}
PyMappingMethods *mp = Py_TYPE(value)->tp_as_mapping;
if (mp && mp->mp_length) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MAPPING);
goto failure;
}
PySequenceMethods *sq = Py_TYPE(value)->tp_as_sequence;
if (sq && sq->sq_length) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SEQUENCE);
goto failure;
unsigned int version = 0;
int err = _PyType_Validate(Py_TYPE(value), check_type_always_true, &version);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be adding a fair bit of overhead and indirection.
Would it be sufficient to set the version number before performing the other checks, and check that it hasn't changed at the end? The extra check would be benign in the default build.

Copy link
Contributor

@mpage mpage Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be adding a fair bit of overhead and indirection.

This adds one extra function call (the call to check_type_always_true).

Would it be sufficient to set the version number before performing the other checks, and check that it hasn't changed at the end?

This might be possible, but would require using something like a sequence lock to do correctly, which is more complicated and harder to reason about than a solution that acquires the lock around the validation that needs to be performed.

if (err < 0) {
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS);
return;
}
if (!PyUnstable_Type_AssignVersionTag(Py_TYPE(value))) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OUT_OF_VERSIONS);
goto failure;
else if (err > 0) {
unspecialize(instr, err);
return;
}
uint32_t version = type_get_version(Py_TYPE(value), TO_BOOL);

assert(err == 0);
if (version == 0) {
goto failure;
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS);
return;
}
instr->op.code = TO_BOOL_ALWAYS_TRUE;
write_u32(cache->version, version);
assert(version);
goto success;
}
#ifdef Py_STATS
if (PyByteArray_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTEARRAY);
goto failure;
}
if (PyBytes_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTES);
goto failure;
}
if (PyDict_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_DICT);
goto failure;
}
if (PyFloat_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_FLOAT);
goto failure;
}
if (PyMemoryView_Check(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MEMORY_VIEW);
goto failure;
}
if (PyAnySet_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SET);
goto failure;
}
if (PyTuple_CheckExact(value)) {
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_TUPLE);
goto failure;
write_u32(cache->version, version);
specialize(instr, TO_BOOL_ALWAYS_TRUE);
return;
}
SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OTHER);
#endif // Py_STATS
failure:
STAT_INC(TO_BOOL, failure);
instr->op.code = TO_BOOL;
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(TO_BOOL, success);
cache->counter = adaptive_counter_cooldown();
unspecialize(instr, to_bool_fail_kind(value));
}

static int
Expand Down
Loading