Skip to content

Commit cf5fcd0

Browse files
committed
Rework FOR_ITER specialization in the free-threaded build to only apply to
uniquely referenced iterators. This handles the common case of 'for item in seq' (where 'seq' is a list, tuple or range object) and 'for item in generator_function()', but not, for example, 'g = gen(...); for item in g:'.
1 parent 0870ce7 commit cf5fcd0

File tree

11 files changed

+352
-138
lines changed

11 files changed

+352
-138
lines changed

Include/internal/pycore_list.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ extern void _PyList_DebugMallocStats(FILE *out);
1515
extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i);
1616
#ifdef Py_GIL_DISABLED
1717
// Returns -1 in case of races with other threads.
18-
extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t i, PyObject ** result);
18+
extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t, PyObject **);
1919
#endif
2020

2121
#define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item)

Include/internal/pycore_uop_metadata.h

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import dis
2+
import queue
3+
import threading
4+
import time
5+
import unittest
6+
from test.support import (import_helper, cpython_only, Py_GIL_DISABLED,
7+
requires_specialization_ft)
8+
9+
_testinternalcapi = import_helper.import_module("_testinternalcapi")
10+
_testlimitedcapi = import_helper.import_module("_testlimitedcapi")
11+
12+
NUMTHREADS = 5
13+
14+
def get_tlbc_instructions(f):
15+
co = dis._get_code_object(f)
16+
tlbc = _testinternalcapi.get_tlbc(co)
17+
return [i.opname for i in dis._get_instructions_bytes(tlbc)]
18+
19+
20+
class IterationDeoptTests(unittest.TestCase):
21+
def check_deopt(self, get_iter, opcode, is_generator=False):
22+
input = range(100)
23+
expected_len = len(input)
24+
q = queue.Queue()
25+
barrier = threading.Barrier(NUMTHREADS + 1)
26+
done = threading.Event()
27+
def worker():
28+
# A complicated dance to get a weak reference to an iterator
29+
# _only_ (strongly) referenced by the for loop, so that we can
30+
# force our loop to deopt mid-way through.
31+
it = get_iter(input)
32+
ref = _testlimitedcapi.pylong_fromvoidptr(it)
33+
q.put(ref)
34+
# We can't use enumerate without affecting the loop, so keep a
35+
# manual counter.
36+
i = 0
37+
loop_a_little_more = 5
38+
results = []
39+
try:
40+
# Make sure we're not still specialized from a previous run.
41+
ops = get_tlbc_instructions(worker)
42+
self.assertIn('FOR_ITER', ops)
43+
self.assertNotIn(opcode, ops)
44+
for item in it:
45+
results.append(item)
46+
i += 1
47+
48+
# We have to be very careful exiting the loop, because
49+
# if the main thread hasn't dereferenced the unsafe
50+
# weakref to our iterator yet, exiting will make it
51+
# invalid and cause a crash. Getting the timing right is
52+
# difficult, though, since it depends on the OS
53+
# scheduler and the system load. As a final safeguard,
54+
# if we're close to finishing the loop, just wait for the
55+
# main thread.
56+
if i + loop_a_little_more > expected_len:
57+
done.wait()
58+
59+
if i == 1:
60+
del it
61+
# Warm up. The first iteration didn't count because of
62+
# the extra reference to the iterator.
63+
if i < 10:
64+
continue
65+
if i == 10:
66+
ops = get_tlbc_instructions(worker)
67+
self.assertIn(opcode, ops)
68+
# Let the main thread know it's time to reference our iterator.
69+
barrier.wait()
70+
continue
71+
# Continue iterating while at any time our loop may be
72+
# forced to deopt, but try to get the thread scheduler
73+
# to give the main thread a chance to run.
74+
if not done.is_set():
75+
time.sleep(0)
76+
continue
77+
if loop_a_little_more:
78+
# Loop a little more after 'done' is set to make sure we
79+
# introduce a tsan-detectable race if the loop isn't
80+
# deopting appropriately.
81+
loop_a_little_more -= 1
82+
continue
83+
break
84+
self.assertEqual(results, list(input)[:i])
85+
except threading.BrokenBarrierError:
86+
return
87+
except Exception as e:
88+
# In case the exception happened before the last barrier,
89+
# reset it so nothing is left hanging.
90+
barrier.reset()
91+
# In case it's the final assertion that failed, just add it
92+
# to the result queue so it'll show up in the normal test
93+
# output.
94+
q.put(e)
95+
raise
96+
q.put("SUCCESS")
97+
# Reset specialization and thread-local bytecode from previous runs.
98+
worker.__code__ = worker.__code__.replace()
99+
threads = [threading.Thread(target=worker) for i in range(NUMTHREADS)]
100+
for t in threads:
101+
t.start()
102+
# Get the "weakrefs" from the worker threads.
103+
refs = [q.get() for i in range(NUMTHREADS)]
104+
# Wait for each thread to finish its specialization check.
105+
barrier.wait()
106+
# Dereference the "weakrefs" we were sent in an extremely unsafe way.
107+
iterators = [_testlimitedcapi.pylong_asvoidptr(ref) for ref in refs]
108+
done.set()
109+
self.assertNotIn(None, iterators)
110+
# Read data that the iteration writes, to trigger data races if they
111+
# don't deopt appropriately.
112+
if is_generator:
113+
for it in iterators:
114+
it.gi_running
115+
else:
116+
for it in iterators:
117+
it.__reduce__()
118+
for t in threads:
119+
t.join()
120+
results = [q.get() for i in range(NUMTHREADS)]
121+
self.assertEqual(results, ["SUCCESS"] * NUMTHREADS)
122+
123+
@cpython_only
124+
@requires_specialization_ft
125+
@unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading")
126+
def test_deopt_leaking_iterator_list(self):
127+
def make_list_iter(input):
128+
return iter(list(input))
129+
self.check_deopt(make_list_iter, 'FOR_ITER_LIST')
130+
131+
@cpython_only
132+
@requires_specialization_ft
133+
@unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading")
134+
def test_deopt_leaking_iterator_tuple(self):
135+
def make_tuple_iter(input):
136+
return iter(tuple(input))
137+
self.check_deopt(make_tuple_iter, 'FOR_ITER_TUPLE')
138+
139+
@cpython_only
140+
@requires_specialization_ft
141+
@unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading")
142+
def test_deopt_leaking_iterator_range(self):
143+
def make_range_iter(input):
144+
return iter(input)
145+
self.check_deopt(make_range_iter, 'FOR_ITER_RANGE')
146+
147+
@cpython_only
148+
@requires_specialization_ft
149+
@unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading")
150+
def test_deopt_leaking_iterator_generator(self):
151+
def gen(input):
152+
for item in input:
153+
yield item
154+
self.check_deopt(gen, 'FOR_ITER_GEN', is_generator=True)
155+

Lib/test/test_opcache.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,44 @@ def compare_op_str():
16311631
self.assert_specialized(compare_op_str, "COMPARE_OP_STR")
16321632
self.assert_no_opcode(compare_op_str, "COMPARE_OP")
16331633

1634+
@cpython_only
1635+
@requires_specialization_ft
1636+
def test_for_iter(self):
1637+
L = list(range(10))
1638+
def for_iter_list():
1639+
for i in L:
1640+
self.assertIn(i, L)
1641+
1642+
for_iter_list()
1643+
self.assert_specialized(for_iter_list, "FOR_ITER_LIST")
1644+
self.assert_no_opcode(for_iter_list, "FOR_ITER")
1645+
1646+
t = tuple(range(10))
1647+
def for_iter_tuple():
1648+
for i in t:
1649+
self.assertIn(i, t)
1650+
1651+
for_iter_tuple()
1652+
self.assert_specialized(for_iter_tuple, "FOR_ITER_TUPLE")
1653+
self.assert_no_opcode(for_iter_tuple, "FOR_ITER")
1654+
1655+
r = range(10)
1656+
def for_iter_range():
1657+
for i in r:
1658+
self.assertIn(i, r)
1659+
1660+
for_iter_range()
1661+
self.assert_specialized(for_iter_range, "FOR_ITER_RANGE")
1662+
self.assert_no_opcode(for_iter_range, "FOR_ITER")
1663+
1664+
def for_iter_generator():
1665+
for i in (i for i in range(10)):
1666+
i + 1
1667+
1668+
for_iter_generator()
1669+
self.assert_specialized(for_iter_generator, "FOR_ITER_GEN")
1670+
self.assert_no_opcode(for_iter_generator, "FOR_ITER")
1671+
16341672

16351673
if __name__ == "__main__":
16361674
unittest.main()

Objects/rangeobject.c

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#include "pycore_modsupport.h" // _PyArg_NoKwnames()
88
#include "pycore_range.h"
99
#include "pycore_tuple.h" // _PyTuple_ITEMS()
10-
#include "pycore_pyatomic_ft_wrappers.h"
1110

1211

1312
/* Support objects whose length is > PY_SSIZE_T_MAX.
@@ -817,12 +816,10 @@ PyTypeObject PyRange_Type = {
817816
static PyObject *
818817
rangeiter_next(_PyRangeIterObject *r)
819818
{
820-
long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len);
821-
if (len > 0) {
822-
long result = FT_ATOMIC_LOAD_LONG_RELAXED(r->start);
823-
FT_ATOMIC_STORE_LONG_RELAXED(r->start, result + r->step);
824-
// Relaxed ops for maximum speed and minimum thread-safety.
825-
FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - 1);
819+
if (r->len > 0) {
820+
long result = r->start;
821+
r->start = result + r->step;
822+
r->len--;
826823
return PyLong_FromLong(result);
827824
}
828825
return NULL;
@@ -831,7 +828,7 @@ rangeiter_next(_PyRangeIterObject *r)
831828
static PyObject *
832829
rangeiter_len(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
833830
{
834-
return PyLong_FromLong(FT_ATOMIC_LOAD_LONG_RELAXED(r->len));
831+
return PyLong_FromLong(r->len);
835832
}
836833

837834
PyDoc_STRVAR(length_hint_doc,
@@ -844,11 +841,10 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
844841
PyObject *range;
845842

846843
/* create a range object for pickling */
847-
long lstart = FT_ATOMIC_LOAD_LONG_RELAXED(r->start);
848-
start = PyLong_FromLong(lstart);
844+
start = PyLong_FromLong(r->start);
849845
if (start == NULL)
850846
goto err;
851-
stop = PyLong_FromLong(lstart + FT_ATOMIC_LOAD_LONG_RELAXED(r->len) * r->step);
847+
stop = PyLong_FromLong(r->start + r->len * r->step);
852848
if (stop == NULL)
853849
goto err;
854850
step = PyLong_FromLong(r->step);
@@ -875,14 +871,12 @@ rangeiter_setstate(_PyRangeIterObject *r, PyObject *state)
875871
if (index == -1 && PyErr_Occurred())
876872
return NULL;
877873
/* silently clip the index value */
878-
long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len);
879874
if (index < 0)
880875
index = 0;
881-
else if (index > len)
882-
index = len; /* exhausted iterator */
883-
FT_ATOMIC_STORE_LONG_RELAXED(r->start,
884-
FT_ATOMIC_LOAD_LONG_RELAXED(r->start) + index * r->step);
885-
FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - index);
876+
else if (index > r->len)
877+
index = r->len; /* exhausted iterator */
878+
r->start += index * r->step;
879+
r->len -= index;
886880
Py_RETURN_NONE;
887881
}
888882

Objects/tupleobject.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,15 +1020,13 @@ tupleiter_next(PyObject *self)
10201020
return NULL;
10211021
assert(PyTuple_Check(seq));
10221022

1023-
Py_ssize_t idx = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index);
1024-
if ((size_t)idx < (size_t)PyTuple_GET_SIZE(seq)) {
1025-
item = PyTuple_GET_ITEM(seq, idx++);
1026-
FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, idx);
1023+
if (it->it_index < PyTuple_GET_SIZE(seq)) {
1024+
item = PyTuple_GET_ITEM(seq, it->it_index);
1025+
++it->it_index;
10271026
return Py_NewRef(item);
10281027
}
10291028

1030-
FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, -1);
1031-
FT_ATOMIC_STORE_PTR_RELAXED(it->it_seq, NULL);
1029+
it->it_seq = NULL;
10321030
Py_DECREF(seq);
10331031
return NULL;
10341032
}

0 commit comments

Comments
 (0)