Skip to content

Commit 40988a0

Browse files
committed
Address review comments
1 parent cbee8d2 commit 40988a0

File tree

5 files changed

+31
-30
lines changed

5 files changed

+31
-30
lines changed

Misc/NEWS.d/next/Core_and_Builtins/2025-06-13-13-32-16.gh-issue-135379.pAxZgy.rst

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
Implement a limited form of register allocation know as "top of stack
1+
Implement a limited form of register allocation known as "top of stack
22
caching" in the JIT. It works by keeping 0-3 of the top items in the stack
3-
in registers. The code generator generates multiple versions of thos uops
3+
in registers. The code generator generates multiple versions of those uops
44
that do not escape and are relatively small. During JIT compilation, the
55
copy that produces the least memory traffic is selected, spilling or
66
reloading values when needed.

Python/optimizer.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,18 +1005,24 @@ count_exits(_PyUOpInstruction *buffer, int length)
10051005
return exit_count;
10061006
}
10071007

1008+
#define MAX_CACHED_REGISTER 3
1009+
1010+
/* The number of cached registers at any exit (`EXIT_IF` or `DEOPT_IF`).
1011+
* This is the number of cached entries at the start, unless the uop is
1012+
* marked as `exit_depth_is_output` in which case it is the number of
1013+
* cached entries at the end */
10081014
static int
1009-
get_exit_depth(_PyUOpInstruction *inst)
1015+
get_cached_entries_for_side_exit(_PyUOpInstruction *inst)
10101016
{
1011-
10121017
// TO DO -- Add another generated table for this?
10131018
int base_opcode = _PyUop_Uncached[inst->opcode];
10141019
assert(base_opcode != 0);
10151020
if (_PyUop_Flags[base_opcode] & HAS_DEOPT_FLAG) {
10161021
return 0;
10171022
}
10181023
int input = -1;
1019-
for (int i = 0; i < 4; i++) {
1024+
/* Find number of cached entries at input. */
1025+
for (int i = 0; i <= MAX_CACHED_REGISTER; i++) {
10201026
if (_PyUop_Caching[base_opcode].opcodes[i] == inst->opcode) {
10211027
input = i;
10221028
break;
@@ -1076,7 +1082,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
10761082
if (_PyUop_Flags[base_opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) {
10771083
uint16_t base_exit_op = (_PyUop_Flags[base_opcode] & HAS_EXIT_FLAG) ?
10781084
_EXIT_TRACE : _DEOPT;
1079-
int exit_depth = get_exit_depth(inst);
1085+
int exit_depth = get_cached_entries_for_side_exit(inst);
10801086
uint16_t exit_op = _PyUop_Caching[base_exit_op].opcodes[exit_depth];
10811087
int32_t jump_target = target;
10821088
if (is_for_iter_test[base_opcode]) {

Tools/cases_generator/analyzer.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt
99

10+
MAX_CACHED_REGISTER = 3
11+
1012
@dataclass
1113
class EscapingCall:
1214
stmt: SimpleStmt
@@ -1221,13 +1223,13 @@ def is_large(uop: Uop) -> bool:
12211223

12221224
def get_uop_cache_depths(uop: Uop) -> Iterator[tuple[int, int, int]]:
12231225
if uop.name == "_SPILL_OR_RELOAD":
1224-
for inputs in range(4):
1225-
for outputs in range(4):
1226+
for inputs in range(MAX_CACHED_REGISTER+1):
1227+
for outputs in range(MAX_CACHED_REGISTER+1):
12261228
if inputs != outputs:
12271229
yield inputs, outputs, inputs
12281230
return
12291231
if uop.name == "_EXIT_TRACE":
1230-
for i in range(4):
1232+
for i in range(MAX_CACHED_REGISTER+1):
12311233
yield i, 0, i
12321234
return
12331235
if uop.name in ("_START_EXECUTOR", "_JUMP_TO_TOP", "_DEOPT", "_ERROR_POP_N"):
@@ -1256,24 +1258,18 @@ def get_uop_cache_depths(uop: Uop) -> Iterator[tuple[int, int, int]]:
12561258
if item.peek and uop.properties.escapes:
12571259
break
12581260
ideal_outputs += 1
1259-
if ideal_inputs > 3:
1260-
ideal_inputs = 3
1261-
if ideal_outputs > 3:
1262-
ideal_outputs = 3
1261+
if ideal_inputs > MAX_CACHED_REGISTER:
1262+
ideal_inputs = MAX_CACHED_REGISTER
1263+
if ideal_outputs > MAX_CACHED_REGISTER:
1264+
ideal_outputs = MAX_CACHED_REGISTER
12631265
if non_decref_escape:
12641266
yield 0, ideal_outputs, 0
12651267
return
1266-
# If a uop has an exit, we can get in a mess if the stack caching
1267-
# changes during execution.
1268-
#if has_exit and ideal_inputs != ideal_outputs:
1269-
# n = min(ideal_inputs, ideal_outputs)
1270-
# yield n, n
1271-
# return
12721268
exit_depth = ideal_outputs if uop.properties.sync_sp else ideal_inputs
12731269
yield ideal_inputs, ideal_outputs, exit_depth
12741270
if uop.properties.escapes or uop.properties.sync_sp or has_array or is_large(uop):
12751271
return
1276-
if ideal_inputs >= 3 or ideal_outputs >= 3:
1272+
if ideal_inputs == MAX_CACHED_REGISTER or ideal_outputs == MAX_CACHED_REGISTER:
12771273
return
12781274
inputs, outputs = ideal_inputs, ideal_outputs
12791275
if inputs < outputs:

Tools/cases_generator/uop_metadata_generator.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
analyze_files,
1111
get_uop_cache_depths,
1212
Uop,
13+
MAX_CACHED_REGISTER,
1314
)
1415
from generators_common import (
1516
DEFAULT_INPUT,
@@ -24,14 +25,15 @@
2425
DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_uop_metadata.h"
2526

2627
def uop_cache_info(uop: Uop) -> str | None:
28+
table_size = MAX_CACHED_REGISTER + 1
2729
if uop.name == "_SPILL_OR_RELOAD":
2830
return None
2931
min_inputs = 4
30-
uops = [ "0" ] * 4
32+
uops = [ "0" ] * table_size
3133
for inputs, outputs, exit_depth in get_uop_cache_depths(uop):
3234
delta = outputs - inputs
3335
uops[inputs] = f"{uop.name}_r{inputs}{outputs}"
34-
for i in range(4):
36+
for i in range(table_size):
3537
if uops[i] != "0":
3638
max_inputs = i
3739
if i < min_inputs:
@@ -48,10 +50,10 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
4850
out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
4951
out.emit("typedef struct _pyuop_info {\n")
5052
out.emit("int8_t min_input; int8_t max_input; int8_t delta;\n")
51-
out.emit("int8_t exit_depth_is_output; uint16_t opcodes[4];\n")
53+
out.emit(f"int8_t exit_depth_is_output; uint16_t opcodes[{MAX_CACHED_REGISTER+1}];\n")
5254
out.emit("} _PyUopCachingInfo;\n")
5355
out.emit("extern const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1];\n\n")
54-
out.emit("extern const uint16_t _PyUop_SpillsAndReloads[4][4];\n")
56+
out.emit(f"extern const uint16_t _PyUop_SpillsAndReloads[{MAX_CACHED_REGISTER+1}][{MAX_CACHED_REGISTER+1}];\n")
5557
out.emit("extern const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1];\n\n")
5658
out.emit("#ifdef NEED_OPCODE_METADATA\n")
5759
out.emit("const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {\n")
@@ -80,9 +82,9 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
8082
for inputs, outputs, _ in get_uop_cache_depths(uop):
8183
out.emit(f"[{uop.name}_r{inputs}{outputs}] = {uop.name},\n")
8284
out.emit("};\n\n")
83-
out.emit("const uint16_t _PyUop_SpillsAndReloads[4][4] = {\n")
84-
for i in range(4):
85-
for j in range(4):
85+
out.emit(f"const uint16_t _PyUop_SpillsAndReloads[{MAX_CACHED_REGISTER+1}][{MAX_CACHED_REGISTER+1}] = {{\n")
86+
for i in range(MAX_CACHED_REGISTER+1):
87+
for j in range(MAX_CACHED_REGISTER+1):
8688
if i != j:
8789
out.emit(f"[{i}][{j}] = _SPILL_OR_RELOAD_r{i}{j},\n")
8890
out.emit("};\n\n")

0 commit comments

Comments
 (0)