Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 1 addition & 29 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern "C" {
#endif

#include "pycore_typedefs.h" // _PyInterpreterFrame
#include "pycore_uop.h" // _PyUOpInstruction
#include "pycore_uop_ids.h"
#include "pycore_stackref.h" // _PyStackRef
#include <stdbool.h>
Expand Down Expand Up @@ -41,32 +42,6 @@ typedef struct {
PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
} _PyVMData;

/* One uop instruction.
 *
 * Depending on the format,
 * the 32 bits between the oparg and operand are:
 *   UOP_FORMAT_TARGET:
 *     uint32_t target;
 *   UOP_FORMAT_JUMP:
 *     uint16_t jump_target;
 *     uint16_t error_target;
 */
typedef struct {
uint16_t opcode:15;  // 15-bit opcode
uint16_t format:1;   // Selects how the union below is read (see the comment above)
uint16_t oparg;
// The 32 bits whose interpretation depends on `format`.
union {
uint32_t target;
struct {
uint16_t jump_target;
uint16_t error_target;
};
};
uint64_t operand0; // A cache entry
uint64_t operand1;
// Extra per-instruction counter, present only when Py_STATS is defined.
#ifdef Py_STATS
uint64_t execution_count;
#endif
} _PyUOpInstruction;

typedef struct _PyExitData {
uint32_t target;
uint16_t index;
Expand Down Expand Up @@ -118,9 +93,6 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
// trace_run_counter is greater than this value.
#define JIT_CLEANUP_THRESHOLD 100000

// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 1200

#define TRACE_STACK_SIZE 5

int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
Expand Down
5 changes: 5 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ extern "C" {
#include "pycore_freelist_state.h" // struct _Py_freelists
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
#include "pycore_qsbr.h" // struct qsbr
#include "pycore_uop.h" // struct _PyUOpInstruction


#ifdef Py_GIL_DISABLED
Expand Down Expand Up @@ -76,6 +77,10 @@ typedef struct _PyThreadStateImpl {
Py_ssize_t reftotal; // this thread's total refcount operations
#endif

#ifdef _Py_TIER2
struct _PyUOpInstruction *jit_uop_buffer;
#endif

} _PyThreadStateImpl;

#ifdef __cplusplus
Expand Down
44 changes: 44 additions & 0 deletions Include/internal/pycore_uop.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#ifndef Py_CORE_UOP_H
#define Py_CORE_UOP_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include <stdint.h>
/* One uop instruction.
 *
 * Depending on the format,
 * the 32 bits between the oparg and operand are:
 *   UOP_FORMAT_TARGET:
 *     uint32_t target;
 *   UOP_FORMAT_JUMP:
 *     uint16_t jump_target;
 *     uint16_t error_target;
 */
typedef struct _PyUOpInstruction{
uint16_t opcode:15;  // 15-bit opcode
uint16_t format:1;   // Selects how the union below is read (see the comment above)
uint16_t oparg;
// The 32 bits whose interpretation depends on `format`.
union {
uint32_t target;
struct {
uint16_t jump_target;
uint16_t error_target;
};
};
uint64_t operand0; // A cache entry
uint64_t operand1;
// Extra per-instruction counter, present only when Py_STATS is defined.
#ifdef Py_STATS
uint64_t execution_count;
#endif
} _PyUOpInstruction;

// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 1200

#ifdef __cplusplus
}
#endif
#endif /* !Py_CORE_UOP_H */
1 change: 1 addition & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -1435,6 +1435,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_unicodeobject_generated.h \
$(srcdir)/Include/internal/pycore_unionobject.h \
$(srcdir)/Include/internal/pycore_uniqueid.h \
$(srcdir)/Include/internal/pycore_uop.h \
$(srcdir)/Include/internal/pycore_uop_ids.h \
$(srcdir)/Include/internal/pycore_uop_metadata.h \
$(srcdir)/Include/internal/pycore_warnings.h \
Expand Down
9 changes: 9 additions & 0 deletions PCbuild/pythoncore.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,15 @@
<ClInclude Include="..\Include\internal\pycore_uniqueid.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_uop.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_uop_ids.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_uop_metadata.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\mimalloc\mimalloc.h">
<Filter>Include\internal\mimalloc</Filter>
</ClInclude>
Expand Down
9 changes: 8 additions & 1 deletion Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,14 @@ uop_optimize(
{
_PyBloomFilter dependencies;
_Py_BloomFilter_Init(&dependencies);
_PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
if (tstate->jit_uop_buffer == NULL) {
tstate->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction));
if (tstate->jit_uop_buffer == NULL) {
return 0;
}
}
_PyUOpInstruction *buffer = tstate->jit_uop_buffer;
OPT_STAT_INC(attempts);
char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
bool is_noopt = true;
Expand Down
13 changes: 13 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1536,6 +1536,11 @@ new_threadstate(PyInterpreterState *interp, int whence)
}
#endif

#ifdef _Py_TIER2
// The JIT uop buffer starts out unset (NULL).
tstate->jit_uop_buffer = NULL;
#endif

/* We serialize concurrent creation to protect global state. */
HEAD_LOCK(interp->runtime);

Expand Down Expand Up @@ -1726,6 +1731,14 @@ PyThreadState_Clear(PyThreadState *tstate)
_Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate);
#endif

#ifdef _Py_TIER2
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
if (_tstate->jit_uop_buffer != NULL) {
PyMem_RawFree(_tstate->jit_uop_buffer);
_tstate->jit_uop_buffer = NULL;
}
#endif

// Merge our queue of pointers to be freed into the interpreter queue.
_PyMem_AbandonDelayed(tstate);

Expand Down
Loading