diff --git a/ddtrace/internal/datadog/profiling/echion/.clang-format b/ddtrace/internal/datadog/profiling/echion/.clang-format new file mode 120000 index 00000000000..5812cd8ac9a --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/.clang-format @@ -0,0 +1 @@ +./scripts/.clang-format \ No newline at end of file diff --git a/ddtrace/internal/datadog/profiling/echion/.gitignore b/ddtrace/internal/datadog/profiling/echion/.gitignore new file mode 100644 index 00000000000..df58abda7a2 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/.gitignore @@ -0,0 +1,153 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# Protobuf compiler +protoc* + +# VS Code +.vscode/ + +# setuptools-scm +_version.py + +# profiles +profiles/ + +# Cursor +.cursor/ diff --git a/ddtrace/internal/datadog/profiling/echion/echion/cache.h b/ddtrace/internal/datadog/profiling/echion/echion/cache.h new file mode 100644 index 00000000000..6a7a5072e23 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/cache.h @@ -0,0 +1,69 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#include +#include +#include +#include + +#include + +#define CACHE_MAX_ENTRIES 2048 + +template +class LRUCache +{ +public: + LRUCache(size_t capacity) : capacity(capacity) {} + + Result> lookup(const K& k); + + void store(const K& k, std::unique_ptr v); + + class LookupError : public std::exception + { + public: + const char* what() const noexcept override + { + return "Key not found in cache"; + } + }; + +private: + size_t capacity; + std::list>> items; + std::unordered_map>>::iterator> index; +}; + +template +void LRUCache::store(const K& k, std::unique_ptr v) +{ + // Check if cache is full + if (items.size() >= capacity) + { + index.erase(items.back().first); + items.pop_back(); + } + + // Insert the new item at front of the list + items.emplace_front(k, std::move(v)); + + // Insert in the map + index[k] = items.begin(); +} + +template +Result> LRUCache::lookup(const K& k) +{ + auto itr = index.find(k); + if (itr == index.end()) + return ErrorKind::LookupError; + + // Move to the front of the list + items.splice(items.begin(), items, itr->second); + + return std::reference_wrapper(*(itr->second->second.get())); +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/config.h b/ddtrace/internal/datadog/profiling/echion/echion/config.h new file mode 100644 index 00000000000..0a88ce08816 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/config.h @@ -0,0 +1,137 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#include + +// Sampling interval +inline unsigned int interval = 1000; + +// CPU Time mode +inline int cpu = 0; + +// For cpu time mode, Echion only unwinds threads that're running by default. +// Set this to false to unwind all threads. 
+inline bool ignore_non_running_threads = true; + +// Memory events +inline int memory = 0; + +// Native stack sampling +inline int native = 0; + +// Where mode +inline int where = 0; + +// Maximum number of frames to unwind +inline unsigned int max_frames = 2048; + +// Pipe name (where mode IPC) +inline std::string pipe_name; + +// ---------------------------------------------------------------------------- +static PyObject* set_interval(PyObject* Py_UNUSED(m), PyObject* args) +{ + unsigned int new_interval; + if (!PyArg_ParseTuple(args, "I", &new_interval)) + return NULL; + + interval = new_interval; + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +inline void _set_cpu(int new_cpu) +{ + cpu = new_cpu; +} + +// ---------------------------------------------------------------------------- +inline void _set_ignore_non_running_threads(bool new_ignore_non_running_threads) +{ + ignore_non_running_threads = new_ignore_non_running_threads; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_cpu(PyObject* Py_UNUSED(m), PyObject* args) +{ + int new_cpu; + if (!PyArg_ParseTuple(args, "p", &new_cpu)) + return NULL; + + _set_cpu(new_cpu); + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_memory(PyObject* Py_UNUSED(m), PyObject* args) +{ + int new_memory; + if (!PyArg_ParseTuple(args, "p", &new_memory)) + return NULL; + + memory = new_memory; + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_native(PyObject* Py_UNUSED(m), PyObject* args) +{ +#ifndef UNWIND_NATIVE_DISABLE + int new_native; + if (!PyArg_ParseTuple(args, "p", &new_native)) + return NULL; + + native = new_native; +#else + PyErr_SetString(PyExc_RuntimeError, + "Native profiling is disabled, please re-build/install echion without " + 
"UNWIND_NATIVE_DISABLE env var/preprocessor flag"); + return NULL; +#endif // UNWIND_NATIVE_DISABLE + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_where(PyObject* Py_UNUSED(m), PyObject* args) +{ + int value; + if (!PyArg_ParseTuple(args, "p", &value)) + return NULL; + + where = value; + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_pipe_name(PyObject* Py_UNUSED(m), PyObject* args) +{ + const char* name; + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + + pipe_name = name; + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_max_frames(PyObject* Py_UNUSED(m), PyObject* args) +{ + unsigned int new_max_frames; + if (!PyArg_ParseTuple(args, "I", &new_max_frames)) + return NULL; + + max_frames = new_max_frames; + + Py_RETURN_NONE; +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/cpython/tasks.h b/ddtrace/internal/datadog/profiling/echion/echion/cpython/tasks.h new file mode 100644 index 00000000000..ca1244b295f --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/cpython/tasks.h @@ -0,0 +1,294 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#include + +#define Py_BUILD_CORE +#if PY_VERSION_HEX >= 0x030d0000 +#include +#else +#include +#include +#endif // PY_VERSION_HEX >= 0x030d0000 +#else +#include +#include +#endif + +#include + +#include + +extern "C" { + +typedef enum +{ + STATE_PENDING, + STATE_CANCELLED, + STATE_FINISHED +} fut_state; + +#if PY_VERSION_HEX >= 0x030d0000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_exception_tb; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + PyObject* prefix##_cancelled_exc; \ + fut_state prefix##_state; \ + /* These bitfields need to be at the end of the struct \ + so that these and bitfields from TaskObj are contiguous. \ + */ \ + unsigned prefix##_log_tb : 1; \ + unsigned prefix##_blocking : 1; + +#elif PY_VERSION_HEX >= 0x030b0000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_exception_tb; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; \ + PyObject* prefix##_cancelled_exc; + +#elif PY_VERSION_HEX >= 0x030a0000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_exception_tb; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + 
fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; \ + _PyErr_StackItem prefix##_cancelled_exc_state; + +#elif PY_VERSION_HEX >= 0x03090000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; \ + _PyErr_StackItem prefix##_cancelled_exc_state; + +#else +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; +#endif + +typedef struct +{ + FutureObj_HEAD(future) +} FutureObj; + +#if PY_VERSION_HEX >= 0x030d0000 +typedef struct +{ + FutureObj_HEAD(task); + unsigned task_must_cancel : 1; + unsigned task_log_destroy_pending : 1; + int task_num_cancels_requested; + PyObject* task_fut_waiter; + PyObject* task_coro; + PyObject* task_name; + PyObject* task_context; +} TaskObj; + +#elif PY_VERSION_HEX >= 0x030a0000 +typedef struct +{ + FutureObj_HEAD(task) PyObject* task_fut_waiter; + PyObject* task_coro; + PyObject* task_name; + PyObject* task_context; + int task_must_cancel; + int task_log_destroy_pending; + int task_num_cancels_requested; +} TaskObj; + +#else +typedef struct +{ + FutureObj_HEAD(task) PyObject* task_fut_waiter; + PyObject* task_coro; + PyObject* task_name; + PyObject* task_context; + int task_must_cancel; + int task_log_destroy_pending; 
+} TaskObj; +#endif + +// ---- cr_await ---- + +#if PY_VERSION_HEX >= 0x030c0000 +#define RESUME_QUICK INSTRUMENTED_RESUME +#endif + +#if PY_VERSION_HEX >= 0x030b0000 +inline PyObject* PyGen_yf(PyGenObject* gen, PyObject* frame_addr) +{ + PyObject* yf = NULL; + + if (gen->gi_frame_state < FRAME_CLEARED) + { + if (gen->gi_frame_state == FRAME_CREATED) + return NULL; + + _PyInterpreterFrame frame; + if (copy_type(frame_addr, frame)) + return NULL; + + _Py_CODEUNIT next; +#if PY_VERSION_HEX >= 0x030d0000 + if (copy_type(frame.instr_ptr, next)) +#else + if (copy_type(frame.prev_instr + 1, next)) +#endif + return NULL; + if (!(_Py_OPCODE(next) == RESUME || _Py_OPCODE(next) == RESUME_QUICK) || + _Py_OPARG(next) < 2) + return NULL; + + if (frame.stacktop < 1 || frame.stacktop > (1 << 20)) + return NULL; + + auto localsplus = std::make_unique(frame.stacktop); + if (copy_generic(frame.localsplus, localsplus.get(), frame.stacktop * sizeof(PyObject*))) + return NULL; + + yf = localsplus[frame.stacktop - 1]; + } + + return yf; +} + +#elif PY_VERSION_HEX >= 0x030a0000 +inline PyObject* PyGen_yf(PyGenObject* Py_UNUSED(gen), PyObject* frame_addr) +{ + PyObject* yf = NULL; + PyFrameObject* f = (PyFrameObject*)frame_addr; + + if (f) + { + PyFrameObject frame; + if (copy_type(f, frame)) + return NULL; + + if (frame.f_lasti < 0) + return NULL; + + PyCodeObject code; + if (copy_type(frame.f_code, code)) + return NULL; + + Py_ssize_t s = 0; + auto c = pybytes_to_bytes_and_size(code.co_code, &s); + if (c == nullptr) + return NULL; + + if (c[(frame.f_lasti + 1) * sizeof(_Py_CODEUNIT)] != YIELD_FROM) + return NULL; + + ssize_t nvalues = frame.f_stackdepth; + if (nvalues < 1 || nvalues > (1 << 20)) + return NULL; + + auto stack = std::make_unique(nvalues); + + if (copy_generic(frame.f_valuestack, stack.get(), nvalues * sizeof(PyObject*))) + return NULL; + + yf = stack[nvalues - 1]; + } + + return yf; +} + +#else +inline PyObject* PyGen_yf(PyGenObject* Py_UNUSED(gen), PyObject* frame_addr) 
+{ + PyObject* yf = NULL; + PyFrameObject* f = (PyFrameObject*)frame_addr; + + if (frame_addr == NULL) + return NULL; + + PyFrameObject frame; + if (copy_type(f, frame)) + return NULL; + + if (frame.f_stacktop) + { + if (frame.f_lasti < 0) + return NULL; + + PyCodeObject code; + if (copy_type(frame.f_code, code)) + return NULL; + + Py_ssize_t s = 0; + auto c = pybytes_to_bytes_and_size(code.co_code, &s); + if (c == nullptr) + return NULL; + + if (c[f->f_lasti + sizeof(_Py_CODEUNIT)] != YIELD_FROM) + return NULL; + + auto stacktop = std::make_unique(); + if (copy_generic(frame.f_stacktop - 1, stacktop.get(), sizeof(PyObject*))) + return NULL; + + yf = *stacktop; + } + + return yf; +} +#endif +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/errors.h b/ddtrace/internal/datadog/profiling/echion/echion/errors.h new file mode 100644 index 00000000000..c786d0aff36 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/errors.h @@ -0,0 +1,182 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#include +#include + +enum class ErrorKind { + Undefined, + LookupError, + PyBytesError, + BytecodeError, + FrameError, + MirrorError, + PyLongError, + PyUnicodeError, + UnwindError, + StackChunkError, + GenInfoError, + TaskInfoError, + TaskInfoGeneratorError, + ThreadInfoError, + CpuTimeError, + LocationError, + RendererError, +}; + +template +class [[nodiscard]] Result { +public: + // Factories + static Result ok(const T& v) { return Result(v); } + static Result ok(T&& v) { return Result(std::move(v)); } + static Result error(ErrorKind e) noexcept { return Result(e); } + + // Constructors + Result(const T& v) noexcept(std::is_nothrow_copy_constructible::value) + : success_(true) + { + ::new (static_cast(std::addressof(value_))) T(v); + } + + Result(T&& v) noexcept(std::is_nothrow_move_constructible::value) + : success_(true) + { + ::new (static_cast(std::addressof(value_))) T(std::move(v)); + } + + Result(ErrorKind e) noexcept + : success_(false) + { + error_ = e; + } + + // Destructor + ~Result() { reset(); } + + // Copy ctor + Result(const Result& other) + noexcept(std::is_nothrow_copy_constructible::value) + : success_(other.success_) + { + if (success_) { + ::new (static_cast(std::addressof(value_))) T(other.value_); + } else { + error_ = other.error_; + } + } + + // Move ctor + Result(Result&& other) noexcept(std::is_nothrow_move_constructible::value) + : success_(other.success_) + { + if (success_) { + ::new (static_cast(std::addressof(value_))) T(std::move(other.value_)); + } else { + error_ = other.error_; + } + } + + // Copy assignment + Result& operator=(const Result& other) + noexcept(std::is_nothrow_copy_constructible::value && + std::is_nothrow_copy_assignable::value) + { + if (this == &other) return *this; + + if (success_ && other.success_) { + value_ = other.value_; + } else if (success_ && !other.success_) { + value_.~T(); + success_ = false; + error_ = other.error_; + } else if (!success_ && other.success_) { + ::new 
(static_cast(std::addressof(value_))) T(other.value_); + success_ = true; + } else { // both errors + error_ = other.error_; + } + return *this; + } + + // Move assignment + Result& operator=(Result&& other) + noexcept(std::is_nothrow_move_constructible::value && + std::is_nothrow_move_assignable::value) + { + if (this == &other) return *this; + + if (success_ && other.success_) { + value_ = std::move(other.value_); + } else if (success_ && !other.success_) { + value_.~T(); + success_ = false; + error_ = other.error_; + } else if (!success_ && other.success_) { + ::new (static_cast(std::addressof(value_))) T(std::move(other.value_)); + success_ = true; + } else { // both errors + error_ = other.error_; + } + return *this; + } + + // Observers + explicit operator bool() const noexcept { return success_; } + + T& operator*() & { return value_; } + const T& operator*() const & { return value_; } + T&& operator*() && { return std::move(value_); } + + T* operator->() { return std::addressof(value_); } + const T* operator->() const { return std::addressof(value_); } + + bool has_value() const noexcept { return success_; } + + // If in error, returns default_value + template + T value_or(U&& default_value) const { + return success_ ? value_ : static_cast(std::forward(default_value)); + } + + // Returns ErrorKind::Undefined when holding a value + ErrorKind error() const noexcept { return success_ ? 
ErrorKind::Undefined : error_; } + +private: + // Active member is tracked by success_ + union { + ErrorKind error_; + T value_; + }; + bool success_; + + void reset() noexcept { + if (success_) { + value_.~T(); + } + } +}; + +// Specialization for void +template <> +class [[nodiscard]] Result { +public: + static Result ok() noexcept { return Result(true, ErrorKind::Undefined); } + static Result error(ErrorKind e) noexcept { return Result(false, e); } + Result(ErrorKind e) noexcept : success_(false), error_(e) {} + + explicit operator bool() const noexcept { return success_; } + bool has_value() const noexcept { return success_; } + + // Returns ErrorKind::Undefined when success + ErrorKind error() const noexcept { return success_ ? ErrorKind::Undefined : error_; } + +private: + bool success_; + ErrorKind error_; + + explicit Result(bool s, ErrorKind e) noexcept : success_(s), error_(e) {} +}; diff --git a/ddtrace/internal/datadog/profiling/echion/echion/frame.cc b/ddtrace/internal/datadog/profiling/echion/echion/frame.cc new file mode 100644 index 00000000000..701b185811c --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/frame.cc @@ -0,0 +1,500 @@ +#include + +#include +#include + +// ---------------------------------------------------------------------------- +#if PY_VERSION_HEX >= 0x030b0000 +static inline int _read_varint(unsigned char* table, ssize_t size, ssize_t* i) +{ + ssize_t guard = size - 1; + if (*i >= guard) + return 0; + + int val = table[++*i] & 63; + int shift = 0; + while (table[*i] & 64 && *i < guard) + { + shift += 6; + val |= (table[++*i] & 63) << shift; + } + return val; +} + +// ---------------------------------------------------------------------------- +static inline int _read_signed_varint(unsigned char* table, ssize_t size, ssize_t* i) +{ + int val = _read_varint(table, size, i); + return (val & 1) ? 
-(val >> 1) : (val >> 1); +} +#endif + +// ---------------------------------------------------------------------------- +void init_frame_cache(size_t capacity) +{ + frame_cache = new LRUCache(capacity); +} + +// ---------------------------------------------------------------------------- +void reset_frame_cache() +{ + delete frame_cache; + frame_cache = nullptr; +} + +// ------------------------------------------------------------------------ +Frame::Frame(PyObject* frame) +{ +#if PY_VERSION_HEX >= 0x030b0000 + +#if PY_VERSION_HEX >= 0x030d0000 + _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = _PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = reinterpret_cast(iframe->f_executable); +#else + const _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = _PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = iframe->f_code; +#endif // PY_VERSION_HEX >= 0x030d0000 + PyCode_Addr2Location(code, lasti << 1, &location.line, &location.column, &location.line_end, + &location.column_end); + location.column++; + location.column_end++; + name = string_table.key_unsafe(code->co_qualname); +#if PY_VERSION_HEX >= 0x030c0000 + is_entry = (iframe->owner == FRAME_OWNED_BY_CSTACK); // Shim frame +#else + is_entry = iframe->is_entry; +#endif + +#else + PyFrameObject* py_frame = reinterpret_cast(frame); + PyCodeObject* code = py_frame->f_code; + + location.line = PyFrame_GetLineNumber(py_frame); + name = string_table.key_unsafe(code->co_name); +#endif + filename = string_table.key_unsafe(code->co_filename); +} + +// ------------------------------------------------------------------------ +Result Frame::create(PyCodeObject* code, int lasti) +{ + auto maybe_filename = string_table.key(code->co_filename); + if (!maybe_filename) { + return ErrorKind::FrameError; + } + +#if PY_VERSION_HEX >= 0x030b0000 + auto maybe_name = string_table.key(code->co_qualname); +#else + auto maybe_name = 
string_table.key(code->co_name); +#endif + + if (!maybe_name) { + return ErrorKind::FrameError; + } + + auto frame = std::make_unique(*maybe_filename, *maybe_name); + auto infer_location_success = frame->infer_location(code, lasti); + if (!infer_location_success) { + return ErrorKind::LocationError; + } + + return frame; +} + +// ------------------------------------------------------------------------ +#ifndef UNWIND_NATIVE_DISABLE +Result Frame::create(unw_cursor_t& cursor, unw_word_t pc) +{ + auto filename = string_table.key(pc); + + auto maybe_name = string_table.key(cursor); + if (!maybe_name) { + return ErrorKind::FrameError; + } + + return std::make_unique(filename, *maybe_name); +} +#endif // UNWIND_NATIVE_DISABLE + +// ---------------------------------------------------------------------------- +Result Frame::infer_location(PyCodeObject* code_obj, int lasti) +{ + unsigned int lineno = code_obj->co_firstlineno; + Py_ssize_t len = 0; + +#if PY_VERSION_HEX >= 0x030b0000 + auto table = pybytes_to_bytes_and_size(code_obj->co_linetable, &len); + if (table == nullptr) { + return ErrorKind::LocationError; + } + + auto table_data = table.get(); + + for (Py_ssize_t i = 0, bc = 0; i < len; i++) + { + bc += (table[i] & 7) + 1; + int code = (table[i] >> 3) & 15; + unsigned char next_byte = 0; + switch (code) + { + case 15: + break; + + case 14: // Long form + lineno += _read_signed_varint(table_data, len, &i); + + this->location.line = lineno; + this->location.line_end = lineno + _read_varint(table_data, len, &i); + this->location.column = _read_varint(table_data, len, &i); + this->location.column_end = _read_varint(table_data, len, &i); + + break; + + case 13: // No column data + lineno += _read_signed_varint(table_data, len, &i); + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column = this->location.column_end = 0; + + break; + + case 12: // New lineno + case 11: + case 10: + if (i >= len - 2) { + return 
ErrorKind::LocationError; + } + + lineno += code - 10; + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column = 1 + table[++i]; + this->location.column_end = 1 + table[++i]; + + break; + + default: + if (i >= len - 1) { + return ErrorKind::LocationError; + } + + next_byte = table[++i]; + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column = 1 + (code << 3) + ((next_byte >> 4) & 7); + this->location.column_end = this->location.column + (next_byte & 15); + } + + if (bc > lasti) + break; + } + +#elif PY_VERSION_HEX >= 0x030a0000 + auto table = pybytes_to_bytes_and_size(code_obj->co_linetable, &len); + if (table == nullptr) { + return ErrorKind::LocationError; + } + + lasti <<= 1; + for (int i = 0, bc = 0; i < len; i++) + { + int sdelta = table[i++]; + if (sdelta == 0xff) + break; + + bc += sdelta; + + int ldelta = table[i]; + if (ldelta == 0x80) + ldelta = 0; + else if (ldelta > 0x80) + lineno -= 0x100; + + lineno += ldelta; + if (bc > lasti) + break; + } + +#else + auto table = pybytes_to_bytes_and_size(code_obj->co_lnotab, &len); + if (table == nullptr) { + return ErrorKind::LocationError; + } + + for (int i = 0, bc = 0; i < len; i++) + { + bc += table[i++]; + if (bc > lasti) + break; + + if (table[i] >= 0x80) + lineno -= 0x100; + + lineno += table[i]; + } + +#endif + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column = 0; + this->location.column_end = 0; + + return Result::ok(); +} + +// ------------------------------------------------------------------------ +Frame::Key Frame::key(PyCodeObject* code, int lasti) +{ + return ((static_cast(((reinterpret_cast(code)))) << 16) | lasti); +} + +// ---------------------------------------------------------------------------- +Frame::Key Frame::key(PyObject* frame) +{ +#if PY_VERSION_HEX >= 0x030d0000 + _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = 
_PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = reinterpret_cast(iframe->f_executable); +#elif PY_VERSION_HEX >= 0x030b0000 + const _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = _PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = iframe->f_code; +#else + const PyFrameObject* py_frame = reinterpret_cast(frame); + const int lasti = py_frame->f_lasti; + PyCodeObject* code = py_frame->f_code; +#endif + return key(code, lasti); +} + +// ------------------------------------------------------------------------ +#if PY_VERSION_HEX >= 0x030b0000 +Result> Frame::read(_PyInterpreterFrame* frame_addr, _PyInterpreterFrame** prev_addr) +#else +Result> Frame::read(PyObject* frame_addr, PyObject** prev_addr) +#endif +{ +#if PY_VERSION_HEX >= 0x030b0000 + _PyInterpreterFrame iframe; +#if PY_VERSION_HEX >= 0x030d0000 + // From Python versions 3.13, f_executable can have objects other than + // code objects for an internal frame. We need to skip some frames if + // its f_executable is not code as suggested here: + // https://github.com/python/cpython/issues/100987#issuecomment-1485556487 + PyObject f_executable; + + for (; frame_addr; frame_addr = frame_addr->previous) + { + auto resolved_addr = + stack_chunk ? reinterpret_cast<_PyInterpreterFrame*>(stack_chunk->resolve(frame_addr)) + : frame_addr; + if (resolved_addr != frame_addr) + { + frame_addr = resolved_addr; + } + else + { + if (copy_type(frame_addr, iframe)) + { + return ErrorKind::FrameError; + } + frame_addr = &iframe; + } + if (copy_type(frame_addr->f_executable, f_executable)) + { + return ErrorKind::FrameError; + } + if (f_executable.ob_type == &PyCode_Type) + { + break; + } + } + + if (frame_addr == NULL) + { + return ErrorKind::FrameError; + } +#else // PY_VERSION_HEX < 0x030d0000 + // Code Specific to Python < 3.13 and >= 3.11 + auto resolved_addr = + stack_chunk ? 
reinterpret_cast<_PyInterpreterFrame*>(stack_chunk->resolve(frame_addr)) + : frame_addr; + if (resolved_addr != frame_addr) + { + frame_addr = resolved_addr; + } + else + { + if (copy_type(frame_addr, iframe)) + { + return ErrorKind::FrameError; + } + frame_addr = &iframe; + } +#endif // PY_VERSION_HEX >= 0x030d0000 + + // We cannot use _PyInterpreterFrame_LASTI because _PyCode_CODE reads + // from the code object. +#if PY_VERSION_HEX >= 0x030d0000 + const int lasti = + (static_cast((frame_addr->instr_ptr - 1 - + reinterpret_cast<_Py_CODEUNIT*>( + (reinterpret_cast(frame_addr->f_executable)))))) - + offsetof(PyCodeObject, co_code_adaptive) / sizeof(_Py_CODEUNIT); + auto maybe_frame = Frame::get(reinterpret_cast(frame_addr->f_executable), lasti); + if (!maybe_frame) { + return ErrorKind::FrameError; + } + + auto& frame = maybe_frame->get(); +#else + const int lasti = (static_cast((frame_addr->prev_instr - + reinterpret_cast<_Py_CODEUNIT*>((frame_addr->f_code))))) - + offsetof(PyCodeObject, co_code_adaptive) / sizeof(_Py_CODEUNIT); + auto maybe_frame = Frame::get(frame_addr->f_code, lasti); + if (!maybe_frame) { + return ErrorKind::FrameError; + } + + auto& frame = maybe_frame->get(); +#endif // PY_VERSION_HEX >= 0x030d0000 + if (&frame != &INVALID_FRAME) + { +#if PY_VERSION_HEX >= 0x030c0000 + frame.is_entry = (frame_addr->owner == FRAME_OWNED_BY_CSTACK); // Shim frame +#else // PY_VERSION_HEX < 0x030c0000 + frame.is_entry = frame_addr->is_entry; +#endif // PY_VERSION_HEX >= 0x030c0000 + } + + *prev_addr = &frame == &INVALID_FRAME ? NULL : frame_addr->previous; + +#else // PY_VERSION_HEX < 0x030b0000 + // Unwind the stack from leaf to root and store it in a stack. This way we + // can print it from root to leaf. 
+ PyFrameObject py_frame; + + if (copy_type(frame_addr, py_frame)) { + return ErrorKind::FrameError; + } + + auto maybe_frame = Frame::get(py_frame.f_code, py_frame.f_lasti); + if (!maybe_frame) { + return ErrorKind::FrameError; + } + + auto& frame = maybe_frame->get(); + *prev_addr = (&frame == &INVALID_FRAME) ? NULL : reinterpret_cast(py_frame.f_back); +#endif // PY_VERSION_HEX >= 0x030b0000 + + return std::ref(frame); +} + +// ---------------------------------------------------------------------------- +Result> Frame::get(PyCodeObject* code_addr, int lasti) +{ + auto frame_key = Frame::key(code_addr, lasti); + + auto maybe_frame = frame_cache->lookup(frame_key); + if (maybe_frame) { + return *maybe_frame; + } + + PyCodeObject code; + if (copy_type(code_addr, code)) { + return std::ref(INVALID_FRAME); + } + + auto maybe_new_frame = Frame::create(&code, lasti); + if (!maybe_new_frame) { + return std::ref(INVALID_FRAME); + } + + auto new_frame = std::move(*maybe_new_frame); + new_frame->cache_key = frame_key; + auto& f = *new_frame; + Renderer::get().frame(frame_key, new_frame->filename, new_frame->name, + new_frame->location.line, new_frame->location.line_end, + new_frame->location.column, new_frame->location.column_end); + frame_cache->store(frame_key, std::move(new_frame)); + return std::ref(f); +} + +// ---------------------------------------------------------------------------- +Frame& Frame::get(PyObject* frame) +{ + auto frame_key = Frame::key(frame); + + auto maybe_frame = frame_cache->lookup(frame_key); + if (maybe_frame) { + return *maybe_frame; + } + + auto new_frame = std::make_unique(frame); + new_frame->cache_key = frame_key; + auto& f = *new_frame; + Renderer::get().frame(frame_key, new_frame->filename, new_frame->name, + new_frame->location.line, new_frame->location.line_end, + new_frame->location.column, new_frame->location.column_end); + frame_cache->store(frame_key, std::move(new_frame)); + return f; +} + +// 
---------------------------------------------------------------------------- +#ifndef UNWIND_NATIVE_DISABLE +Result> Frame::get(unw_cursor_t& cursor) +{ + unw_word_t pc; + unw_get_reg(&cursor, UNW_REG_IP, &pc); + if (pc == 0) { + return ErrorKind::FrameError; + } + + uintptr_t frame_key = (uintptr_t)pc; + auto maybe_frame = frame_cache->lookup(frame_key); + if (maybe_frame) { + return *maybe_frame; + } + + auto maybe_new_frame = Frame::create(cursor, pc); + if (!maybe_new_frame) { + return std::ref(UNKNOWN_FRAME); + } + + auto frame = std::move(*maybe_new_frame); + frame->cache_key = frame_key; + auto& f = *frame; + Renderer::get().frame(frame_key, frame->filename, frame->name, frame->location.line, + frame->location.line_end, frame->location.column, + frame->location.column_end); + frame_cache->store(frame_key, std::move(frame)); + return std::ref(f); +} +#endif // UNWIND_NATIVE_DISABLE + +// ---------------------------------------------------------------------------- +Frame& Frame::get(StringTable::Key name) +{ + uintptr_t frame_key = static_cast(name); + + auto maybe_frame = frame_cache->lookup(frame_key); + if (maybe_frame) { + return *maybe_frame; + } + + auto frame = std::make_unique(name); + frame->cache_key = frame_key; + auto& f = *frame; + Renderer::get().frame(frame_key, frame->filename, frame->name, frame->location.line, + frame->location.line_end, frame->location.column, + frame->location.column_end); + frame_cache->store(frame_key, std::move(frame)); + return f; +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/frame.h b/ddtrace/internal/datadog/profiling/echion/echion/frame.h new file mode 100644 index 00000000000..be4638fa278 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/frame.h @@ -0,0 +1,105 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#if PY_VERSION_HEX >= 0x030c0000 +// https://github.com/python/cpython/issues/108216#issuecomment-1696565797 +#undef _PyGC_FINALIZED +#endif +#include +#if PY_VERSION_HEX >= 0x030d0000 +#define Py_BUILD_CORE +#include +#endif // PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030b0000 +#define Py_BUILD_CORE +#include +#endif + +#include +#include +#include +#include + +#ifndef UNWIND_NATIVE_DISABLE +#include +#define UNW_LOCAL_ONLY +#include +#endif // UNWIND_NATIVE_DISABLE + +#include +#include +#if PY_VERSION_HEX >= 0x030b0000 +#include +#endif // PY_VERSION_HEX >= 0x030b0000 +#include +#include + +// ---------------------------------------------------------------------------- +class Frame +{ +public: + using Ref = std::reference_wrapper; + using Ptr = std::unique_ptr; + using Key = uintptr_t; + + // ------------------------------------------------------------------------ + Key cache_key = 0; + StringTable::Key filename = 0; + StringTable::Key name = 0; + + struct _location + { + int line = 0; + int line_end = 0; + int column = 0; + int column_end = 0; + } location; + +#if PY_VERSION_HEX >= 0x030b0000 + bool is_entry = false; +#endif + + // ------------------------------------------------------------------------ + Frame(StringTable::Key filename, StringTable::Key name) : filename(filename), name(name) {} + Frame(StringTable::Key name) : name(name) {}; + Frame(PyObject* frame); + [[nodiscard]] static Result create(PyCodeObject* code, int lasti); +#ifndef UNWIND_NATIVE_DISABLE + [[nodiscard]] static Result create(unw_cursor_t& cursor, unw_word_t pc); +#endif // UNWIND_NATIVE_DISABLE + +#if PY_VERSION_HEX >= 0x030b0000 + [[nodiscard]] static Result> read(_PyInterpreterFrame* frame_addr, _PyInterpreterFrame** prev_addr); +#else + [[nodiscard]] static Result> read(PyObject* frame_addr, PyObject** prev_addr); +#endif + + 
[[nodiscard]] static Result> get(PyCodeObject* code_addr, int lasti); + static Frame& get(PyObject* frame); +#ifndef UNWIND_NATIVE_DISABLE + [[nodiscard]] static Result> get(unw_cursor_t& cursor); +#endif // UNWIND_NATIVE_DISABLE + static Frame& get(StringTable::Key name); + +private: + [[nodiscard]] Result inline infer_location(PyCodeObject* code, int lasti); + static inline Key key(PyCodeObject* code, int lasti); + static inline Key key(PyObject* frame); +}; + +inline auto INVALID_FRAME = Frame(StringTable::INVALID); +inline auto UNKNOWN_FRAME = Frame(StringTable::UNKNOWN); + +// We make this a raw pointer to prevent its destruction on exit, since we +// control the lifetime of the cache. +inline LRUCache* frame_cache = nullptr; +void init_frame_cache(size_t capacity); +void reset_frame_cache(); diff --git a/ddtrace/internal/datadog/profiling/echion/echion/greenlets.h b/ddtrace/internal/datadog/profiling/echion/echion/greenlets.h new file mode 100644 index 00000000000..2989c6fe3c5 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/greenlets.h @@ -0,0 +1,96 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2025 Gabriele N. Tornetta . 
+ +#pragma once + +#include +#define Py_BUILD_CORE + + +#include +#include + + +#define FRAME_NOT_SET Py_False // Sentinel for frame cell + + +class GreenletInfo +{ +public: + typedef std::unique_ptr Ptr; + typedef std::reference_wrapper Ref; + typedef uintptr_t ID; + + ID greenlet_id = 0; + StringTable::Key name; + PyObject* frame = NULL; + + GreenletInfo(ID id, PyObject* frame, StringTable::Key name) + : greenlet_id(id), frame(frame), name(name) + { + } + + int unwind(PyObject*, PyThreadState*, FrameStack&); +}; + +// ---------------------------------------------------------------------------- + +inline int GreenletInfo::unwind(PyObject* frame, PyThreadState* tstate, FrameStack& stack) +{ + PyObject* frame_addr = NULL; +#if PY_VERSION_HEX >= 0x030d0000 + frame_addr = + frame == Py_None + ? (PyObject*)tstate->current_frame + : reinterpret_cast(reinterpret_cast(frame)->f_frame); +#elif PY_VERSION_HEX >= 0x030b0000 + if (frame == Py_None) + { + _PyCFrame cframe; + _PyCFrame* cframe_addr = tstate->cframe; + if (copy_type(cframe_addr, cframe)) + // TODO: Invalid frame + return 0; + + frame_addr = (PyObject*)cframe.current_frame; + } + else + { + frame_addr = reinterpret_cast(reinterpret_cast(frame)->f_frame); + } + +#else // Python < 3.11 + frame_addr = frame == Py_None ? (PyObject*)tstate->frame : frame; +#endif + auto count = unwind_frame(frame_addr, stack); + + stack.push_back(Frame::get(name)); + + return count + 1; // We add an extra count for the frame with the greenlet + // name. +} + +// ---------------------------------------------------------------------------- + +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. 
+inline std::unordered_map& greenlet_info_map = + *(new std::unordered_map()); + +// maps greenlets to their parent +inline std::unordered_map& greenlet_parent_map = + *(new std::unordered_map()); + +// maps threads to any currently active greenlets +inline std::unordered_map& greenlet_thread_map = + *(new std::unordered_map()); + +inline std::mutex greenlet_info_map_lock; + +// ---------------------------------------------------------------------------- + +inline std::vector> current_greenlets; + +// ---------------------------------------------------------------------------- diff --git a/ddtrace/internal/datadog/profiling/echion/echion/interp.h b/ddtrace/internal/datadog/profiling/echion/echion/interp.h new file mode 100644 index 00000000000..87b1596e62b --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/interp.h @@ -0,0 +1,54 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#if PY_VERSION_HEX >= 0x03090000 +#define Py_BUILD_CORE +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#include +#endif + +#include + +#include +#include + + +class InterpreterInfo +{ +public: + int64_t id = 0; + void* tstate_head = NULL; + void* next = NULL; +}; + +static void for_each_interp(std::function callback) +{ + InterpreterInfo interpreter_info = {0}; + + for (char* interp_addr = (char*)runtime->interpreters.head; interp_addr != NULL; + interp_addr = (char*)interpreter_info.next) + { + if (copy_type(interp_addr + offsetof(PyInterpreterState, id), interpreter_info.id)) + continue; + + #if PY_VERSION_HEX >= 0x030b0000 + if (copy_type(interp_addr + offsetof(PyInterpreterState, threads.head), interpreter_info.tstate_head)) +#else + if (copy_type(interp_addr + offsetof(PyInterpreterState, tstate_head), interpreter_info.tstate_head)) +#endif + continue; + + if (copy_type(interp_addr + offsetof(PyInterpreterState, 
next), interpreter_info.next)) + continue; + + callback(interpreter_info); + }; +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/long.h b/ddtrace/internal/datadog/profiling/echion/echion/long.h new file mode 100644 index 00000000000..077fe8312d8 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/long.h @@ -0,0 +1,72 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . +#pragma once + +#include +#if PY_VERSION_HEX >= 0x030c0000 +#include +// Note: Even if use the right PYLONG_BITS_IN_DIGIT that is specified in the +// Python we use to build echion, it can be different from the Python that is +// used to run the program. +#if PYLONG_BITS_IN_DIGIT == 30 +typedef uint32_t digit; +#elif PYLONG_BITS_IN_DIGIT == 15 +typedef unsigned short digit; +#else +#error "Unsupported PYLONG_BITS_IN_DIGIT" +#endif // PYLONG_BITS_IN_DIGIT +#endif // PY_VERSION_HEX >= 0x030c0000 + +#include +#include + +constexpr Py_ssize_t MAX_DIGITS = 128; + +// ---------------------------------------------------------------------------- +#if PY_VERSION_HEX >= 0x030c0000 +[[nodiscard]] static Result pylong_to_llong(PyObject* long_addr) +{ + // Only used to extract a task-id on Python 3.12, omits overflow checks + PyLongObject long_obj; + long long ret = 0; + + if (copy_type(long_addr, long_obj)) + return ErrorKind::PyLongError; + + if (!PyLong_CheckExact(&long_obj)) + return ErrorKind::PyLongError; + + if (_PyLong_IsCompact(&long_obj)) + { + ret = (long long)_PyLong_CompactValue(&long_obj); + } + else + { + // If we're here, then we need to iterate over the digits + // We might overflow, but we don't care for now + int sign = _PyLong_NonCompactSign(&long_obj); + Py_ssize_t i = _PyLong_DigitCount(&long_obj); + + if (i > MAX_DIGITS) { + return ErrorKind::PyLongError; + } + + // Copy over the digits as ob_digit is allocated dynamically with + // PyObject_Malloc. 
+ digit digits[MAX_DIGITS]; + if (copy_generic(long_obj.long_value.ob_digit, digits, i * sizeof(digit))) + { + return ErrorKind::PyLongError; + } + while (--i >= 0) + { + ret <<= PyLong_SHIFT; + ret |= digits[i]; + } + ret *= sign; + } + + return ret; +} +#endif diff --git a/ddtrace/internal/datadog/profiling/echion/echion/mirrors.h b/ddtrace/internal/datadog/profiling/echion/echion/mirrors.h new file mode 100644 index 00000000000..be02e511194 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/mirrors.h @@ -0,0 +1,209 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#include "echion/errors.h" +#define PY_SSIZE_T_CLEAN +#include +#include +#include + +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#define Py_BUILD_CORE +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#include +#else +typedef struct +{ + Py_hash_t me_hash; + PyObject* me_key; + PyObject* me_value; /* This field is only meaningful for combined tables */ +} PyDictKeyEntry; + +typedef Py_ssize_t (*dict_lookup_func)(PyDictObject* mp, PyObject* key, Py_hash_t hash, + PyObject** value_addr); + +/* See dictobject.c for actual layout of DictKeysObject */ +typedef struct _dictkeysobject +{ + Py_ssize_t dk_refcnt; + + /* Size of the hash table (dk_indices). It must be a power of 2. */ + Py_ssize_t dk_size; + + dict_lookup_func dk_lookup; + + /* Number of usable entries in dk_entries. */ + Py_ssize_t dk_usable; + + /* Number of used entries in dk_entries. */ + Py_ssize_t dk_nentries; + + char dk_indices[]; /* char is required to avoid strict aliasing. 
*/ + +} PyDictKeysObject; + +typedef PyObject* PyDictValues; +#endif + +#include + +#include + +class MirrorObject +{ +public: + [[nodiscard]] inline Result reflect() + { + if (reflected == NULL) + return ErrorKind::MirrorError; + + return reflected; + } + +protected: + MirrorObject(std::unique_ptr data, PyObject* reflected) : data(std::move(data)), reflected(reflected) {} + + std::unique_ptr data = nullptr; + PyObject* reflected = NULL; +}; + +// ---------------------------------------------------------------------------- +class MirrorDict : public MirrorObject +{ +public: + [[nodiscard]] static inline Result create(PyObject* dict_addr) noexcept; + + [[nodiscard]] Result get_item(PyObject* key) + { + auto maybe_reflected = reflect(); + if (!maybe_reflected) { + return maybe_reflected; + } + + return PyDict_GetItem(reflected, key); + } + +private: + MirrorDict(PyDictObject dict, std::unique_ptr data, PyObject* reflected) : MirrorObject(std::move(data), reflected), dict(dict) {} + PyDictObject dict; +}; + +[[nodiscard]] inline Result MirrorDict::create(PyObject* dict_addr) noexcept +{ + PyDictObject dict; + + if (copy_type(dict_addr, dict)) { + return ErrorKind::MirrorError; + } + + PyDictKeysObject keys; + if (copy_type(dict.ma_keys, keys)) { + return ErrorKind::MirrorError; + } + + // Compute the full dictionary data size +#if PY_VERSION_HEX >= 0x030b0000 + size_t entry_size = + keys.dk_kind == DICT_KEYS_UNICODE ? sizeof(PyDictUnicodeEntry) : sizeof(PyDictKeyEntry); + size_t keys_size = sizeof(PyDictKeysObject) + (1 << keys.dk_log2_index_bytes) + + (keys.dk_nentries * entry_size); +#else + size_t entry_size = sizeof(PyDictKeyEntry); + size_t keys_size = sizeof(PyDictKeysObject) + (keys.dk_size * sizeof(Py_ssize_t)) + + (keys.dk_nentries * entry_size); +#endif + size_t values_size = dict.ma_values != NULL ? 
keys.dk_nentries * sizeof(PyObject*) : 0; + + // Allocate the buffer + ssize_t data_size = keys_size + (keys.dk_nentries * entry_size) + values_size; + if (data_size < 0 || data_size > (1 << 20)) { + return ErrorKind::MirrorError; + } + + auto data = std::make_unique(data_size); + + // Copy the key data and update the pointer + if (copy_generic(dict.ma_keys, data.get(), keys_size)) { + return ErrorKind::MirrorError; + } + + dict.ma_keys = (PyDictKeysObject*)data.get(); + + if (dict.ma_values != NULL) + { + // Copy the value data and update the pointer + char* values_addr = data.get() + keys_size; + if (copy_generic(dict.ma_values, keys_size, values_size)) { + return ErrorKind::MirrorError; + } + + dict.ma_values = (PyDictValues*)values_addr; + } + + auto reflected = (PyObject*)&dict; + return MirrorDict(dict, std::move(data), reflected); +} + +// ---------------------------------------------------------------------------- +class MirrorSet : public MirrorObject +{ +public: + [[nodiscard]] inline static Result create(PyObject*); + [[nodiscard]] Result> as_unordered_set(); + +private: + MirrorSet(size_t size, PySetObject set, std::unique_ptr data, PyObject* reflected) : MirrorObject(std::move(data), reflected), size(size), set(set) {} + + size_t size; + PySetObject set; +}; + +[[nodiscard]] inline Result MirrorSet::create(PyObject* set_addr) +{ + PySetObject set; + + if (copy_type(set_addr, set)) { + return ErrorKind::MirrorError; + } + + auto size = set.mask + 1; + ssize_t table_size = size * sizeof(setentry); + if (table_size < 0 || table_size > (1 << 20)) { + return ErrorKind::MirrorError; + } + + auto data = std::make_unique(table_size); + if (copy_generic(set.table, data.get(), table_size)) { + return ErrorKind::MirrorError; + } + + set.table = (setentry*)data.get(); + + auto reflected = (PyObject*)&set; + return MirrorSet(size, set, std::move(data), reflected); +} + +[[nodiscard]] inline Result> MirrorSet::as_unordered_set() +{ + if (data == nullptr) { + return 
ErrorKind::MirrorError; + } + + std::unordered_set uset; + + for (size_t i = 0; i < size; i++) + { + auto entry = set.table[i]; + if (entry.key != NULL) + uset.insert(entry.key); + } + + return uset; +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/render.h b/ddtrace/internal/datadog/profiling/echion/echion/render.h new file mode 100644 index 00000000000..e67dcdb9663 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/render.h @@ -0,0 +1,465 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +// Forward declaration +class Frame; + +enum MetricType +{ + Time, + Memory +}; + +class RendererInterface +{ +public: + [[nodiscard]] virtual Result open() = 0; + virtual void close() = 0; + virtual void header() = 0; + virtual void metadata(const std::string& label, const std::string& value) = 0; + // If a renderer has its own caching mechanism for frames, this can be used + // to store frame information. 
+ virtual void frame(mojo_ref_t key, mojo_ref_t filename, mojo_ref_t name, mojo_int_t line, + mojo_int_t line_end, mojo_int_t column, mojo_int_t column_end) = 0; + // Refers to the frame stored using above function + virtual void frame_ref(mojo_ref_t key) = 0; + virtual void frame_kernel(const std::string& scope) = 0; + // Simlar to frame/frame_ref functions, helpers for string tables + virtual void string(mojo_ref_t key, const std::string& value) = 0; + virtual void string_ref(mojo_ref_t key) = 0; + + virtual void render_message(std::string_view msg) = 0; + virtual void render_thread_begin(PyThreadState* tstate, std::string_view name, + microsecond_t cpu_time, uintptr_t thread_id, + unsigned long native_id) = 0; + virtual void render_task_begin(std::string task_name, bool on_cpu) = 0; + virtual void render_stack_begin(long long pid, long long iid, + const std::string& thread_name) = 0; + virtual void render_frame(Frame& frame) = 0; + virtual void render_cpu_time(uint64_t cpu_time) = 0; + virtual void render_stack_end(MetricType metric_type, uint64_t delta) = 0; + + // The validity of the interface is a two-step process + // 1. If the RendererInterface has been destroyed, obviously it's invalid + // 2. There might be state behind RendererInterface, and the lifetime of that + // state alone may be insufficient to know its usability. is_valid + // should return false in such cases. 
+ virtual bool is_valid() = 0; + virtual ~RendererInterface() = default; +}; + +class WhereRenderer : public RendererInterface +{ +private: + std::ostream* output; + std::ofstream file_stream; + + WhereRenderer() {} + ~WhereRenderer() {} + +public: + static WhereRenderer& get() + { + static WhereRenderer instance; + return instance; + } + + WhereRenderer(WhereRenderer&) = delete; + WhereRenderer(WhereRenderer&&) = delete; + void operator=(const WhereRenderer&) = delete; + + bool set_output(std::string_view file_name) + { + file_stream.close(); + file_stream.open(file_name.data(), std::ios::out); + if (file_stream.is_open()) + { + output = &file_stream; + return true; + } + return false; + } + + bool set_output(std::ostream& new_output) + { + file_stream.close(); + output = &new_output; + return true; + } + + [[nodiscard]] Result open() override { return Result::ok(); }; + void close() override {}; + void header() override {}; + void metadata(const std::string&, const std::string&) override {}; + void frame(mojo_ref_t, mojo_ref_t, mojo_ref_t, mojo_int_t, mojo_int_t, mojo_int_t, mojo_int_t) override {}; + void frame_ref(mojo_ref_t) override {}; + void frame_kernel(const std::string&) override {}; + void string(mojo_ref_t, const std::string&) override {}; + void string_ref(mojo_ref_t) override {}; + + void render_thread_begin(PyThreadState*, std::string_view name, microsecond_t, uintptr_t, unsigned long) override + { + *output << " 🧵 " << name << ":" << std::endl; + } + void render_task_begin(std::string, bool) override {} + void render_stack_begin(long long, long long, const std::string&) override {} + void render_message(std::string_view msg) override + { + *output << msg << std::endl; + } + void render_frame(Frame&) override; + void render_stack_end(MetricType, uint64_t) override {} + void render_cpu_time(uint64_t) override {} + + bool is_valid() override + { + return true; + } +}; + +class MojoRenderer : public RendererInterface +{ + std::ofstream output; + 
std::mutex lock; + uint64_t metric = 0; + + void inline event(MojoEvent event) + { + output.put((char)event); + } + void inline string(const std::string& string) + { + output << string << '\0'; + } + void inline string(const char* string) + { + output << string << '\0'; + } + void inline ref(mojo_ref_t value) + { + integer(value); + } + void inline integer(mojo_int_t n) + { + mojo_uint_t integer = n < 0 ? -n : n; + bool sign = n < 0; + + unsigned char byte = integer & 0x3f; + if (sign) + byte |= 0x40; + + integer >>= 6; + if (integer) + byte |= 0x80; + + output.put(byte); + + while (integer) + { + byte = integer & 0x7f; + integer >>= 7; + if (integer) + byte |= 0x80; + output.put(byte); + } + } + +public: + MojoRenderer() = default; + + [[nodiscard]] Result open() override + { + output.open(std::getenv("ECHION_OUTPUT")); + if (!output.is_open()) + { + std::cerr << "Failed to open output file " << std::getenv("ECHION_OUTPUT") << std::endl; + return ErrorKind::RendererError; + } + + return Result::ok(); + } + + // ------------------------------------------------------------------------ + void close() override + { + std::lock_guard guard(lock); + + output.flush(); + output.close(); + } + + // ------------------------------------------------------------------------ + void inline header() override + { + std::lock_guard guard(lock); + + output << "MOJ"; + integer(MOJO_VERSION); + } + + // ------------------------------------------------------------------------ + void inline metadata(const std::string& label, const std::string& value) override + { + std::lock_guard guard(lock); + + event(MOJO_METADATA); + string(label); + string(value); + } + + // ------------------------------------------------------------------------ + void inline stack(mojo_int_t pid, mojo_int_t iid, const std::string& thread_name) + { + std::lock_guard guard(lock); + + event(MOJO_STACK); + integer(pid); + integer(iid); + string(thread_name); + } + + // 
------------------------------------------------------------------------ + void inline frame(mojo_ref_t key, mojo_ref_t filename, mojo_ref_t name, mojo_int_t line, + mojo_int_t line_end, mojo_int_t column, mojo_int_t column_end) override + { + std::lock_guard guard(lock); + + event(MOJO_FRAME); + ref(key); + ref(filename); + ref(name); + integer(line); + integer(line_end); + integer(column); + integer(column_end); + } + + // ------------------------------------------------------------------------ + void inline frame_ref(mojo_ref_t key) override + { + std::lock_guard guard(lock); + + if (key == 0) + { + event(MOJO_FRAME_INVALID); + } + else + { + event(MOJO_FRAME_REF); + ref(key); + } + } + + // ------------------------------------------------------------------------ + void inline frame_kernel(const std::string& scope) override + { + std::lock_guard guard(lock); + + event(MOJO_FRAME_KERNEL); + string(scope); + } + + // ------------------------------------------------------------------------ + void inline metric_time(mojo_int_t value) + { + std::lock_guard guard(lock); + + event(MOJO_METRIC_TIME); + integer(value); + } + + // ------------------------------------------------------------------------ + void inline metric_memory(mojo_int_t value) + { + std::lock_guard guard(lock); + + event(MOJO_METRIC_MEMORY); + integer(value); + } + + // ------------------------------------------------------------------------ + void inline string(mojo_ref_t key, const std::string& value) override + { + std::lock_guard guard(lock); + + event(MOJO_STRING); + ref(key); + string(value); + } + + // ------------------------------------------------------------------------ + void inline string_ref(mojo_ref_t key) override + { + std::lock_guard guard(lock); + + event(MOJO_STRING_REF); + ref(key); + } + + void render_message(std::string_view) override {}; + void render_thread_begin(PyThreadState*, std::string_view, microsecond_t, uintptr_t, unsigned long) override {}; + void 
render_task_begin(std::string, bool) override {}; + void render_stack_begin(long long pid, long long iid, const std::string& name) override + { + stack(pid, iid, name); + }; + void render_frame(Frame& frame) override; + void render_cpu_time(uint64_t cpu_time) override + { + metric = cpu_time; + }; + void render_stack_end(MetricType metric_type, uint64_t delta) override + { + if (metric_type == MetricType::Time) + { + metric_time(cpu ? metric : delta); + } + else if (metric_type == MetricType::Memory) + { + metric_memory(delta); + } + }; + bool is_valid() override + { + return true; + } +}; + +class Renderer +{ +private: + std::shared_ptr default_renderer = std::make_shared(); + std::weak_ptr currentRenderer; + + std::shared_ptr getActiveRenderer() + { + if (auto renderer = currentRenderer.lock()) + { + if (renderer->is_valid()) + { + return renderer; + } + } + return default_renderer; + } + + Renderer() = default; + ~Renderer() = default; + +public: + Renderer(const Renderer&) = delete; + Renderer& operator=(const Renderer&) = delete; + + static Renderer& get() + { + static Renderer instance; + return instance; + } + + void set_renderer(std::shared_ptr renderer) + { + currentRenderer = renderer; + } + + void header() + { + getActiveRenderer()->header(); + } + + void metadata(const std::string& label, const std::string& value) + { + getActiveRenderer()->metadata(label, value); + } + + void string(mojo_ref_t key, const std::string& value) + { + getActiveRenderer()->string(key, value); + } + + void frame(mojo_ref_t key, mojo_ref_t filename, mojo_ref_t name, mojo_int_t line, + mojo_int_t line_end, mojo_int_t column, mojo_int_t column_end) + { + getActiveRenderer()->frame(key, filename, name, line, line_end, column, column_end); + } + + void frame_ref(mojo_ref_t key) + { + getActiveRenderer()->frame_ref(key); + } + + void frame_kernel(const std::string& scope) + { + getActiveRenderer()->frame_kernel(scope); + } + + void string(mojo_ref_t key, const char* value) + { + 
getActiveRenderer()->string(key, value); + } + + void string_ref(mojo_ref_t key) + { + getActiveRenderer()->string_ref(key); + } + + void render_message(std::string_view msg) + { + getActiveRenderer()->render_message(msg); + } + + [[nodiscard]] Result open() + { + return getActiveRenderer()->open(); + } + + void close() + { + getActiveRenderer()->close(); + } + + void render_thread_begin(PyThreadState* tstate, std::string_view name, microsecond_t cpu_time, + uintptr_t thread_id, unsigned long native_id) + { + getActiveRenderer()->render_thread_begin(tstate, name, cpu_time, thread_id, native_id); + } + + void render_task_begin(std::string task_name, bool on_cpu) + { + getActiveRenderer()->render_task_begin(task_name, on_cpu); + } + + void render_stack_begin(long long pid, long long iid, const std::string& thread_name) + { + getActiveRenderer()->render_stack_begin(pid, iid, thread_name); + } + + void render_frame(Frame& frame) + { + getActiveRenderer()->render_frame(frame); + } + + void render_cpu_time(uint64_t cpu_time) + { + getActiveRenderer()->render_cpu_time(cpu_time); + } + + void render_stack_end(MetricType metric_type, uint64_t delta) + { + getActiveRenderer()->render_stack_end(metric_type, delta); + } +}; diff --git a/ddtrace/internal/datadog/profiling/echion/echion/signals.h b/ddtrace/internal/datadog/profiling/echion/echion/signals.h new file mode 100644 index 00000000000..6dacf7fad19 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/signals.h @@ -0,0 +1,56 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include + +#include +#include + +// ---------------------------------------------------------------------------- + +inline std::mutex sigprof_handler_lock; + +// ---------------------------------------------------------------------------- +inline void sigprof_handler([[maybe_unused]] int signum) +{ +#ifndef UNWIND_NATIVE_DISABLE + unwind_native_stack(); +#endif // UNWIND_NATIVE_DISABLE + unwind_python_stack(current_tstate); + // NOTE: Native stacks for tasks is non-trivial, so we skip it for now. + + sigprof_handler_lock.unlock(); +} + +// ---------------------------------------------------------------------------- +inline void sigquit_handler([[maybe_unused]] int signum) +{ + // Wake up the where thread + std::lock_guard lock(where_lock); + where_cv.notify_one(); +} + +// ---------------------------------------------------------------------------- +inline void install_signals() +{ + signal(SIGQUIT, sigquit_handler); + + if (native) + signal(SIGPROF, sigprof_handler); +} + +// ---------------------------------------------------------------------------- +inline void restore_signals() +{ + signal(SIGQUIT, SIG_DFL); + + if (native) + signal(SIGPROF, SIG_DFL); +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/stack_chunk.h b/ddtrace/internal/datadog/profiling/echion/echion/stack_chunk.h new file mode 100644 index 00000000000..5ffb8718f05 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/stack_chunk.h @@ -0,0 +1,108 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#define Py_BUILD_CORE +#include + +#include +#include + +#include +#include + + +// ---------------------------------------------------------------------------- +class StackChunk +{ +public: + StackChunk() {} + + [[nodiscard]] inline Result update(_PyStackChunk* chunk_addr); + inline void* resolve(void* frame_addr); + inline bool is_valid() const; + +private: + void* origin = NULL; + std::vector data; + size_t data_capacity = 0; + std::unique_ptr previous = nullptr; +}; + +// ---------------------------------------------------------------------------- +Result StackChunk::update(_PyStackChunk* chunk_addr) +{ + _PyStackChunk chunk; + + if (copy_type(chunk_addr, chunk)) { + return ErrorKind::StackChunkError; + } + + origin = chunk_addr; + // if data_capacity is not enough, reallocate. + if (chunk.size > data_capacity) + { + data_capacity = std::max(chunk.size, data_capacity); + data.resize(data_capacity); + } + + // Copy the data up until the size of the chunk + if (copy_generic(chunk_addr, data.data(), chunk.size)) { + return ErrorKind::StackChunkError; + } + + if (chunk.previous != NULL) + { + if (previous == nullptr) + previous = std::make_unique(); + + auto update_success = previous->update((_PyStackChunk*)chunk.previous); + if (!update_success) { + previous = nullptr; + } + } + + return Result::ok(); +} + +// ---------------------------------------------------------------------------- +void* StackChunk::resolve(void* address) +{ + // If data is not properly initialized, simply return the address + if (!is_valid()) + { + return address; + } + + _PyStackChunk* chunk = (_PyStackChunk*)data.data(); + + // Check if this chunk contains the address + if (address >= origin && address < (char*)origin + chunk->size) + return (char*)chunk + ((char*)address - (char*)origin); + + if (previous) + return previous->resolve(address); + + return 
address; +} + +// ---------------------------------------------------------------------------- +bool StackChunk::is_valid() const +{ + return data_capacity > 0 && + data.size() > 0 && + data.size() >= sizeof(_PyStackChunk) && + data.data() != nullptr && + origin != nullptr; +} + +// ---------------------------------------------------------------------------- + +inline std::unique_ptr stack_chunk = nullptr; diff --git a/ddtrace/internal/datadog/profiling/echion/echion/stacks.h b/ddtrace/internal/datadog/profiling/echion/echion/stacks.h new file mode 100644 index 00000000000..628a68d2488 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/stacks.h @@ -0,0 +1,382 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include +#include +#include + +#ifndef UNWIND_NATIVE_DISABLE +#define UNW_LOCAL_ONLY +#include +#endif // UNWIND_NATIVE_DISABLE + +#include +#include +#include +#if PY_VERSION_HEX >= 0x030b0000 +#include "echion/stack_chunk.h" +#endif // PY_VERSION_HEX >= 0x030b0000 +#include + +// ---------------------------------------------------------------------------- + +class FrameStack : public std::deque +{ +public: + using Ptr = std::unique_ptr; + using Key = Frame::Key; + + // ------------------------------------------------------------------------ + Key key() + { + Key h = 0; + + for (auto it = this->begin(); it != this->end(); ++it) + h = rotl(h) ^ (*it).get().cache_key; + + return h; + } + + // ------------------------------------------------------------------------ + void render() + { + for (auto it = this->rbegin(); it != this->rend(); ++it) + { +#if PY_VERSION_HEX >= 0x030c0000 + if ((*it).get().is_entry) + // This is a shim frame so we skip it. 
+ continue; +#endif + Renderer::get().render_frame((*it).get()); + } + } + + // ------------------------------------------------------------------------ + void render_where() + { + for (auto it = this->rbegin(); it != this->rend(); ++it) + { +#if PY_VERSION_HEX >= 0x030c0000 + if ((*it).get().is_entry) + // This is a shim frame so we skip it. + continue; +#endif + WhereRenderer::get().render_frame((*it).get()); + } + } + +private: + // ------------------------------------------------------------------------ + static inline Frame::Key rotl(Key key) + { + return (key << 1) | (key >> (CHAR_BIT * sizeof(key) - 1)); + } +}; + +// ---------------------------------------------------------------------------- + +inline FrameStack python_stack; +inline FrameStack native_stack; +inline FrameStack interleaved_stack; + +// ---------------------------------------------------------------------------- +#ifndef UNWIND_NATIVE_DISABLE +inline void unwind_native_stack() +{ + unw_cursor_t cursor; + unw_context_t context; + + unw_getcontext(&context); + unw_init_local(&cursor, &context); + + native_stack.clear(); + + while (unw_step(&cursor) > 0 && native_stack.size() < max_frames) + { + auto maybe_frame = Frame::get(cursor); + if (!maybe_frame) { + break; + } + + native_stack.push_back(*maybe_frame); + } +} +#endif // UNWIND_NATIVE_DISABLE + +// ---------------------------------------------------------------------------- +static size_t unwind_frame(PyObject* frame_addr, FrameStack& stack) +{ + std::unordered_set seen_frames; // Used to detect cycles in the stack + int count = 0; + + PyObject* current_frame_addr = frame_addr; + while (current_frame_addr != NULL && stack.size() < max_frames) + { + if (seen_frames.find(current_frame_addr) != seen_frames.end()) + break; + + seen_frames.insert(current_frame_addr); + +#if PY_VERSION_HEX >= 0x030b0000 + auto maybe_frame = + Frame::read(reinterpret_cast<_PyInterpreterFrame*>(current_frame_addr), + 
reinterpret_cast<_PyInterpreterFrame**>(¤t_frame_addr)); +#else + auto maybe_frame = Frame::read(current_frame_addr, ¤t_frame_addr); +#endif + if (!maybe_frame) { + break; + } + + stack.push_back(*maybe_frame); + count++; + } + + return count; +} + +// ---------------------------------------------------------------------------- +static size_t unwind_frame_unsafe(PyObject* frame, FrameStack& stack) +{ + std::unordered_set seen_frames; // Used to detect cycles in the stack + int count = 0; + + PyObject* current_frame = frame; + while (current_frame != NULL && stack.size() < max_frames) + { + if (seen_frames.find(current_frame) != seen_frames.end()) + break; + +#if PY_VERSION_HEX >= 0x030d0000 + // See the comment in unwind_frame() + while (current_frame != NULL) + { + if (((_PyInterpreterFrame*)current_frame)->f_executable->ob_type == &PyCode_Type) + { + break; + } + current_frame = (PyObject*)((_PyInterpreterFrame*)current_frame)->previous; + } + + if (current_frame == NULL) + { + break; + } +#endif // PY_VERSION_HEX >= 0x030d0000 + count++; + + seen_frames.insert(current_frame); + + stack.push_back(Frame::get(current_frame)); + +#if PY_VERSION_HEX >= 0x030b0000 + current_frame = (PyObject*)((_PyInterpreterFrame*)current_frame)->previous; +#else + current_frame = (PyObject*)((PyFrameObject*)current_frame)->f_back; +#endif + } + + return count; +} + +// ---------------------------------------------------------------------------- +static void unwind_python_stack(PyThreadState* tstate, FrameStack& stack) +{ + stack.clear(); +#if PY_VERSION_HEX >= 0x030b0000 + if (stack_chunk == nullptr) + { + stack_chunk = std::make_unique(); + } + + if (!stack_chunk->update((_PyStackChunk*)tstate->datastack_chunk)) { + stack_chunk = nullptr; + } +#endif + +#if PY_VERSION_HEX >= 0x030d0000 + PyObject* frame_addr = (PyObject*)tstate->current_frame; +#elif PY_VERSION_HEX >= 0x030b0000 + _PyCFrame cframe; + _PyCFrame* cframe_addr = tstate->cframe; + if (copy_type(cframe_addr, cframe)) + 
// TODO: Invalid frame + return; + + PyObject* frame_addr = (PyObject*)cframe.current_frame; +#else // Python < 3.11 + PyObject* frame_addr = (PyObject*)tstate->frame; +#endif + unwind_frame(frame_addr, stack); +} + +// ---------------------------------------------------------------------------- +static void unwind_python_stack_unsafe(PyThreadState* tstate, FrameStack& stack) +{ + stack.clear(); +#if PY_VERSION_HEX >= 0x030b0000 + if (stack_chunk == nullptr) + { + stack_chunk = std::make_unique(); + } + + if (!stack_chunk->update((_PyStackChunk*)tstate->datastack_chunk)) { + stack_chunk = nullptr; + } +#endif + +#if PY_VERSION_HEX >= 0x030d0000 + PyObject* frame_addr = (PyObject*)tstate->current_frame; +#elif PY_VERSION_HEX >= 0x030b0000 + PyObject* frame_addr = (PyObject*)tstate->cframe->current_frame; +#else // Python < 3.11 + PyObject* frame_addr = (PyObject*)tstate->frame; +#endif + unwind_frame_unsafe(frame_addr, stack); +} + +// ---------------------------------------------------------------------------- +static void unwind_python_stack(PyThreadState* tstate) +{ + unwind_python_stack(tstate, python_stack); +} + +// ---------------------------------------------------------------------------- +static Result interleave_stacks(FrameStack& python_stack) +{ + interleaved_stack.clear(); + + auto p = python_stack.rbegin(); + // The last two frames are usually the signal trampoline and the signal + // handler. We skip them. + for (auto n = native_stack.rbegin(); n != native_stack.rend() - 2; ++n) + { + auto native_frame = *n; + + auto maybe_name = string_table.lookup(native_frame.get().name); + if (!maybe_name) { + return ErrorKind::LookupError; + } + + auto name = *maybe_name; + if (name->find("PyEval_EvalFrameDefault") != + std::string::npos) + { + if (p == python_stack.rend()) + { + // We expected a Python frame but we found none, so we report + // the native frame instead. + std::cerr << "Expected Python frame(s), found none!" 
<< std::endl; + interleaved_stack.push_front(native_frame); + } + else + { + // We skip the PyEval_EvalFrameDefault frame because it is the + // function that calls the Python code. +#if PY_VERSION_HEX >= 0x030b0000 + int cframe_count = 0; + while (p != python_stack.rend()) + { + // The Python stack will start with an entry frame at the top. + // We stop popping at the next entry frame. + cframe_count += (*p).get().is_entry; + if (cframe_count >= 2) + break; + + interleaved_stack.push_front(*p++); + } +#else + interleaved_stack.push_front(*p++); +#endif + } + } + else + interleaved_stack.push_front(native_frame); + } + + if (p != python_stack.rend()) + { + std::cerr << "Python stack not empty after interleaving!" << std::endl; + while (p != python_stack.rend()) + interleaved_stack.push_front(*p++); + } + + return Result::ok(); +} + +// ---------------------------------------------------------------------------- +static Result interleave_stacks() +{ + return interleave_stacks(python_stack); +} + +// ---------------------------------------------------------------------------- +class StackInfo +{ +public: + StringTable::Key task_name; + bool on_cpu; + FrameStack stack; + + StackInfo(StringTable::Key task_name, bool on_cpu) : task_name(task_name), on_cpu(on_cpu) {} +}; + +// ---------------------------------------------------------------------------- +// This table is used to store entire stacks and index them by key. This is +// used when profiling memory events to account for deallocations. +class StackTable +{ +public: + // ------------------------------------------------------------------------ + FrameStack::Key inline store(FrameStack::Ptr stack) + { + std::lock_guard lock(this->lock); + + auto stack_key = stack->key(); + + auto stack_entry = table.find(stack_key); + if (stack_entry == table.end()) + { + table.emplace(stack_key, std::move(stack)); + } + else + { + // TODO: Check for collisions. 
+ } + + return stack_key; + } + + // ------------------------------------------------------------------------ + FrameStack& retrieve(FrameStack::Key stack_key) + { + std::lock_guard lock(this->lock); + + return *table.find(stack_key)->second; + } + + // ------------------------------------------------------------------------ + void clear() + { + std::lock_guard lock(this->lock); + + table.clear(); + } + +private: + std::unordered_map> table; + std::mutex lock; +}; + +// ---------------------------------------------------------------------------- +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +inline auto& stack_table = *(new StackTable()); diff --git a/ddtrace/internal/datadog/profiling/echion/echion/state.h b/ddtrace/internal/datadog/profiling/echion/echion/state.h new file mode 100644 index 00000000000..848c4b64e50 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/state.h @@ -0,0 +1,32 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#define Py_BUILD_CORE +#include + +#include +#include +#include + +inline _PyRuntimeState* runtime = &_PyRuntime; +inline PyThreadState* current_tstate = NULL; + +inline std::thread* sampler_thread = nullptr; + +inline int running = 0; + +inline std::thread* where_thread = nullptr; +inline std::condition_variable where_cv; +inline std::mutex where_lock; + +inline PyObject* asyncio_current_tasks = NULL; +inline PyObject* asyncio_scheduled_tasks = NULL; // WeakSet +inline PyObject* asyncio_eager_tasks = NULL; // set diff --git a/ddtrace/internal/datadog/profiling/echion/echion/strings.h b/ddtrace/internal/datadog/profiling/echion/echion/strings.h new file mode 100644 index 00000000000..c671b4b5328 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/strings.h @@ -0,0 +1,235 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#include + +#include +#include + +#ifndef UNWIND_NATIVE_DISABLE +#include +#define UNW_LOCAL_ONLY +#include +#endif // UNWIND_NATIVE_DISABLE + + +#include +#include +#include + + +// ---------------------------------------------------------------------------- +static std::unique_ptr pybytes_to_bytes_and_size(PyObject* bytes_addr, + Py_ssize_t* size) +{ + PyBytesObject bytes; + + if (copy_type(bytes_addr, bytes)) + return nullptr; + + *size = bytes.ob_base.ob_size; + if (*size < 0 || *size > (1 << 20)) + return nullptr; + + auto data = std::make_unique(*size); + if (copy_generic(((char*)bytes_addr) + offsetof(PyBytesObject, ob_sval), data.get(), *size)) + return nullptr; + + return data; +} + +// ---------------------------------------------------------------------------- +static Result pyunicode_to_utf8(PyObject* str_addr) +{ + PyUnicodeObject str; + if (copy_type(str_addr, str)) + return ErrorKind::PyUnicodeError; + + PyASCIIObject& ascii = str._base._base; + + if (ascii.state.kind != 1) + return ErrorKind::PyUnicodeError; + + const char* data = ascii.state.compact ? (const char*)(((uint8_t*)str_addr) + sizeof(ascii)) + : (const char*)str._base.utf8; + if (data == NULL) + return ErrorKind::PyUnicodeError; + + Py_ssize_t size = ascii.state.compact ? 
ascii.length : str._base.utf8_length; + if (size < 0 || size > 1024) + return ErrorKind::PyUnicodeError; + + auto dest = std::string(size, '\0'); + if (copy_generic(data, dest.data(), size)) + return ErrorKind::PyUnicodeError; + + return Result(dest); +} + +// ---------------------------------------------------------------------------- + +class StringTable : public std::unordered_map +{ +public: + using Key = uintptr_t; + + + static constexpr Key INVALID = 1; + static constexpr Key UNKNOWN = 2; + + // Python string object + [[nodiscard]] inline Result key(PyObject* s) + { + const std::lock_guard lock(table_lock); + + auto k = (Key)s; + + if (this->find(k) == this->end()) + { +#if PY_VERSION_HEX >= 0x030c0000 + // The task name might hold a PyLong for deferred task name formatting. + std::string str = "Task-"; + + auto maybe_long = pylong_to_llong(s); + if (maybe_long) + { + str += std::to_string(*maybe_long); + } + else + { + auto maybe_unicode = pyunicode_to_utf8(s); + if (!maybe_unicode) { + return ErrorKind::PyUnicodeError; + } + + str = *maybe_unicode; + } +#else + auto maybe_unicode = pyunicode_to_utf8(s); + if (!maybe_unicode) { + return ErrorKind::PyUnicodeError; + } + + std::string str = std::move(*maybe_unicode); +#endif + this->emplace(k, str); + Renderer::get().string(k, str); + } + + return Result(k); + }; + + // Python string object + [[nodiscard]] inline Key key_unsafe(PyObject* s) + { + const std::lock_guard lock(table_lock); + + auto k = (Key)s; + + if (this->find(k) == this->end()) + { +#if PY_VERSION_HEX >= 0x030c0000 + // The task name might hold a PyLong for deferred task name formatting. + auto str = (PyLong_CheckExact(s)) ? 
"Task-" + std::to_string(PyLong_AsLong(s)) + : std::string(PyUnicode_AsUTF8(s)); +#else + auto str = std::string(PyUnicode_AsUTF8(s)); +#endif + this->emplace(k, str); + Renderer::get().string(k, str); + } + + return k; + }; + +#ifndef UNWIND_NATIVE_DISABLE + // Native filename by program counter + [[nodiscard]] inline Key key(unw_word_t pc) + { + const std::lock_guard lock(table_lock); + + auto k = (Key)pc; + + if (this->find(k) == this->end()) + { + char buffer[32] = {0}; + std::snprintf(buffer, 32, "native@%p", (void*)k); + this->emplace(k, buffer); + Renderer::get().string(k, buffer); + } + + return k; + } + + // Native scope name by unwinding cursor + [[nodiscard]] inline Result key(unw_cursor_t& cursor) + { + const std::lock_guard lock(table_lock); + + unw_proc_info_t pi; + if ((unw_get_proc_info(&cursor, &pi))) + return ErrorKind::UnwindError; + + auto k = (Key)pi.start_ip; + + if (this->find(k) == this->end()) + { + unw_word_t offset; // Ignored. All the information is in the PC anyway. 
+ char sym[256]; + if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset)) + return ErrorKind::UnwindError; + + char* name = sym; + + // Try to demangle C++ names + char* demangled = NULL; + if (name[0] == '_' && name[1] == 'Z') + { + int status; + demangled = abi::__cxa_demangle(name, NULL, NULL, &status); + if (status == 0) + name = demangled; + } + + this->emplace(k, name); + Renderer::get().string(k, name); + + if (demangled) + std::free(demangled); + } + + return Result(k); + } +#endif // UNWIND_NATIVE_DISABLE + + [[nodiscard]] inline Result lookup(Key key) + { + const std::lock_guard lock(table_lock); + + auto it = this->find(key); + if (it == this->end()) + return ErrorKind::LookupError; + + return Result(&it->second); + }; + + StringTable() : std::unordered_map() + { + this->emplace(0, ""); + this->emplace(INVALID, ""); + this->emplace(UNKNOWN, ""); + }; + +private: + std::mutex table_lock; +}; + +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +inline StringTable& string_table = *(new StringTable()); diff --git a/ddtrace/internal/datadog/profiling/echion/echion/tasks.h b/ddtrace/internal/datadog/profiling/echion/echion/tasks.h new file mode 100644 index 00000000000..70d3be65afe --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/tasks.h @@ -0,0 +1,335 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#include + +#define Py_BUILD_CORE +#if PY_VERSION_HEX >= 0x030d0000 +#include +#else +#include +#endif // PY_VERSION_HEX >= 0x030d0000 +#else +#include +#include +#endif // PY_VERSION_HEX >= 0x30b0000 + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// Max number of recursive calls GenInfo::GenInfo and TaskInfo::TaskInfo can do +// before raising an error. +const constexpr size_t MAX_RECURSION_DEPTH = 250; + +class GenInfo +{ +public: + typedef std::unique_ptr Ptr; + + class Error : public std::exception + { + public: + const char* what() const noexcept override + { + return "Cannot create generator info object"; + } + }; + + PyObject* origin = NULL; + PyObject* frame = NULL; + + GenInfo::Ptr await = nullptr; + + bool is_running = false; + + [[nodiscard]] static Result create(PyObject* gen_addr); + GenInfo(PyObject* origin, PyObject* frame, GenInfo::Ptr await, bool is_running) + : origin(origin), frame(frame), await(std::move(await)), is_running(is_running) { + + } +}; + +inline Result GenInfo::create(PyObject* gen_addr) +{ + static thread_local size_t recursion_depth = 0; + recursion_depth++; + + if (recursion_depth > MAX_RECURSION_DEPTH) { + recursion_depth--; + return ErrorKind::GenInfoError; + } + + PyGenObject gen; + + if (copy_type(gen_addr, gen) || !PyCoro_CheckExact(&gen)) { + recursion_depth--; + return ErrorKind::GenInfoError; + } + + auto origin = gen_addr; + +#if PY_VERSION_HEX >= 0x030b0000 + // The frame follows the generator object + auto frame = (gen.gi_frame_state == FRAME_CLEARED) + ? NULL + : (PyObject*)((char*)gen_addr + offsetof(PyGenObject, gi_iframe)); +#else + auto frame = (PyObject*)gen.gi_frame; +#endif + + PyFrameObject f; + if (copy_type(frame, f)) { + recursion_depth--; + return ErrorKind::GenInfoError; + } + + PyObject* yf = (frame != NULL ? 
PyGen_yf(&gen, frame) : NULL); + GenInfo::Ptr await = nullptr; + if (yf != NULL && yf != gen_addr) + { + auto maybe_await = GenInfo::create(yf); + if (maybe_await) { + await = std::move(*maybe_await); + } + } + +#if PY_VERSION_HEX >= 0x030b0000 + auto is_running = (gen.gi_frame_state == FRAME_EXECUTING); +#elif PY_VERSION_HEX >= 0x030a0000 + auto is_running = (frame != NULL) ? _PyFrame_IsExecuting(&f) : false; +#else + auto is_running = gen.gi_running; +#endif + + recursion_depth--; + return std::make_unique(origin, frame, std::move(await), is_running); +} + +// ---------------------------------------------------------------------------- + +class TaskInfo +{ +public: + typedef std::unique_ptr Ptr; + typedef std::reference_wrapper Ref; + + class Error : public std::exception + { + public: + const char* what() const noexcept override + { + return "Cannot create task info object"; + } + }; + + PyObject* origin = NULL; + PyObject* loop = NULL; + + GenInfo::Ptr coro = nullptr; + + StringTable::Key name; + + // Information to reconstruct the async stack as best as we can + TaskInfo::Ptr waiter = nullptr; + + [[nodiscard]] static Result create(TaskObj*); + TaskInfo(PyObject* origin, PyObject* loop, GenInfo::Ptr coro, StringTable::Key name, TaskInfo::Ptr waiter) + : origin(origin), loop(loop), coro(std::move(coro)), name(name), waiter(std::move(waiter)) { + + } + + [[nodiscard]] static Result current(PyObject*); + inline size_t unwind(FrameStack&); +}; + +inline std::unordered_map task_link_map; +inline std::mutex task_link_map_lock; + +// ---------------------------------------------------------------------------- +inline Result TaskInfo::create(TaskObj* task_addr) +{ + static thread_local size_t recursion_depth = 0; + recursion_depth++; + + if (recursion_depth > MAX_RECURSION_DEPTH) { + recursion_depth--; + return ErrorKind::TaskInfoError; + } + + TaskObj task; + if (copy_type(task_addr, task)) { + recursion_depth--; + return ErrorKind::TaskInfoError; + } + + auto 
maybe_coro = GenInfo::create(task.task_coro); + if (!maybe_coro) { + recursion_depth--; + return ErrorKind::TaskInfoGeneratorError; + } + + auto origin = (PyObject*)task_addr; + + auto maybe_name = string_table.key(task.task_name); + if (!maybe_name) { + recursion_depth--; + return ErrorKind::TaskInfoError; + } + + auto name = *maybe_name; + auto loop = task.task_loop; + + TaskInfo::Ptr waiter = nullptr; + if (task.task_fut_waiter) + { + auto maybe_waiter = TaskInfo::create((TaskObj*)task.task_fut_waiter); // TODO: Make lazy? + if (maybe_waiter) { + waiter = std::move(*maybe_waiter); + } + } + + recursion_depth--; + return std::make_unique(origin, loop, std::move(*maybe_coro), name, std::move(waiter)); +} + +// ---------------------------------------------------------------------------- +inline Result TaskInfo::current(PyObject* loop) +{ + if (loop == NULL) { + return ErrorKind::TaskInfoError; + } + + auto maybe_current_tasks_dict = MirrorDict::create(asyncio_current_tasks); + if (!maybe_current_tasks_dict) { + return ErrorKind::TaskInfoError; + } + + auto current_tasks_dict = std::move(*maybe_current_tasks_dict); + auto maybe_task = current_tasks_dict.get_item(loop); + if (!maybe_task) { + return ErrorKind::TaskInfoError; + } + + PyObject* task = *maybe_task; + if (task == NULL) { + return ErrorKind::TaskInfoError; + } + + return TaskInfo::create((TaskObj*)task); +} + +// ---------------------------------------------------------------------------- +// TODO: Make this a "for_each_task" function? 
+[[nodiscard]] inline Result> get_all_tasks(PyObject* loop) +{ + std::vector tasks; + if (loop == NULL) + return tasks; + + auto maybe_scheduled_tasks_set = MirrorSet::create(asyncio_scheduled_tasks); + if (!maybe_scheduled_tasks_set) { + return ErrorKind::TaskInfoError; + } + + auto scheduled_tasks_set = std::move(*maybe_scheduled_tasks_set); + auto maybe_scheduled_tasks = scheduled_tasks_set.as_unordered_set(); + if (!maybe_scheduled_tasks) { + return ErrorKind::TaskInfoError; + } + + auto scheduled_tasks = std::move(*maybe_scheduled_tasks); + for (auto task_wr_addr : scheduled_tasks) + { + PyWeakReference task_wr; + if (copy_type(task_wr_addr, task_wr)) + continue; + + auto maybe_task_info = TaskInfo::create((TaskObj*)task_wr.wr_object); + if (maybe_task_info) { + if ((*maybe_task_info)->loop == loop) { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + + if (asyncio_eager_tasks != NULL) + { + auto maybe_eager_tasks_set = MirrorSet::create(asyncio_eager_tasks); + if (!maybe_eager_tasks_set) { + return ErrorKind::TaskInfoError; + } + + auto eager_tasks_set = std::move(*maybe_eager_tasks_set); + + auto maybe_eager_tasks = eager_tasks_set.as_unordered_set(); + if (!maybe_eager_tasks) { + return ErrorKind::TaskInfoError; + } + + auto eager_tasks = std::move(*maybe_eager_tasks); + for (auto task_addr : eager_tasks) + { + auto maybe_task_info = TaskInfo::create((TaskObj*)task_addr); + if (maybe_task_info) { + if ((*maybe_task_info)->loop == loop) { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + } + + return tasks; +} + +// ---------------------------------------------------------------------------- + +inline std::vector> current_tasks; + +// ---------------------------------------------------------------------------- + +inline size_t TaskInfo::unwind(FrameStack& stack) +{ + // TODO: Check for running task. 
+ std::stack coro_frames; + + // Unwind the coro chain + for (auto coro = this->coro.get(); coro != NULL; coro = coro->await.get()) + { + if (coro->frame != NULL) + coro_frames.push(coro->frame); + } + + int count = 0; + + // Unwind the coro frames + while (!coro_frames.empty()) + { + PyObject* frame = coro_frames.top(); + coro_frames.pop(); + + count += unwind_frame(frame, stack); + } + + return count; +} diff --git a/ddtrace/internal/datadog/profiling/echion/echion/threads.h b/ddtrace/internal/datadog/profiling/echion/echion/threads.h new file mode 100644 index 00000000000..5bb017bfc52 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/echion/echion/threads.h @@ -0,0 +1,625 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#include +#define Py_BUILD_CORE + +#include +#include +#include +#include +#include + +#if defined PL_LINUX +#include +#elif defined PL_DARWIN +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +class ThreadInfo +{ +public: + using Ptr = std::unique_ptr; + + class Error : public std::exception + { + public: + const char* what() const noexcept override + { + return "Cannot create thread info object"; + } + }; + + + uintptr_t thread_id; + unsigned long native_id; + + std::string name; + +#if defined PL_LINUX + clockid_t cpu_clock_id; +#elif defined PL_DARWIN + mach_port_t mach_port; +#endif + microsecond_t cpu_time; + + uintptr_t asyncio_loop = 0; + + [[nodiscard]] Result update_cpu_time(); + bool is_running(); + + [[nodiscard]] Result sample(int64_t, PyThreadState*, microsecond_t); + void unwind(PyThreadState*); + + // ------------------------------------------------------------------------ +#if defined PL_LINUX + ThreadInfo(uintptr_t thread_id, unsigned long native_id, const char* name, clockid_t cpu_clock_id) + : thread_id(thread_id), native_id(native_id), name(name), cpu_clock_id(cpu_clock_id) + 
{ + } +#elif defined PL_DARWIN + ThreadInfo(uintptr_t thread_id, unsigned long native_id, const char* name, mach_port_t mach_port) + : thread_id(thread_id), native_id(native_id), name(name), mach_port(mach_port) + { + } +#endif + + [[nodiscard]] static Result> create(uintptr_t thread_id, unsigned long native_id, const char* name) { +#if defined PL_LINUX + clockid_t cpu_clock_id; + if (pthread_getcpuclockid((pthread_t)thread_id, &cpu_clock_id)) { + return ErrorKind::ThreadInfoError; + } + + auto result = std::make_unique(thread_id, native_id, name, cpu_clock_id); +#elif defined PL_DARWIN + mach_port_t mach_port; + // pthread_mach_thread_np does not return a status code; the behaviour is undefined + // if thread_id is invalid. + mach_port = pthread_mach_thread_np((pthread_t)thread_id); + + auto result = std::make_unique(thread_id, native_id, name, mach_port); + #endif + + auto update_cpu_time_success = result->update_cpu_time(); + if (!update_cpu_time_success) { + return ErrorKind::ThreadInfoError; + } + + return result; + }; + +private: + [[nodiscard]] Result unwind_tasks(); + void unwind_greenlets(PyThreadState*, unsigned long); +}; + +inline Result ThreadInfo::update_cpu_time() +{ +#if defined PL_LINUX + struct timespec ts; + if (clock_gettime(cpu_clock_id, &ts)) { + // If the clock is invalid, we skip updating the CPU time. + // This can happen if we try to compute CPU time for a thread that has exited. + if (errno == EINVAL) { + return Result::ok(); + } + + return ErrorKind::CpuTimeError; + } + + this->cpu_time = TS_TO_MICROSECOND(ts); +#elif defined PL_DARWIN + thread_basic_info_data_t info; + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + kern_return_t kr = + thread_info((thread_act_t)this->mach_port, THREAD_BASIC_INFO, (thread_info_t)&info, &count); + + if (kr != KERN_SUCCESS) { + // If the thread is invalid, we skip updating the CPU time. + // This can happen if we try to compute CPU time for a thread that has exited. 
+ if (kr == KERN_INVALID_ARGUMENT) { + return Result::ok(); + } + + return ErrorKind::CpuTimeError; + } + + if (info.flags & TH_FLAGS_IDLE) { + return Result::ok(); + } + + this->cpu_time = TV_TO_MICROSECOND(info.user_time) + TV_TO_MICROSECOND(info.system_time); +#endif + + return Result::ok(); +} + +inline bool ThreadInfo::is_running() +{ +#if defined PL_LINUX + struct timespec ts1, ts2; + + // Get two back-to-back times + if (clock_gettime(cpu_clock_id, &ts1) != 0) + return false; + if (clock_gettime(cpu_clock_id, &ts2) != 0) + return false; + + // If the CPU time has advanced, the thread is running + return (ts1.tv_sec != ts2.tv_sec || ts1.tv_nsec != ts2.tv_nsec); + +#elif defined PL_DARWIN + thread_basic_info_data_t info; + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + kern_return_t kr = + thread_info((thread_act_t)this->mach_port, THREAD_BASIC_INFO, (thread_info_t)&info, &count); + + if (kr != KERN_SUCCESS) + return false; + + return info.run_state == TH_STATE_RUNNING; + +#endif +} + +// ---------------------------------------------------------------------------- + +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +inline std::unordered_map& thread_info_map = + *(new std::unordered_map()); // indexed by thread_id + +inline std::mutex thread_info_map_lock; + +// ---------------------------------------------------------------------------- +inline void ThreadInfo::unwind(PyThreadState* tstate) +{ + if (native) + { + // Lock on the signal handler. Will get unlocked once the handler is + // done unwinding the native stack. + const std::lock_guard guard(sigprof_handler_lock); + + // Pass the current thread state to the signal handler. This is needed + // to unwind the Python stack from within it. + current_tstate = tstate; + + // Send a signal to the thread to unwind its native stack. 
+ pthread_kill((pthread_t)tstate->thread_id, SIGPROF); + + // Lock to wait for the signal handler to finish unwinding the native + // stack. Release the lock immediately after so that it is available + // for the next thread. + sigprof_handler_lock.lock(); + } + else + { + unwind_python_stack(tstate); + if (asyncio_loop) + { + auto unwind_tasks_success = unwind_tasks(); + if (!unwind_tasks_success) { + // If we fail, that's OK + } + } + + // We make the assumption that gevent and asyncio are not mixed + // together to keep the logic here simple. We can always revisit this + // should there be a substantial demand for it. + unwind_greenlets(tstate, native_id); + } +} + +// ---------------------------------------------------------------------------- +inline Result ThreadInfo::unwind_tasks() +{ + std::vector leaf_tasks; + std::unordered_set parent_tasks; + std::unordered_map waitee_map; // Indexed by task origin + std::unordered_map origin_map; // Indexed by task origin + + auto maybe_all_tasks = get_all_tasks((PyObject*)asyncio_loop); + if (!maybe_all_tasks) { + return ErrorKind::TaskInfoError; + } + + auto all_tasks = std::move(*maybe_all_tasks); + { + std::lock_guard lock(task_link_map_lock); + + // Clean up the task_link_map. Remove entries associated to tasks that + // no longer exist. + std::unordered_set all_task_origins; + std::transform(all_tasks.cbegin(), all_tasks.cend(), + std::inserter(all_task_origins, all_task_origins.begin()), + [](const TaskInfo::Ptr& task) { return task->origin; }); + + std::vector to_remove; + for (auto kv : task_link_map) + { + if (all_task_origins.find(kv.first) == all_task_origins.end()) + to_remove.push_back(kv.first); + } + for (auto key : to_remove) + task_link_map.erase(key); + + // Determine the parent tasks from the gather links. 
        std::transform(task_link_map.cbegin(), task_link_map.cend(),
                       std::inserter(parent_tasks, parent_tasks.begin()),
                       [](const std::pair<PyObject*, PyObject*>& kv) { return kv.second; });
    }

    // Index every task by origin, and classify: tasks with a waiter feed the
    // waitee map; tasks that nobody waits on (and that are not gather
    // parents) are the leaves we start unwinding from.
    for (auto& task : all_tasks)
    {
        origin_map.emplace(task->origin, std::ref(*task));

        if (task->waiter != NULL)
            waitee_map.emplace(task->waiter->origin, std::ref(*task));
        else if (parent_tasks.find(task->origin) == parent_tasks.end())
        {
            if (cpu && ignore_non_running_threads && !task->coro->is_running)
            {
                // This task is not running, so we skip it if we are
                // interested in just CPU time.
                continue;
            }
            leaf_tasks.push_back(std::ref(*task));
        }
    }

    for (auto& task : leaf_tasks)
    {
        bool on_cpu = task.get().coro->is_running;
        auto stack_info = std::make_unique<StackInfo>(task.get().name, on_cpu);
        auto& stack = stack_info->stack;
        // Walk the await/gather chain starting from the leaf task.
        for (auto current_task = task;;)
        {
            auto& task = current_task.get();

            size_t stack_size = task.unwind(stack);

            if (on_cpu)
            {
                // Undo the stack unwinding
                // TODO[perf]: not super-efficient :(
                for (size_t i = 0; i < stack_size; i++)
                    stack.pop_back();

                // Instead we get part of the thread stack
                FrameStack temp_stack;
                size_t nframes =
                    (python_stack.size() > stack_size) ? python_stack.size() - stack_size : 0;
                // Move the top nframes of the thread stack onto the task
                // stack, preserving order via the temporary stack.
                for (size_t i = 0; i < nframes; i++)
                {
                    auto python_frame = python_stack.front();
                    temp_stack.push_front(python_frame);
                    python_stack.pop_front();
                }
                while (!temp_stack.empty())
                {
                    stack.push_front(temp_stack.front());
                    temp_stack.pop_front();
                }
            }

            // Add the task name frame
            stack.push_back(Frame::get(task.name));

            // Get the next task in the chain
            PyObject* task_origin = task.origin;
            if (waitee_map.find(task_origin) != waitee_map.end())
            {
                current_task = waitee_map.find(task_origin)->second;
                continue;
            }

            {
                // Check for, e.g., gather links
                std::lock_guard<std::mutex> lock(task_link_map_lock);

                if (task_link_map.find(task_origin) != task_link_map.end() &&
                    origin_map.find(task_link_map[task_origin]) != origin_map.end())
                {
                    current_task = origin_map.find(task_link_map[task_origin])->second;
                    continue;
                }
            }

            break;
        }

        // Finish off with the remaining thread stack
        for (auto p = python_stack.begin(); p != python_stack.end(); p++)
            stack.push_back(*p);

        current_tasks.push_back(std::move(stack_info));
    }

    return Result::ok();
}

// ----------------------------------------------------------------------------
// Collect stacks for the greenlets of this (native) thread: each leaf
// greenlet is unwound, then its parent chain is appended.
inline void ThreadInfo::unwind_greenlets(PyThreadState* tstate, unsigned long native_id)
{
    const std::lock_guard<std::mutex> guard(greenlet_info_map_lock);

    // Nothing to do if this thread has no greenlets at all.
    if (greenlet_thread_map.find(native_id) == greenlet_thread_map.end())
        return;

    std::unordered_set<GreenletInfo::ID> parent_greenlets;

    // Collect all parent greenlets
    std::transform(
        greenlet_parent_map.cbegin(), greenlet_parent_map.cend(),
        std::inserter(parent_greenlets, parent_greenlets.begin()),
        [](const std::pair<GreenletInfo::ID, GreenletInfo::ID>& kv) { return kv.second; });

    // Unwind the leaf greenlets
    for (auto& greenlet_info : greenlet_info_map)
    {
        auto greenlet_id = greenlet_info.first;
        auto& greenlet = greenlet_info.second;

        // Parents are reached through their children, so skip them here.
        if (parent_greenlets.find(greenlet_id) != parent_greenlets.end())
            continue;

        auto frame =
            greenlet->frame;
        if (frame == FRAME_NOT_SET)
        {
            // The greenlet has not been started yet or has finished
            continue;
        }

        bool on_cpu = frame == Py_None;
        if (cpu && ignore_non_running_threads && !on_cpu)
        {
            // Only the currently-running greenlet has a None in its frame
            // cell. If we are interested in CPU time, we skip all greenlets
            // that have an actual frame, as they are not running.
            continue;
        }

        auto stack_info = std::make_unique<StackInfo>(greenlet->name, on_cpu);
        auto& stack = stack_info->stack;

        greenlet->unwind(frame, tstate, stack);

        // Unwind the parent greenlets
        for (;;)
        {
            auto parent_greenlet_info = greenlet_parent_map.find(greenlet_id);
            if (parent_greenlet_info == greenlet_parent_map.end())
                break;

            auto parent_greenlet_id = parent_greenlet_info->second;

            auto parent_greenlet = greenlet_info_map.find(parent_greenlet_id);
            if (parent_greenlet == greenlet_info_map.end())
                break;

            auto parent_frame = parent_greenlet->second->frame;
            // Stop at unstarted/finished parents and at the running one.
            if (parent_frame == FRAME_NOT_SET || parent_frame == Py_None)
                break;

            parent_greenlet->second->unwind(parent_frame, tstate, stack);

            // Move up the greenlet chain
            greenlet_id = parent_greenlet_id;
        }

        current_greenlets.push_back(std::move(stack_info));
    }
}

// ----------------------------------------------------------------------------
// Take one sample of this thread: render CPU time (if enabled), unwind the
// stacks, and emit thread/task/greenlet stacks through the Renderer.
inline Result ThreadInfo::sample(int64_t iid, PyThreadState* tstate, microsecond_t delta)
{
    Renderer::get().render_thread_begin(tstate, name, delta, thread_id, native_id);

    if (cpu)
    {
        microsecond_t previous_cpu_time = cpu_time;
        auto update_cpu_time_success = update_cpu_time();
        if (!update_cpu_time_success) {
            return ErrorKind::CpuTimeError;
        }

        bool running = is_running();
        if (!running && ignore_non_running_threads)
        {
            return Result::ok();
        }

        // Report the CPU time elapsed since the previous sample; zero if the
        // thread is not currently running.
        Renderer::get().render_cpu_time(running ? cpu_time - previous_cpu_time : 0);
    }

    unwind(tstate);

    // Asyncio tasks
    if (current_tasks.empty())
    {
        // If we don't have any asyncio tasks, we check that we don't have any
        // greenlets either. In this case, we print the ordinary thread stack.
        // With greenlets, we recover the thread stack from the active greenlet,
        // so if we don't skip here we would have a double print.
        if (current_greenlets.empty())
        {
            // Print the PID and thread name
            Renderer::get().render_stack_begin(pid, iid, name);
            // Print the stack
            if (native)
            {
                if (!interleave_stacks()) {
                    return ErrorKind::ThreadInfoError;
                }

                interleaved_stack.render();
            }
            else
                python_stack.render();

            Renderer::get().render_stack_end(MetricType::Time, delta);
        }
    }
    else
    {
        for (auto& task_stack_info : current_tasks)
        {
            auto maybe_task_name = string_table.lookup(task_stack_info->task_name);
            if (!maybe_task_name) {
                return ErrorKind::ThreadInfoError;
            }

            auto task_name = *maybe_task_name;
            Renderer::get().render_task_begin(*task_name, task_stack_info->on_cpu);
            Renderer::get().render_stack_begin(pid, iid, name);
            if (native)
            {
                // NOTE: These stacks might be non-sensical, especially with
                // Python < 3.11.
                if (!interleave_stacks(task_stack_info->stack)) {
                    return ErrorKind::ThreadInfoError;
                }

                interleaved_stack.render();
            }
            else
                task_stack_info->stack.render();

            Renderer::get().render_stack_end(MetricType::Time, delta);
        }

        current_tasks.clear();
    }

    // Greenlet stacks
    if (!current_greenlets.empty())
    {
        for (auto& greenlet_stack : current_greenlets)
        {
            auto maybe_task_name = string_table.lookup(greenlet_stack->task_name);
            if (!maybe_task_name) {
                return ErrorKind::ThreadInfoError;
            }

            auto task_name = *maybe_task_name;
            Renderer::get().render_task_begin(*task_name, greenlet_stack->on_cpu);
            Renderer::get().render_stack_begin(pid, iid, name);

            auto& stack = greenlet_stack->stack;
            if (native)
            {
                // NOTE: These stacks might be non-sensical, especially with
                // Python < 3.11.
                if (!interleave_stacks(stack)) {
                    return ErrorKind::ThreadInfoError;
                }

                interleaved_stack.render();
            }
            else
                stack.render();

            Renderer::get().render_stack_end(MetricType::Time, delta);
        }

        current_greenlets.clear();
    }

    return Result::ok();
}

// ----------------------------------------------------------------------------
// Traverse the interpreter's doubly-linked list of thread states (via remote-
// safe copies) and invoke the callback for each known thread.
static void for_each_thread(InterpreterInfo& interp,
                            std::function<void(PyThreadState*, ThreadInfo&)> callback)
{
    std::unordered_set<PyThreadState*> threads;
    std::unordered_set<PyThreadState*> seen_threads;

    // NOTE(review): both sets were just default-constructed, so these two
    // clear() calls are redundant.
    threads.clear();
    seen_threads.clear();

    // Start from the thread list head
    threads.insert((PyThreadState*)interp.tstate_head);

    while (!threads.empty())
    {
        // Pop the next thread
        PyThreadState* tstate_addr = *threads.begin();
        threads.erase(threads.begin());

        // Mark the thread as seen
        seen_threads.insert(tstate_addr);

        // Since threads can be created and destroyed at any time, we make
        // a copy of the structure before trying to read its fields.
        PyThreadState tstate;
        if (copy_type(tstate_addr, tstate))
            // We failed to copy the thread so we skip it.
            continue;

        // Enqueue the unseen threads that we can reach from this thread.
        if (tstate.next != NULL && seen_threads.find(tstate.next) == seen_threads.end())
            threads.insert(tstate.next);
        if (tstate.prev != NULL && seen_threads.find(tstate.prev) == seen_threads.end())
            threads.insert(tstate.prev);

        {
            const std::lock_guard<std::mutex> guard(thread_info_map_lock);

            if (thread_info_map.find(tstate.thread_id) == thread_info_map.end())
            {
                // If the threading module was not imported in the target then
                // we mistakenly take the hypno thread as the main thread. We
                // assume that any missing thread is the actual main thread,
                // provided we don't already have a thread with the name
                // "MainThread". Note that this can also happen on shutdown, so
                // we need to avoid doing anything in that case.
#if PY_VERSION_HEX >= 0x030b0000
                auto native_id = tstate.native_thread_id;
#else
                // Pre-3.11 there is no native_thread_id on the thread state;
                // fall back to the process id.
                auto native_id = getpid();
#endif
                bool main_thread_tracked = false;
                for (auto& kv : thread_info_map)
                {
                    if (kv.second->name == "MainThread")
                    {
                        main_thread_tracked = true;
                        break;
                    }
                }
                if (main_thread_tracked)
                    continue;

                auto maybe_thread_info = ThreadInfo::create(tstate.thread_id, native_id, "MainThread");
                if (!maybe_thread_info) {
                    // We failed to create the thread info object so we skip it.
                    // We'll likely try again later with the valid thread
                    // information.
                    continue;
                }

                thread_info_map.emplace(tstate.thread_id, std::move(*maybe_thread_info));
            }

            // Call back with the thread state and thread info.
            callback(&tstate, *thread_info_map.find(tstate.thread_id)->second);
        }
    }
}
diff --git a/ddtrace/internal/datadog/profiling/echion/echion/timing.h b/ddtrace/internal/datadog/profiling/echion/echion/timing.h
new file mode 100644
index 00000000000..2f3d11695bd
--- /dev/null
+++ b/ddtrace/internal/datadog/profiling/echion/echion/timing.h
@@ -0,0 +1,36 @@
+// This file is part of "echion" which is released under MIT.
//
// Copyright (c) 2023 Gabriele N. Tornetta <p.tornetta@gmail.com>.

#pragma once

// NOTE(review): the #include targets below were elided by the diff
// extraction; restored from the echion upstream sources -- confirm.
#if defined PL_LINUX
#include <time.h>
#elif defined PL_DARWIN
#include <mach/clock.h>
#include <mach/mach.h>

// Calendar clock service port used by gettime() on macOS; expected to be
// obtained by the embedder before sampling starts.
inline clock_serv_t cclock;
#endif

typedef unsigned long microsecond_t;

// Timestamp of the previous sample, in microseconds.
inline microsecond_t last_time = 0;

// Convert a struct timespec / mach time value to microseconds.
#define TS_TO_MICROSECOND(ts) ((ts).tv_sec * 1e6 + (ts).tv_nsec / 1e3)
#define TV_TO_MICROSECOND(tv) ((tv).seconds * 1e6 + (tv).microseconds)

// ----------------------------------------------------------------------------
// Monotonic-ish wall clock in microseconds; returns 0 on failure (Linux).
static microsecond_t gettime()
{
#if defined PL_LINUX
    struct timespec ts;
    if (clock_gettime(CLOCK_BOOTTIME, &ts))
        return 0;
    return TS_TO_MICROSECOND(ts);
#elif defined PL_DARWIN
    mach_timespec_t ts;
    clock_get_time(cclock, &ts);
    return TS_TO_MICROSECOND(ts);
#endif
}
diff --git a/ddtrace/internal/datadog/profiling/echion/echion/vm.h b/ddtrace/internal/datadog/profiling/echion/echion/vm.h
new file mode 100644
index 00000000000..2073491e706
--- /dev/null
+++ b/ddtrace/internal/datadog/profiling/echion/echion/vm.h
@@ -0,0 +1,301 @@
+// This file is part of "echion" which is released under MIT.
+//
+// Copyright (c) 2023 Gabriele N. Tornetta <p.tornetta@gmail.com>.

#pragma once

// NOTE(review): the #include targets in this header were elided by the diff
// extraction; the headers below were inferred from the symbols used
// (std::array, std::getenv, std::memcpy, std::find, std::cerr, std::string)
// -- confirm against the echion upstream file.
#include <algorithm>
#include <array>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

#if defined PL_LINUX
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <unistd.h>

typedef pid_t proc_ref_t;

// Declared here so we don't require a libc that exposes the prototype.
ssize_t process_vm_readv(pid_t, const struct iovec*, unsigned long liovcnt,
                         const struct iovec* remote_iov, unsigned long riovcnt,
                         unsigned long flags);

// Convenience wrappers around copy_memory() for typed reads; `pid` is the
// file-level global defined at the bottom of this header.
#define copy_type(addr, dest) (copy_memory(pid, addr, sizeof(dest), &dest))
#define copy_type_p(addr, dest) (copy_memory(pid, addr, sizeof(*dest), dest))
#define copy_generic(addr, dest, size) (copy_memory(pid, (void*)(addr), size, (void*)(dest)))

#elif defined PL_DARWIN
#include <libproc.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/machine/kern_return.h>
#include <sys/types.h>

typedef mach_port_t proc_ref_t;

#define copy_type(addr, dest) (copy_memory(mach_task_self(), addr, sizeof(dest), &dest))
#define copy_type_p(addr, dest) (copy_memory(mach_task_self(), addr, sizeof(*dest), dest))
#define copy_generic(addr, dest, size) \
    (copy_memory(mach_task_self(), (void*)(addr), size, (void*)(dest)))
#endif

// Some checks are done at static initialization, so use this to read them at runtime
inline bool failed_safe_copy = false;

#if defined PL_LINUX
// Active copy strategy: defaults to process_vm_readv and may be swapped for
// the VmReader-based fallback by init_safe_copy() below.
inline ssize_t (*safe_copy)(pid_t, const struct iovec*, unsigned long, const struct iovec*,
                            unsigned long, unsigned long) = process_vm_readv;

// Fallback "safe read" mechanism: maps a scratch temporary file and uses
// pwritev() to fault-check and stage memory reads (see safe_copy below).
class VmReader
{
    void* buffer{nullptr};
    size_t sz{0};
    int fd{-1};
    inline static VmReader* instance{nullptr};  // Prevents having to set this in implementation

    VmReader(size_t _sz, void* _buffer, int _fd) : buffer(_buffer), sz{_sz}, fd{_fd}
    {
    }

    static VmReader* create(size_t sz)
    {
        // Makes a temporary file and ftruncates it to the specified size
        std::array<std::string, 3> tmp_dirs = {"/dev/shm", "/tmp", "/var/tmp"};
        std::string tmp_suffix = "/echion-XXXXXX";

        int fd = -1;
        void* ret = nullptr;

        for (auto& tmp_dir : tmp_dirs)
        {
            // Reset the file descriptor, just in case
            // NOTE(review): on the first iteration this is close(-1), a
            // harmless EBADF.
            close(fd);
            fd = -1;

            // Create the temporary file
            // NOTE(review): the mapping below uses MAP_PRIVATE; whether later
            // pwritev() writes to the file are visible through a private
            // mapping is unspecified by POSIX -- MAP_SHARED would express the
            // intent unambiguously. Confirm against upstream.
            std::string
tmpfile = tmp_dir + tmp_suffix; + fd = mkstemp(tmpfile.data()); + if (fd == -1) + continue; + + // Unlink might fail if delete is blocked on the VFS, but currently no action is taken + unlink(tmpfile.data()); + + // Make sure we have enough size + if (ftruncate(fd, sz) == -1) + { + continue; + } + + // Map the file + ret = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (ret == MAP_FAILED) + { + ret = nullptr; + continue; + } + + // Successful. Break. + break; + } + + return new VmReader(sz, ret, fd); + } + + bool is_valid() const { return buffer != nullptr; } + +public: + static VmReader* get_instance() + { + if (instance == nullptr) + { + instance = VmReader::create(1024 * 1024); // A megabyte? + if (!instance) + { + std::cerr << "Failed to initialize VmReader with buffer size " << instance->sz << std::endl; + return nullptr; + } + } + + return instance; + } + + ssize_t safe_copy(pid_t pid, const struct iovec* local_iov, unsigned long liovcnt, + const struct iovec* remote_iov, unsigned long riovcnt, unsigned long flags) + { + (void)pid; + (void)flags; + if (liovcnt != 1 || riovcnt != 1) + { + // Unsupported + return 0; + } + + // Check to see if we need to resize the buffer + if (remote_iov[0].iov_len > sz) + { + if (ftruncate(fd, remote_iov[0].iov_len) == -1) + { + return 0; + } + else + { + void* tmp = mremap(buffer, sz, remote_iov[0].iov_len, MREMAP_MAYMOVE); + if (tmp == MAP_FAILED) + { + return 0; + } + buffer = tmp; // no need to munmap + sz = remote_iov[0].iov_len; + } + } + + ssize_t ret = pwritev(fd, remote_iov, riovcnt, 0); + if (ret == -1) + { + return ret; + } + + // Copy the data from the buffer to the remote process + std::memcpy(local_iov[0].iov_base, buffer, local_iov[0].iov_len); + return ret; + } + + ~VmReader() + { + if (buffer) + { + munmap(buffer, sz); + } + if (fd != -1) + { + close(fd); + } + instance = nullptr; + } +}; + +/** + * Initialize the safe copy operation on Linux + */ +inline bool read_process_vm_init() +{ + 
    VmReader* _ = VmReader::get_instance();
    return !!_;
}

// process_vm_readv-shaped adapter over the VmReader singleton, installable
// into the `safe_copy` function pointer.
inline ssize_t vmreader_safe_copy(pid_t pid, const struct iovec* local_iov, unsigned long liovcnt,
                                  const struct iovec* remote_iov, unsigned long riovcnt,
                                  unsigned long flags)
{
    auto reader = VmReader::get_instance();
    if (!reader)
        return 0;
    return reader->safe_copy(pid, local_iov, liovcnt, remote_iov, riovcnt, flags);
}

/**
 * Initialize the safe copy operation on Linux
 *
 * This occurs at static init
 */
__attribute__((constructor)) inline void init_safe_copy()
{
    // Self-test buffers with distinct fill patterns.
    char src[128];
    char dst[128];
    for (size_t i = 0; i < 128; i++)
    {
        src[i] = 0x41;
        dst[i] = ~0x42;
    }

    // Check to see that process_vm_readv works, unless it's overridden
    const char force_override_str[] = "ECHION_ALT_VM_READ_FORCE";
    // NOTE(review): element type elided by the extraction; restored from
    // upstream -- confirm.
    const std::array<std::string, 6> truthy_values = {"1", "true", "yes",
                                                      "on", "enable", "enabled"};
    const char* force_override = std::getenv(force_override_str);
    if (!force_override || std::find(truthy_values.begin(), truthy_values.end(), force_override) ==
                               truthy_values.end())
    {
        struct iovec iov_dst = {dst, sizeof(dst)};
        struct iovec iov_src = {src, sizeof(src)};
        ssize_t result = process_vm_readv(getpid(), &iov_dst, 1, &iov_src, 1, 0);

        // If we succeed, then use process_vm_readv
        if (result == sizeof(src))
        {
            safe_copy = process_vm_readv;
            return;
        }
    }

    // Else, we have to setup the writev method
    if (!read_process_vm_init())
    {
        // std::cerr might not have been fully initialized at this point, so use
        // fprintf instead.
        fprintf(stderr, "Failed to initialize all safe copy interfaces\n");
        failed_safe_copy = true;
        return;
    }

    safe_copy = vmreader_safe_copy;
}
#endif

/**
 * Copy a chunk of memory from a portion of the virtual memory of another
 * process.
+ * @param proc_ref_t the process reference (platform-dependent) + * @param void * the remote address + * @param ssize_t the number of bytes to read + * @param void * the destination buffer, expected to be at least as large + * as the number of bytes to read. + * + * @return zero on success, otherwise non-zero. + */ +static inline int copy_memory(proc_ref_t proc_ref, void* addr, ssize_t len, void* buf) +{ + ssize_t result = -1; + + // Early exit on zero page + if (reinterpret_cast(addr) < 4096) + { + return result; + } + +#if defined PL_LINUX + struct iovec local[1]; + struct iovec remote[1]; + + local[0].iov_base = buf; + local[0].iov_len = len; + remote[0].iov_base = addr; + remote[0].iov_len = len; + + result = safe_copy(proc_ref, local, 1, remote, 1, 0); + +#elif defined PL_DARWIN + kern_return_t kr = mach_vm_read_overwrite(proc_ref, (mach_vm_address_t)addr, len, + (mach_vm_address_t)buf, (mach_vm_size_t*)&result); + + if (kr != KERN_SUCCESS) + return -1; + +#endif + + return len != result; +} + +inline pid_t pid = 0; + +inline void _set_pid(pid_t _pid) +{ + pid = _pid; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt index fecd1776380..b7e42f876aa 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -40,13 +40,9 @@ if(NOT Threads_FOUND OR NOT CMAKE_USE_PTHREADS_INIT) endif() # Add echion -set(ECHION_COMMIT - "3ebeb3e975239f252fa0d6bb739344f35eaf1657" # https://github.com/kowalskithomas/echion/commit/3ebeb3e975239f252fa0d6bb739344f35eaf1657 - CACHE STRING "Commit hash of echion to use") FetchContent_Declare( echion - GIT_REPOSITORY "https://github.com/P403n1x87/echion.git" - GIT_TAG ${ECHION_COMMIT}) + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../echion") FetchContent_GetProperties(echion) if(NOT echion_POPULATED) @@ -54,7 +50,7 @@ if(NOT echion_POPULATED) endif() # Specify the target 
C-extension that we want to build -add_library(${EXTENSION_NAME} SHARED ${echion_SOURCE_DIR}/echion/frame.cc ${echion_SOURCE_DIR}/echion/render.cc +add_library(${EXTENSION_NAME} SHARED ${echion_SOURCE_DIR}/echion/frame.cc src/sampler.cpp src/stack_renderer.cpp src/stack_v2.cpp src/thread_span_links.cpp) # Add common config diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp index 6f1e4ec0417..ce9faaf2170 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp @@ -40,7 +40,7 @@ class StackRenderer : public RendererInterface // the sample is created, this has to be reset. bool pushed_task_name = false; - void open() override {} + Result open() override { return Result::ok(); } void close() override {} void header() override {} void metadata(const std::string&, const std::string&) override {} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp index 147e4d3b0b9..5c346f9f394 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp @@ -2,6 +2,7 @@ #include "thread_span_links.hpp" +#include "echion/errors.h" #include "echion/greenlets.h" #include "echion/interp.h" #include "echion/tasks.h" @@ -153,7 +154,7 @@ Sampler::sampling_thread(const uint64_t seq_num) // Perform the sample for_each_interp([&](InterpreterInfo& interp) -> void { for_each_thread(interp, [&](PyThreadState* tstate, ThreadInfo& thread) { - thread.sample(interp.id, tstate, wall_time_us); + (void)thread.sample(interp.id, tstate, wall_time_us); }); }); @@ -245,9 +246,10 @@ Sampler::register_thread(uint64_t id, uint64_t native_id, const char* name) static bool has_errored = false; auto it = thread_info_map.find(id); if (it == 
thread_info_map.end()) { - try { - thread_info_map.emplace(id, std::make_unique(id, native_id, name)); - } catch (const ThreadInfo::Error& e) { + auto maybe_thread_info = ThreadInfo::create(id, native_id, name); + if (maybe_thread_info) { + thread_info_map.emplace(id, std::move(*maybe_thread_info)); + } else { if (!has_errored) { has_errored = true; std::cerr << "Failed to register thread: " << std::hex << id << std::dec << " (" << native_id << ") " @@ -255,9 +257,10 @@ Sampler::register_thread(uint64_t id, uint64_t native_id, const char* name) } } } else { - try { - it->second = std::make_unique(id, native_id, name); - } catch (const ThreadInfo::Error& e) { + auto maybe_thread_info = ThreadInfo::create(id, native_id, name); + if (maybe_thread_info) { + it->second = std::move(*maybe_thread_info); + } else { if (!has_errored) { has_errored = true; std::cerr << "Failed to register thread: " << std::hex << id << std::dec << " (" << native_id << ") " diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp index bc3bcdbd455..bdadda27f8e 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp @@ -131,15 +131,18 @@ StackRenderer::render_frame(Frame& frame) static constexpr std::string_view missing_name = ""; std::string_view filename_str; std::string_view name_str; - try { - filename_str = string_table.lookup(frame.filename); - } catch (StringTable::Error&) { + + auto maybe_filename_str = string_table.lookup(frame.filename); + if (maybe_filename_str) { + filename_str = **maybe_filename_str; + } else { filename_str = missing_filename; } - try { - name_str = string_table.lookup(frame.name); - } catch (StringTable::Error&) { + auto maybe_name_str = string_table.lookup(frame.name); + if (maybe_name_str) { + name_str = **maybe_name_str; + } else { name_str = missing_name; } diff --git 
a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp index db56b0f6457..5af46e4268e 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp @@ -213,16 +213,15 @@ track_greenlet(PyObject* Py_UNUSED(m), PyObject* args) if (!PyArg_ParseTuple(args, "lOO", &greenlet_id, &name, &frame)) return NULL; - StringTable::Key greenlet_name; - - try { - greenlet_name = string_table.key(name); - } catch (StringTable::Error&) { + auto maybe_greenlet_name = string_table.key(name); + if (!maybe_greenlet_name) { // We failed to get this task but we keep going PyErr_SetString(PyExc_RuntimeError, "Failed to get greenlet name from the string table"); return NULL; } + auto greenlet_name = *maybe_greenlet_name; + Py_BEGIN_ALLOW_THREADS; Sampler::get().track_greenlet(greenlet_id, greenlet_name, frame); Py_END_ALLOW_THREADS; diff --git a/releasenotes/notes/profiling-echion-fix-alloc-f1204a794b1d3a1d.yaml b/releasenotes/notes/profiling-echion-fix-alloc-f1204a794b1d3a1d.yaml new file mode 100644 index 00000000000..d51709c60ad --- /dev/null +++ b/releasenotes/notes/profiling-echion-fix-alloc-f1204a794b1d3a1d.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + profiling: Upgrades echion to resolve an issue where stack profiler can + allocate a large amount of memory unnecessarily. +