From 5db41c3e14ea9159e76722b65116d894331aa8d4 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Fri, 5 Sep 2025 04:57:52 +0200 Subject: [PATCH 1/3] Implemented copy tracker for debugging --- CMakeLists.txt | 10 +++ include/sparrow/debug/copy_tracker.hpp | 29 +++++++ src/debug/copy_tracker.cpp | 105 +++++++++++++++++++++++++ src/record_batch.cpp | 11 +++ test/test_record_batch.cpp | 19 +++++ 5 files changed, 174 insertions(+) create mode 100644 include/sparrow/debug/copy_tracker.hpp create mode 100644 src/debug/copy_tracker.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 850a14f8b..4dbd8b684 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,6 +102,7 @@ OPTION(ENABLE_INTEGRATION_TEST "Enable integration tests" OFF) MESSAGE(STATUS "🔧 Enable integration tests: ${ENABLE_INTEGRATION_TEST}") OPTION(ENABLE_INTEGRATION_TEST "Creates json_reader target and enable integration tests" OFF) OPTION(CREATE_JSON_READER_TARGET "Create json_reader target, automatically set when ENABLE_INTEGRATION_TEST is ON" OFF) +OPTION(TRACK_COPIES, "Track copies in tests" OFF) if(ENABLE_INTEGRATION_TEST) set(CREATE_JSON_READER_TARGET ON) @@ -181,6 +182,11 @@ if(SPARROW_CONTRACTS_THROW_ON_FAILURE) list(APPEND SPARROW_COMPILE_DEFINITIONS SPARROW_CONTRACTS_THROW_ON_FAILURE=1) endif() +if(TRACK_COPIES) + message(STATUS "Tracking copies in tests") + list(APPEND SPARROW_COMPILE_DEFINITIONS SPARROW_TRACK_COPIES) +endif() + # Build # ===== set(BINARY_BUILD_DIR "${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}") @@ -227,6 +233,9 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/config/config.hpp ${SPARROW_INCLUDE_DIR}/sparrow/config/sparrow_version.hpp + # debug + ${SPARROW_INCLUDE_DIR}/sparrow/debug/copy_tracker.hpp + # detail ${SPARROW_INCLUDE_DIR}/sparrow/details/3rdparty/float16_t.hpp @@ -323,6 +332,7 @@ set(SPARROW_SRC ${SPARROW_SOURCE_DIR}/arrow_interface/arrow_array_schema_proxy.cpp ${SPARROW_SOURCE_DIR}/arrow_interface/arrow_schema.cpp ${SPARROW_SOURCE_DIR}/arrow_interface/private_data_ownership.cpp + ${SPARROW_SOURCE_DIR}/debug/copy_tracker.cpp ${SPARROW_SOURCE_DIR}/layout/array_factory.cpp ${SPARROW_SOURCE_DIR}/layout/array_helper.cpp ${SPARROW_SOURCE_DIR}/layout/fixed_width_binary_array_utils.cpp diff --git a/include/sparrow/debug/copy_tracker.hpp b/include/sparrow/debug/copy_tracker.hpp new file mode 100644 index 000000000..4be7c28b2 --- /dev/null +++ b/include/sparrow/debug/copy_tracker.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include "sparrow/config/config.hpp" + +namespace sparrow::copy_tracker +{ + template + std::string key(); + + constexpr bool is_enabled() + { +#ifdef SPARROW_TRACK_COPIES + return true; +#else + return false; +#endif + } + + SPARROW_API void increase(const std::string& key); + SPARROW_API void reset(const std::string& key); + SPARROW_API void reset_all(); + SPARROW_API int count(const std::string& key, int disabled_value = 0); + SPARROW_API std::vector key_list(); + +} + diff --git a/src/debug/copy_tracker.cpp b/src/debug/copy_tracker.cpp new file mode 100644 index 000000000..aa172e6f7 --- /dev/null +++ b/src/debug/copy_tracker.cpp @@ -0,0 +1,105 @@ +#include "sparrow/debug/copy_tracker.hpp" + +#include +#include +#include + +namespace sparrow::copy_tracker +{ +#ifdef SPARROW_TRACK_COPIES + + class copy_tracker_impl + { + public: + + static copy_tracker_impl& instance() + { + static copy_tracker_impl inst; + return inst; + } + + void increase(const std::string& key) + { + m_copy_count[key] += 1; + } + + void reset(const std::string& key) + { + m_copy_count[key] = 0; + } + + void reset_all() + { + m_copy_count.clear(); + } + + int count(const std::string& key) + { + return m_copy_count[key]; + } + + auto key_list() + { + return std::views::keys(m_copy_count); + } + + private: + + copy_tracker_impl() = default; + + std::map m_copy_count = {}; + }; + + void increase(const std::string& key) + { + return copy_tracker_impl::instance().increase(key); + } + + void reset(const std::string& key) + { + return copy_tracker_impl::instance().reset(key); + } + + void reset_all() + { + return copy_tracker_impl::instance().reset_all(); + } + + int count(const std::string& key, int /*disabled_value*/) + { + return copy_tracker_impl::instance().count(key); + } + + std::vector key_list() + { + auto keys = copy_tracker_impl::instance().key_list(); + return { keys.begin(), keys.end() }; + } + +#else + + void increase(const std::string&) + { + } + + void reset(const std::string&) + { + } + + void reset_all() + { + } + + int count(const std::string&, int disabled_value) + { + return disabled_value; + } + + std::vector key_list() + { + return {}; + } + +#endif + +} diff --git a/src/record_batch.cpp b/src/record_batch.cpp index 202b941d7..c6a3f54a8 100644 --- a/src/record_batch.cpp +++ b/src/record_batch.cpp @@ -16,10 +16,20 @@ #include +#include "sparrow/debug/copy_tracker.hpp" #include "sparrow/utils/contracts.hpp" namespace sparrow { + namespace copy_tracker + { + template <> + SPARROW_API std::string key() + { + return "record_batch"; + } + } + record_batch::record_batch(initializer_type init) { m_name_list.reserve(init.size()); @@ -97,6 +107,7 @@ namespace sparrow , m_array_list(rhs.m_array_list) { update_array_map_cache(); + copy_tracker::increase(copy_tracker::key()); } record_batch& record_batch::operator=(const record_batch& rhs) diff --git a/test/test_record_batch.cpp b/test/test_record_batch.cpp index 8fc449c89..fd0429528 100644 --- a/test/test_record_batch.cpp +++ b/test/test_record_batch.cpp @@ -14,6 +14,7 @@ #include +#include "sparrow/debug/copy_tracker.hpp" #include "sparrow/primitive_array.hpp" #include "sparrow/record_batch.hpp" @@ -199,9 +200,16 @@ namespace sparrow TEST_CASE("copy semantic") { +#ifdef SPARROW_TRACK_COPIES + auto key = std::string("record_batch");//copy_tracker::key(); + copy_tracker::reset(key); +#endif auto record1 = make_record_batch(col_size); auto record2(record1); CHECK_EQ(record1, record2); +#ifdef SPARROW_TRACK_COPIES + CHECK_EQ(copy_tracker::count(key), 1); +#endif auto record3 = make_record_batch(col_size + 2u); CHECK_NE(record1, record3); @@ -330,6 +338,17 @@ namespace sparrow ); CHECK(res); } +#ifdef SPARROW_TRACK_COPIES + TEST_CASE("emplace_back") + { + std::vector vec(3, make_record_batch(col_size)); + std::cout << "capacity: " << vec.capacity() << std::endl; + auto key = std::string("record_batch");//copy_tracker::key(); + copy_tracker::reset(key); + vec.emplace_back(make_record_batch(col_size)); + CHECK_EQ(copy_tracker::count(key), 0); + } +#endif TEST_CASE("extract_arrow_structures") { From 7e16ea57ffd8adbb77bd2250c99a994a514ef2e2 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Fri, 5 Sep 2025 06:20:43 +0200 Subject: [PATCH 2/3] Activated copy tracking in CI --- .github/workflows/linux.yml | 1 + .github/workflows/osx.yml | 1 + .github/workflows/windows.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c47db56fb..09c1fc777 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -143,6 +143,7 @@ jobs: -DCMAKE_CXX_STANDARD=${{matrix.cpp_standard}} \ -DUSE_DATE_POLYFILL=${{matrix.date-polyfill}} \ -DBUILD_TESTS=ON \ + -DTRACK_COPIES=ON \ -DENABLE_INTEGRATION_TEST=${{matrix.shared}} \ -DBUILD_EXAMPLES=ON \ -DCREATE_JSON_READER_TARGET=ON \ diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 585b35e74..4cdcf1bc4 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -65,6 +65,7 @@ jobs: -DCMAKE_BUILD_TYPE:STRING=${{matrix.config.name}} \ -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ -DBUILD_TESTS=ON \ + -DTRACK_COPIES=ON \ -DENABLE_INTEGRATION_TEST=ON \ -DBUILD_EXAMPLES=ON \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 8937b70b0..bdf120728 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -134,6 +134,7 @@ jobs: -DCMAKE_PREFIX_PATH=$SPARROW_DEPS_PREFIX \ -DCMAKE_INSTALL_PREFIX=$SPARROW_INSTALL_PREFIX \ -DBUILD_TESTS=ON \ + -DTRACK_COPIES=ON \ -DENABLE_INTEGRATION_TEST=${{matrix.toolchain.shared}} \ -DBUILD_EXAMPLES=ON \ -DCREATE_JSON_READER_TARGET=ON \ From 587a42ea5c65d419134107e98f7257f08b92f7a6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:14:55 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- include/sparrow/debug/copy_tracker.hpp | 1 - src/debug/copy_tracker.cpp | 2 +- test/test_record_batch.cpp | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/sparrow/debug/copy_tracker.hpp b/include/sparrow/debug/copy_tracker.hpp index 4be7c28b2..172cc2c40 100644 --- a/include/sparrow/debug/copy_tracker.hpp +++ b/include/sparrow/debug/copy_tracker.hpp @@ -26,4 +26,3 @@ namespace sparrow::copy_tracker SPARROW_API std::vector key_list(); } - diff --git a/src/debug/copy_tracker.cpp b/src/debug/copy_tracker.cpp index aa172e6f7..fa7725153 100644 --- a/src/debug/copy_tracker.cpp +++ b/src/debug/copy_tracker.cpp @@ -73,7 +73,7 @@ namespace sparrow::copy_tracker std::vector key_list() { auto keys = copy_tracker_impl::instance().key_list(); - return { keys.begin(), keys.end() }; + return {keys.begin(), keys.end()}; } #else diff --git a/test/test_record_batch.cpp b/test/test_record_batch.cpp index fd0429528..df9b5672e 100644 --- a/test/test_record_batch.cpp +++ b/test/test_record_batch.cpp @@ -201,7 +201,7 @@ namespace sparrow TEST_CASE("copy semantic") { #ifdef SPARROW_TRACK_COPIES - auto key = std::string("record_batch");//copy_tracker::key(); + auto key = std::string("record_batch"); // copy_tracker::key(); copy_tracker::reset(key); #endif auto record1 = make_record_batch(col_size); @@ -343,7 +343,7 @@ namespace sparrow { std::vector vec(3, make_record_batch(col_size)); std::cout << "capacity: " << vec.capacity() << std::endl; - auto key = std::string("record_batch");//copy_tracker::key(); + auto key = std::string("record_batch"); // copy_tracker::key(); copy_tracker::reset(key); vec.emplace_back(make_record_batch(col_size)); CHECK_EQ(copy_tracker::count(key), 0);