From 8ba0ed8adce440ab1a1e8a2bbbaa8bcf779f3307 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Wed, 16 Jun 2021 12:23:57 -0700 Subject: [PATCH 01/17] Very fast C++ log parsing --- extras/analyze/CMakeLists.txt | 33 ++++ extras/analyze/src/test.main.cpp | 309 +++++++++++++++++++++++++++++++ 2 files changed, 342 insertions(+) create mode 100644 extras/analyze/CMakeLists.txt create mode 100644 extras/analyze/src/test.main.cpp diff --git a/extras/analyze/CMakeLists.txt b/extras/analyze/CMakeLists.txt new file mode 100644 index 00000000..27c54774 --- /dev/null +++ b/extras/analyze/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 3.20.3) + +project(EventAnalyze) + +include(FetchContent) + +FetchContent_Declare( + mio + GIT_REPOSITORY https://github.com/mandreyel/mio.git + GIT_TAG 3f86a95c0784d73ce6815237ec33ed25f233b643 +) +FetchContent_MakeAvailable(mio) + +FetchContent_Declare( + FindTBB + GIT_REPOSITORY https://github.com/justusc/FindTBB.git + GIT_TAG 25ecdea817b3af4a26d74ddcd439642dbd706acb +) +FetchContent_GetProperties(FindTBB) +if(NOT findtbb_POPULATED) + FetchContent_Populate(FindTBB) + list(APPEND CMAKE_MODULE_PATH "${findtbb_SOURCE_DIR}" ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +endif() + +find_package(TBB) +if(TBB_FOUND) + add_definitions(-DHAS_TBB) + link_libraries(tbb) +endif() + +add_executable(test src/test.main.cpp) +target_link_libraries(test PRIVATE mio::mio) +target_compile_features(test PRIVATE cxx_std_20) diff --git a/extras/analyze/src/test.main.cpp b/extras/analyze/src/test.main.cpp new file mode 100644 index 00000000..31573f86 --- /dev/null +++ b/extras/analyze/src/test.main.cpp @@ -0,0 +1,309 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace std::literals; + +enum class Type { + Int, + NegInt, + Float, + String, + Bool, +}; + +union Value { + std::uint64_t I; + std::int64_t SI; + double F; + std::string_view S; + bool B; +}; + 
+struct EventId { + std::string_view value; + + bool operator==(const EventId &) const = default; +}; + +struct EventSchema { + EventId id; + std::vector parameters; + std::vector param_types; + + bool operator==(const EventSchema &) const = default; + bool operator==(EventId rhs) const { return id == rhs; } +}; + +namespace std { +template <> struct hash { + std::size_t operator()(EventId id) const noexcept { + return std::hash()(id.value); + } +}; +template <> struct hash { + std::size_t operator()(const EventSchema &schema) const noexcept { + return std::hash()(schema.id); + } +}; +} // namespace std + +struct TransparentHash { + using is_transparent = void; + + template + std::size_t operator()(const T &it) const + noexcept(noexcept(std::hash{}(it))) { + return std::hash{}(it); + } +}; + +thread_local std::deque Values; + +struct Event { + EventId id; + std::deque const *values; + std::size_t start; // Indices into Values. + std::size_t end; +}; + +struct Block { + std::string_view name; + std::unordered_map> events; + std::string_view raw_log; +}; + +static constexpr std::string_view RegionNameEv = + R"("event_id": "ProcessDag", "name": ")"; +static const std::boyer_moore_horspool_searcher + BlockNameSearcher(RegionNameEv.begin(), RegionNameEv.end()); + +std::string_view parse_name(const std::string_view block_log) { + auto it = std::search(block_log.begin(), block_log.end(), BlockNameSearcher); + it += RegionNameEv.size(); + auto end = std::find(it, block_log.end(), '"'); + + return std::string_view(it, end); +} + +EventSchema parse_event_schema( + EventId id, + const std::vector> &init) { + EventSchema result; + result.id = id; + result.param_types.reserve(init.size() - 1); + result.parameters.reserve(init.size() - 1); + + for (std::size_t index = 0; index < init.size() - 1; ++index) { + result.parameters.push_back(init[index + 1].first); + assert(!init[index + 1].second.empty()); + if (init[index + 1].second.front() == '"') { + 
result.param_types.push_back(Type::String); + } else if (init[index + 1].second == "true"sv || + init[index + 1].second == "false"sv) { + result.param_types.push_back(Type::Bool); + } else if (init[index + 1].second.front() == '-') { + result.param_types.push_back(Type::NegInt); + } else { + result.param_types.push_back(Type::Int); + } + } + + return result; +} + +static std::unordered_set> + MasterSchemas; +std::mutex MasterSchemaMutex; +thread_local std::unordered_set> + Schemas; + +void update_schema_structures(EventId id, EventSchema schema) { + std::scoped_lock sl(MasterSchemaMutex); + if (MasterSchemas.find(id) == MasterSchemas.end()) + MasterSchemas.emplace_hint(MasterSchemas.end(), std::move(schema)); + Schemas = MasterSchemas; +} + +Event parse_event(const std::string_view event) { + const auto end = event.rfind('}'); + auto begin = event.find('{'); + + std::vector> result; + + while (begin < end) { + const auto key_f = event.find('"', begin + 1) + 1; + if (key_f == std::string_view::npos) + break; + const auto key_e = event.find('"', key_f); + if (key_e == std::string_view::npos) + break; + const std::string_view key = event.substr(key_f, key_e - key_f); + const auto val_f = + event.find_first_not_of(" \t\n", event.find(':', key_e + 1) + 1); + if (val_f == std::string_view::npos) + break; + const auto val_e = event[val_f] == '"' + ? 
event.find('"', val_f + 1) + 1 + : event.find_first_of(",} \t\n", val_f + 1); + if (val_e == std::string_view::npos) + break; + std::string_view val = event.substr(val_f, val_e - val_f); + + result.emplace_back(key, val); + begin = event.find_first_of(",}", val_e + 1); + if (begin == std::string_view::npos) + break; + begin += 1; + } + + assert(result[0].first == "event_id"sv); + EventId id(result[0].second); + + auto it = Schemas.find(id); + if (it == Schemas.end()) { + auto sch = ::parse_event_schema(id, result); + ::update_schema_structures(id, std::move(sch)); + it = Schemas.find(id); + } + + assert(it->param_types.size() == result.size() - 1); + std::size_t start = Values.size(); + for (std::size_t index = 0; index < result.size() - 1; ++index) { + Values.push_back([&]() -> Value { + switch (it->param_types[index]) { + case Type::Int: { + std::uint64_t val; + [[maybe_unused]] const auto r = std::from_chars( + result[index + 1].second.data(), + result[index + 1].second.data() + result[index + 1].second.size(), + val); + + assert(r.ptr == result[index + 1].second.data() + + result[index + 1].second.size()); + assert(r.ec == std::errc()); + + return Value{.I = val}; + } + case Type::NegInt: { + std::int64_t val; + [[maybe_unused]] const auto r = std::from_chars( + result[index + 1].second.data(), + result[index + 1].second.data() + result[index + 1].second.size(), + val); + + assert(r.ptr == result[index + 1].second.data() + + result[index + 1].second.size()); + assert(r.ec == std::errc()); + + return Value{.SI = val}; + } + case Type::Float: { + double val; + [[maybe_unused]] const auto r = std::from_chars( + result[index + 1].second.data(), + result[index + 1].second.data() + result[index + 1].second.size(), + val); + + assert(r.ptr == result[index + 1].second.data() + + result[index + 1].second.size()); + assert(r.ec == std::errc()); + + return Value{.F = val}; + } + case Type::String: + return Value{.S = result[index + 1].second.substr( + 1, result[index + 
1].second.size() - 2)}; + case Type::Bool: + return Value{.B = result[index + 1].second == "true"sv}; + } + std::abort(); + }()); + } + std::size_t iend = Values.size(); + + return Event{ + .id = id, + .values = &Values, + .start = start, + .end = iend, + }; +} + +static constexpr std::string_view EventTag = R"(EVENT: {)"; +static const std::boyer_moore_horspool_searcher + EventTagSearcher(EventTag.begin(), EventTag.end()); +std::unordered_map> +parse_events(const std::string_view block_log) { + std::unordered_map> result; + + const auto e = block_log.end(); + auto b = std::search(block_log.begin(), e, EventTagSearcher); + while (b != e) { + auto line_end = std::find(b + EventTag.size() - 1, e, '\n'); + + std::string_view event(b, line_end); + + Event ev = ::parse_event(event); + result[ev.id].push_back(ev); + + b = std::search(line_end, e, EventTagSearcher); + } + + return result; +} + +Block parse_block(const std::string_view block_log) { + return Block{ + .name = ::parse_name(block_log), + .events = ::parse_events(block_log), + .raw_log = block_log, + }; +} + +std::vector split_blocks(const std::string_view file) { + static constexpr std::string_view RegionDelimiter = + "********** Opt Scheduling **********"; + const std::boyer_moore_horspool_searcher searcher(RegionDelimiter.begin(), + RegionDelimiter.end()); + + std::vector result; + + const auto e = file.end(); + auto b = std::search(file.begin(), e, searcher); + while (b != e) { + auto it = std::search(b + RegionDelimiter.size(), e, searcher); + result.emplace_back(file.data() + std::distance(file.begin(), b), + std::distance(b, it)); + b = it; + } + + return result; +} + +int main(int argc, char **argv) { + mio::mmap_source mmap(argv[1]); + std::string_view file(mmap.data(), mmap.size()); + const auto raw_blocks = ::split_blocks(file); + std::vector blocks(raw_blocks.size()); + std::transform( +#if HAS_TBB + std::execution::par_unseq, +#endif + raw_blocks.begin(), raw_blocks.end(), blocks.begin(), + 
[](std::string_view blk) { return ::parse_block(blk); }); + + std::cout << "done " << blocks.size() << std::endl; + std::cout << "done " << MasterSchemas.size() << std::endl; +} From fdfa748b69f54e3f9a744816c4bc63dc741d5dc9 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Wed, 16 Jun 2021 17:16:03 -0700 Subject: [PATCH 02/17] A python module that parses the blocks WIP - need to: - Benchmark name parsing - File name parsing - RawLog parsing (lazy; we don't want to hold onto the memory unless we need it) - Integration into the Python code --- extras/analyze/CMakeLists.txt | 14 ++ extras/analyze/include/parse.hpp | 11 ++ extras/analyze/include/types.hpp | 139 +++++++++++++++++ extras/analyze/src/module.cpp | 15 ++ extras/analyze/src/parse.cpp | 252 +++++++++++++++++++++++++++++++ extras/analyze/src/types.cpp | 113 ++++++++++++++ 6 files changed, 544 insertions(+) create mode 100644 extras/analyze/include/parse.hpp create mode 100644 extras/analyze/include/types.hpp create mode 100644 extras/analyze/src/module.cpp create mode 100644 extras/analyze/src/parse.cpp create mode 100644 extras/analyze/src/types.cpp diff --git a/extras/analyze/CMakeLists.txt b/extras/analyze/CMakeLists.txt index 27c54774..e8c0947c 100644 --- a/extras/analyze/CMakeLists.txt +++ b/extras/analyze/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.20.3) project(EventAnalyze) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + include(FetchContent) FetchContent_Declare( @@ -11,6 +13,13 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(mio) +FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG v2.6.2 +) +FetchContent_MakeAvailable(pybind11) + FetchContent_Declare( FindTBB GIT_REPOSITORY https://github.com/justusc/FindTBB.git @@ -28,6 +37,11 @@ if(TBB_FOUND) link_libraries(tbb) endif() +pybind11_add_module(eventanalyze src/module.cpp src/parse.cpp src/types.cpp) +target_include_directories(eventanalyze PUBLIC include) 
+target_link_libraries(eventanalyze PRIVATE mio::mio) +target_compile_features(eventanalyze PUBLIC cxx_std_20) + add_executable(test src/test.main.cpp) target_link_libraries(test PRIVATE mio::mio) target_compile_features(test PRIVATE cxx_std_20) diff --git a/extras/analyze/include/parse.hpp b/extras/analyze/include/parse.hpp new file mode 100644 index 00000000..446b1393 --- /dev/null +++ b/extras/analyze/include/parse.hpp @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace ev { +void defParse(pybind11::module &Mod); + +struct EventSchema; + +const EventSchema *getSchema(std::string_view Id); +} // namespace ev diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp new file mode 100644 index 00000000..a64ad09e --- /dev/null +++ b/extras/analyze/include/types.hpp @@ -0,0 +1,139 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ev { +using Number = std::variant; + +struct EventId { + std::string_view Value; + + bool operator==(const EventId &) const = default; +}; + +enum class Type { + Number, + String, + Bool, +}; + +union Value { + Number Num; + std::string_view Str; + bool Bool; +}; + +struct EventSchema { + EventId Id; + std::vector Parameters; + std::vector ParamTypes; + + bool operator==(const EventSchema &) const = default; + bool operator==(EventId Rhs) const { return Id == Rhs; } +}; + +struct Event { + EventId Id; + const std::deque *Values; + std::size_t Start; // Indices into values. 
+ std::size_t End; + + bool operator==(EventId Rhs) const { return Id == Rhs; } +}; + +struct EventIdHash { + using is_transparent = void; + + std::size_t operator()(std::string_view Id) const noexcept { + return std::hash()(Id); + } + + std::size_t operator()(EventId Id) const noexcept { + return (*this)(Id.Value); + } + + std::size_t operator()(const EventSchema &Schema) const noexcept { + return (*this)(Schema.Id); + } + + std::size_t operator()(const std::vector &Events) const noexcept { + assert(!Events.empty()); + return (*this)(Events.front()); + } + + std::size_t operator()(const Event &Event) const noexcept { + return (*this)(Event.Id); + } +}; + +struct EventIdEq { + using is_transparent = void; + + template + bool operator()(const T &Lhs, const U &Rhs) const { + return Lhs == Rhs; + } + + template + bool operator()(const T &Lhs, const std::vector &Rhs) const { + assert(!Rhs.empty()); + return (*this)(Lhs, Rhs.front()); + } + + template + bool operator()(const std::vector &Lhs, const U &Rhs) const { + assert(!Lhs.empty()); + return (*this)(Lhs.front(), Rhs); + } + + template + bool operator()(const std::vector &Lhs, const std::vector &Rhs) const { + assert(!Lhs.empty() && !Rhs.empty()); + return (*this)(Lhs.front(), Rhs.front()); + } + + bool operator()(const Event &Lhs, const Event &Rhs) const { + return (*this)(Lhs.Id, Rhs.Id); + } +}; + +using BlockEventMap = + std::unordered_set, EventIdHash, EventIdEq>; + +struct BlockEvents { + const BlockEventMap *Events; +}; + +struct UnloadedRawLog { + std::size_t Offset; + std::size_t Size; +}; + +using RawLog = std::variant; + +struct Block { + std::string_view Name; + BlockEventMap Events; + ev::RawLog RawLog; +}; + +struct Benchmark { + std::string Name; + std::vector Blocks; +}; + +struct Logs { + std::vector Benchmarks; +}; + +void defTypes(pybind11::module &Mod); +} // namespace ev diff --git a/extras/analyze/src/module.cpp b/extras/analyze/src/module.cpp new file mode 100644 index 00000000..2df30cba --- 
/dev/null +++ b/extras/analyze/src/module.cpp @@ -0,0 +1,15 @@ +#include + +#include "types.hpp" +#include "parse.hpp" + +namespace py = pybind11; + +PYBIND11_MODULE(eventanalyze, Mod) { + Mod.doc() = "C++-accelerated event logging types and parser"; + + Mod.attr("VERSION") = std::tuple(1, 0); + + ev::defTypes(Mod); + ev::defParse(Mod); +} diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp new file mode 100644 index 00000000..33f1bb81 --- /dev/null +++ b/extras/analyze/src/parse.cpp @@ -0,0 +1,252 @@ +#include "parse.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "types.hpp" + +using namespace std::literals; +using namespace ev; +namespace py = pybind11; + +thread_local std::deque Values; +thread_local std::unordered_set Strings; + +static constexpr std::string_view RegionNameEv = + R"("event_id": "ProcessDag", "name": ")"; +static const std::boyer_moore_horspool_searcher + BlockNameSearcher(RegionNameEv.begin(), RegionNameEv.end()); + +static std::string_view parseName(const std::string_view BlockLog) { + auto It = std::search(BlockLog.begin(), BlockLog.end(), BlockNameSearcher); + It += RegionNameEv.size(); + auto End = std::find(It, BlockLog.end(), '"'); + + return std::string_view(It, End); +} + +static EventSchema parse_event_schema( + EventId Id, + const std::vector> &Init) { + EventSchema Result; + Result.Id = EventId(*Strings.insert(std::string(Id.Value)).first); + Result.ParamTypes.reserve(Init.size() - 1); + Result.Parameters.reserve(Init.size() - 1); + + for (std::size_t Index = 0; Index < Init.size() - 1; ++Index) { + Result.Parameters.push_back( + *Strings.insert(std::string(Init[Index + 1].first)).first); + assert(!Init[Index + 1].second.empty()); + if (Init[Index + 1].second.front() == '"') { + Result.ParamTypes.push_back(Type::String); + } else if (Init[Index + 1].second == "true"sv || + Init[Index + 
1].second == "false"sv) { + Result.ParamTypes.push_back(Type::Bool); + } else { + Result.ParamTypes.push_back(Type::Number); + } + } + + return Result; +} + +static std::unordered_set MasterSchemas; +static std::mutex MasterSchemaMutex; +thread_local std::unordered_set Schemas; + +static void update_schema_structures(EventId Id, EventSchema schema) { + std::scoped_lock Lock(MasterSchemaMutex); + if (MasterSchemas.find(Id) == MasterSchemas.end()) + MasterSchemas.emplace_hint(MasterSchemas.end(), std::move(schema)); + Schemas = MasterSchemas; +} + +static Event parseEvent(const std::string_view Event) { + const auto End = Event.rfind('}'); + auto Begin = Event.find('{'); + + std::vector> Result; + + while (Begin < End) { + const auto KeyF = Event.find('"', Begin + 1) + 1; + if (KeyF == std::string_view::npos) + break; + const auto KeyE = Event.find('"', KeyF); + if (KeyE == std::string_view::npos) + break; + const std::string_view Key = Event.substr(KeyF, KeyE - KeyF); + const auto ValF = + Event.find_first_not_of(" \t\n", Event.find(':', KeyE + 1) + 1); + if (ValF == std::string_view::npos) + break; + const auto ValE = Event[ValF] == '"' + ? Event.find('"', ValF + 1) + 1 + : Event.find_first_of(",} \t\n", ValF + 1); + if (ValE == std::string_view::npos) + break; + std::string_view Val = Event.substr(ValF, ValE - ValF); + + Result.emplace_back(Key, Val); + Begin = Event.find_first_of(",}", ValE + 1); + if (Begin == std::string_view::npos) + break; + Begin += 1; + } + + assert(Result[0].first == "event_id"sv); + EventId Id(Result[0].second); + + auto It = Schemas.find(Id); + if (It == Schemas.end()) { + auto Sch = ::parse_event_schema(Id, Result); + ::update_schema_structures(Id, std::move(Sch)); + It = Schemas.find(Id); + } + + Id = It->Id; // Update to the non-dangling Id. 
+ + assert(It->ParamTypes.size() == Result.size() - 1); + std::size_t start = Values.size(); + for (std::size_t Index = 0; Index < Result.size() - 1; ++Index) { + const std::string_view Data = Result[Index + 1].second; + Values.push_back([&]() -> Value { + switch (It->ParamTypes[Index]) { + case Type::Number: { + std::int64_t I64; + [[maybe_unused]] const auto Ri64 = + std::from_chars(Data.data(), Data.data() + Data.size(), I64); + if (Ri64.ec == std::errc() && Ri64.ptr == Data.data() + Data.size()) { + return Value{.Num = Number(I64)}; + } + + std::uint64_t U64; + [[maybe_unused]] const auto Ru64 = + std::from_chars(Data.data(), Data.data() + Data.size(), U64); + if (Ru64.ec == std::errc() && Ru64.ptr == Data.data() + Data.size()) { + return Value{.Num = Number(U64)}; + } + + double Fl; + [[maybe_unused]] const auto Rfl = + std::from_chars(Data.data(), Data.data() + Data.size(), Fl); + if (Rfl.ec == std::errc() && Rfl.ptr == Data.data() + Data.size()) { + return Value{.Num = Number(Fl)}; + } + } + case Type::String: + return Value{ + .Str = *Strings.insert(std::string(Data.substr(1, Data.size() - 2))) + .first}; + case Type::Bool: + return Value{.Bool = Data == "true"sv}; + } + std::abort(); + }()); + } + std::size_t iend = Values.size(); + + return ev::Event{ + .Id = Id, + .Values = &Values, + .Start = start, + .End = iend, + }; +} + +static constexpr std::string_view EventTag = R"(EVENT: {)"; +static const std::boyer_moore_horspool_searcher + EventTagSearcher(EventTag.begin(), EventTag.end()); + +static BlockEventMap parseEvents(const std::string_view BlockLog) { + std::unordered_map, EventIdHash, EventIdEq> + Result; + + const auto E = BlockLog.end(); + auto B = std::search(BlockLog.begin(), E, EventTagSearcher); + while (B != E) { + auto line_end = std::find(B + EventTag.size() - 1, E, '\n'); + + std::string_view Event(B, line_end); + + ev::Event Ev = ::parseEvent(Event); + Result[Ev.Id].push_back(Ev); + + B = std::search(line_end, E, EventTagSearcher); + } + 
+ auto Vals = std::ranges::views::values(Result); + + return BlockEventMap(std::make_move_iterator(Vals.begin()), + std::make_move_iterator(Vals.end())); +} + +static Block parseBlock(const std::string_view WholeFile, + const std::string_view BlockLog) { + return Block{ + .Name = ::parseName(BlockLog), + .Events = ::parseEvents(BlockLog), + .RawLog = + UnloadedRawLog{ + .Offset = BlockLog.data() - WholeFile.data(), + .Size = BlockLog.size(), + }, + }; +} + +static std::vector split_blocks(const std::string_view file) { + static constexpr std::string_view RegionDelimiter = + "********** Opt Scheduling **********"; + const std::boyer_moore_horspool_searcher searcher(RegionDelimiter.begin(), + RegionDelimiter.end()); + + std::vector Result; + + const auto E = file.end(); + auto B = std::search(file.begin(), E, searcher); + while (B != E) { + auto It = std::search(B + RegionDelimiter.size(), E, searcher); + Result.emplace_back(file.data() + std::distance(file.begin(), B), + std::distance(B, It)); + B = It; + } + + return Result; +} + +void ev::defParse(py::module &Mod) { + Mod.def("parse_blocks", [](const std::string &path) { + mio::mmap_source MMap(path.c_str()); + std::string_view File(MMap.data(), MMap.size()); + const auto RawBlocks = ::split_blocks(File); + std::vector Blocks(RawBlocks.size()); + std::transform( +#if HAS_TBB + std::execution::par_unseq, +#endif + RawBlocks.begin(), RawBlocks.end(), Blocks.begin(), + [File](std::string_view Blk) { return ::parseBlock(File, Blk); }); + + return Blocks; + }); +} + +const EventSchema *ev::getSchema(std::string_view Id) { + auto It = MasterSchemas.find(EventId(Id)); + if (It == MasterSchemas.end()) + return nullptr; + return &*It; +} diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp new file mode 100644 index 00000000..019ca041 --- /dev/null +++ b/extras/analyze/src/types.cpp @@ -0,0 +1,113 @@ +#include "types.hpp" + +#include + +#include + +#include "parse.hpp" + +using namespace std::literals; 
+namespace py = pybind11; + +void ev::defTypes(py::module &Mod) { + py::class_(Mod, "_Event") + .def("__getitem__", + [](const Event &Event, std::string_view Property) -> py::object { + const EventSchema *Schema = ev::getSchema(Event.Id.Value); + if (!Schema) { + throw py::key_error("Unknown event " + + std::string(Event.Id.Value)); + } + auto Index = + std::distance(Schema->Parameters.begin(), + std::find(Schema->Parameters.begin(), + Schema->Parameters.end(), Property)); + + const Value Val = (*Event.Values)[Event.Start + Index]; + switch (Schema->ParamTypes[Index]) { + case Type::Number: + return std::visit( + [](T x)->py::object { + if constexpr (std::same_as) + return py::float_(x); + else + return py::int_(x); + }, + Val.Num); + case Type::String: + return py::str(std::string(Val.Str)); + case Type::Bool: + return py::bool_(Val.Bool); + } + std::abort(); + }) + .def("__repr__", [](const Event &Event) { + const EventSchema *Schema = ev::getSchema(Event.Id.Value); + if (!Schema) { + throw py::key_error("Unknown event " + std::string(Event.Id.Value)); + } + + std::ostringstream out; + out << '{'; + for (std::size_t Index = 0; Index < Schema->Parameters.size(); + ++Index) { + if (Index != 0) + out << ", "; + out << '\'' << Schema->Parameters[Index] << "': "; + const Value Val = (*Event.Values)[Event.Start + Index]; + switch (Schema->ParamTypes[Index]) { + case Type::Number: + std::visit([&out](auto x) { out << x; }, Val.Num); + case Type::String: + out << '\'' << Val.Str << '\''; + case Type::Bool: + out << std::boolalpha << Val.Bool; + } + } + out << '}'; + + return out.str(); + }); + + py::class_(Mod, "_BlockEvents") + .def("__getitem__", + [](const BlockEvents &Events, + std::string_view EvId) -> const std::vector & { + auto It = Events.Events->find(EventId(EvId)); + if (It != Events.Events->end()) { + return *It; + } else { + throw py::key_error(std::string(EvId)); + } + }); + + py::class_(Mod, "Block") + .def_readonly("name", &Block::Name) + 
.def_readonly("raw_log", &Block::RawLog) + .def_property_readonly( + "events", [](const Block &Blk) { return BlockEvents(&Blk.Events); }) + .def("single", + [](const Block &Blk, std::string_view EvId) { + auto It = Blk.Events.find(EventId(EvId)); + if (It != Blk.Events.end()) { + if (It->size() != 1) { + throw std::invalid_argument("Multiple events for " + + std::string(EvId)); + } + return It->front(); + } else { + throw py::key_error(std::string(EvId)); + } + }) + .def("__contains__", + [](const Block &Blk, std::string_view EvId) { + return Blk.Events.contains(EventId(EvId)); + }) + .def("__repr__", [](const Block &Blk) { + return ""; + }); + + py::class_(Mod, "Logs"); + py::class_(Mod, "Benchmark"); +} From e810c2f9143c90fa44264b935f5ec4cc67be1740 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Fri, 18 Jun 2021 17:49:56 -0700 Subject: [PATCH 03/17] Restructure type hierarchy, flesh out types Getting close --- extras/analyze/CMakeLists.txt | 4 + extras/analyze/include/types.hpp | 38 +++++--- extras/analyze/src/parse.cpp | 156 +++++++++++++++++++++++++++---- extras/analyze/src/types.cpp | 74 ++++++++++++--- 4 files changed, 224 insertions(+), 48 deletions(-) diff --git a/extras/analyze/CMakeLists.txt b/extras/analyze/CMakeLists.txt index e8c0947c..84310ad5 100644 --- a/extras/analyze/CMakeLists.txt +++ b/extras/analyze/CMakeLists.txt @@ -37,6 +37,10 @@ if(TBB_FOUND) link_libraries(tbb) endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL GNU) + add_compile_options(-Wall -Wextra) +endif() + pybind11_add_module(eventanalyze src/module.cpp src/parse.cpp src/types.cpp) target_include_directories(eventanalyze PUBLIC include) target_link_libraries(eventanalyze PRIVATE mio::mio) diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index a64ad09e..a27b3a96 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -1,16 +1,19 @@ #pragma once -#include - #include #include +#include #include #include +#include #include 
#include #include #include +#include +#include + namespace ev { using Number = std::variant; @@ -109,30 +112,39 @@ struct EventIdEq { using BlockEventMap = std::unordered_set, EventIdHash, EventIdEq>; -struct BlockEvents { - const BlockEventMap *Events; -}; +struct Logs; +struct Benchmark; -struct UnloadedRawLog { +struct Block { + std::string_view Name; + BlockEventMap Events; + // Offset & size into the mmapped file. std::size_t Offset; std::size_t Size; -}; -using RawLog = std::variant; + std::string UniqueId; -struct Block { - std::string_view Name; - BlockEventMap Events; - ev::RawLog RawLog; + ev::Benchmark *Bench; + + std::string File; // Which file was compiled for this block }; struct Benchmark { std::string Name; std::vector Blocks; + // Offset & size into the mmapped file. + std::size_t Offset; + std::size_t Size; + + // Keep the memory around so that we can detect if the Logs object was + // destroyed, giving the Python user a good error message. + std::weak_ptr Logs; }; struct Logs { - std::vector Benchmarks; + std::string LogFile; + mio::mmap_source MMap; + std::vector> Benchmarks; }; void defTypes(pybind11::module &Mod); diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp index 33f1bb81..0d739f03 100644 --- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -12,9 +12,9 @@ #include #include #include +#include #include -#include #include #include "types.hpp" @@ -146,6 +146,7 @@ static Event parseEvent(const std::string_view Event) { if (Rfl.ec == std::errc() && Rfl.ptr == Data.data() + Data.size()) { return Value{.Num = Number(Fl)}; } + std::abort(); } case Type::String: return Value{ @@ -194,16 +195,25 @@ static BlockEventMap parseEvents(const std::string_view BlockLog) { std::make_move_iterator(Vals.end())); } -static Block parseBlock(const std::string_view WholeFile, +static Block parseBlock(ev::Benchmark *Bench, const std::size_t Offset, const std::string_view BlockLog) { + std::string_view Name = 
::parseName(BlockLog); + BlockEventMap Events = ::parseEvents(BlockLog); + std::string UniqueId = Bench->Name + ':' + std::string(Name); + auto PF = Events.find(EventId("PassFinished")); + if (PF != Events.end()) { + UniqueId += + ",pass=" + + std::to_string(std::get(Values[PF->front().Start].Num)); + } return Block{ - .Name = ::parseName(BlockLog), - .Events = ::parseEvents(BlockLog), - .RawLog = - UnloadedRawLog{ - .Offset = BlockLog.data() - WholeFile.data(), - .Size = BlockLog.size(), - }, + .Name = std::move(Name), + .Events = std::move(Events), + .Offset = Offset, + .Size = BlockLog.size(), + .UniqueId = std::move(UniqueId), + .Bench = Bench, + .File = "", // TODO: Get this information too }; } @@ -227,20 +237,126 @@ static std::vector split_blocks(const std::string_view file) { return Result; } -void ev::defParse(py::module &Mod) { - Mod.def("parse_blocks", [](const std::string &path) { - mio::mmap_source MMap(path.c_str()); - std::string_view File(MMap.data(), MMap.size()); - const auto RawBlocks = ::split_blocks(File); - std::vector Blocks(RawBlocks.size()); - std::transform( +namespace { +struct BenchmarkRegion { + std::string BenchmarkName; + // The offset in the file + std::size_t Offset; +}; + +enum class BenchmarkRE : int { + Spec, +}; +} // namespace + +static std::shared_ptr parse(std::weak_ptr Logs, + const std::string_view File, + BenchmarkRegion Bench) { + const auto RawBlocks = ::split_blocks(File); + std::vector Blocks(RawBlocks.size()); + + auto Result = std::make_shared(); + Result->Name = Bench.BenchmarkName; + Result->Logs = std::move(Logs); + Result->Offset = Bench.Offset; + Result->Size = File.size(); + + std::transform( #if HAS_TBB - std::execution::par_unseq, + std::execution::par_unseq, #endif - RawBlocks.begin(), RawBlocks.end(), Blocks.begin(), - [File](std::string_view Blk) { return ::parseBlock(File, Blk); }); + RawBlocks.begin(), RawBlocks.end(), Blocks.begin(), + [File, Bench = Result.get()](std::string_view Blk) { + return 
::parseBlock(Bench, Bench->Offset + (Blk.begin() - File.begin()), + Blk); + }); + + Result->Blocks = std::move(Blocks); + + return Result; +} + +static constexpr std::string_view SpecBenchmarkRegion = R"( Building )"; +static const std::boyer_moore_horspool_searcher + SpecBenchmarkSearcher(SpecBenchmarkRegion.begin(), + SpecBenchmarkRegion.end()); +static std::vector splitSpecBenchmarks(std::string_view File) { + std::vector Result; - return Blocks; + auto B = File.begin(); + auto E = File.end(); + while (B != E) { + auto It = std::search(B, E, SpecBenchmarkSearcher); + if (It == E) + break; + It += SpecBenchmarkRegion.size(); + auto EndOfName = std::find(It, E, ' '); + + const auto Name = std::string_view(It, EndOfName); + const std::size_t Offset = It - File.begin(); + + Result.emplace_back(std::string(Name), Offset); + + B = It; + } + + return Result; +} + +void ev::defParse(py::module &Mod) { + // static constexpr std::string_view BenchmarkRE + Mod.attr("SPEC_BENCH_RE") = (int)BenchmarkRE::Spec; + + Mod.def("parse_blocks", [](const std::string &Path, + // One of the RE types. + int REChoice) { + if (REChoice != (int)BenchmarkRE::Spec) { + throw py::value_error("Unknown regular expression number " + + std::to_string(REChoice)); + } + auto Logs = std::make_shared(); + Logs->LogFile = std::move(Path); + Logs->MMap = mio::mmap_source(Logs->LogFile); + const std::string_view File(Logs->MMap.data(), Logs->MMap.size()); + + const std::vector BenchmarkSections = + [&]() -> std::vector { + switch ((BenchmarkRE)REChoice) { + case BenchmarkRE::Spec: + return splitSpecBenchmarks(File); + } + std::abort(); + }(); + + Logs->Benchmarks.reserve(BenchmarkSections.size()); + for (std::size_t Index = 0; Index < BenchmarkSections.size(); ++Index) { + const std::size_t Offset = BenchmarkSections[Index].Offset; + const std::size_t OffsetEnd = Index + 1 < BenchmarkSections.size() + ? 
BenchmarkSections[Index + 1].Offset + : File.size(); + + const std::string_view Section = File.substr(Offset, OffsetEnd - Offset); + + Logs->Benchmarks.push_back( + ::parse(Logs, Section, std::move(BenchmarkSections[Index]))); + } + + Logs->MMap.unmap(); // Usually we don't need this around + return Logs; + }); + Mod.def("parse_blocks", [](const std::string &Path, + // A single benchmark name for the whole logs. + std::string_view BenchmarkName) { + auto Logs = std::make_shared(); + Logs->LogFile = std::move(Path); + Logs->MMap = mio::mmap_source(Logs->LogFile); + const std::string_view File(Logs->MMap.data(), Logs->MMap.size()); + Logs->Benchmarks.push_back( + ::parse(Logs, File, BenchmarkRegion{std::string(BenchmarkName), 0})); + + Logs->MMap.unmap(); // Usually we don't need this around + + return Logs; }); } diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp index 019ca041..d9de0de9 100644 --- a/extras/analyze/src/types.cpp +++ b/extras/analyze/src/types.cpp @@ -58,10 +58,13 @@ void ev::defTypes(py::module &Mod) { switch (Schema->ParamTypes[Index]) { case Type::Number: std::visit([&out](auto x) { out << x; }, Val.Num); + break; case Type::String: out << '\'' << Val.Str << '\''; + break; case Type::Bool: out << std::boolalpha << Val.Bool; + break; } } out << '}'; @@ -69,23 +72,29 @@ void ev::defTypes(py::module &Mod) { return out.str(); }); - py::class_(Mod, "_BlockEvents") + py::class_(Mod, "Block") + .def_readonly("name", &Block::Name) + // .def_readonly("raw_log", &Block::RawLog) .def("__getitem__", - [](const BlockEvents &Events, + [](const Block &Blk, std::string_view EvId) -> const std::vector & { - auto It = Events.Events->find(EventId(EvId)); - if (It != Events.Events->end()) { + auto It = Blk.Events.find(EventId(EvId)); + if (It != Blk.Events.end()) { return *It; } else { throw py::key_error(std::string(EvId)); } - }); - - py::class_(Mod, "Block") - .def_readonly("name", &Block::Name) - .def_readonly("raw_log", &Block::RawLog) - 
.def_property_readonly( - "events", [](const Block &Blk) { return BlockEvents(&Blk.Events); }) + }) + .def("get", + [](const Block &Blk, std::string_view EvId, + py::object default_) -> py::object { + auto It = Blk.Events.find(EventId(EvId)); + if (It != Blk.Events.end()) { + return py::cast(*It); + } else { + return default_; + } + }) .def("single", [](const Block &Blk, std::string_view EvId) { auto It = Blk.Events.find(EventId(EvId)); @@ -104,10 +113,45 @@ void ev::defTypes(py::module &Mod) { return Blk.Events.contains(EventId(EvId)); }) .def("__repr__", [](const Block &Blk) { - return ""; + return "Name + ", file="s + Blk.File + + ", "s + std::to_string(Blk.Events.size()) + " events)>"; + }); + + py::class_>(Mod, "Benchmark") + .def_readonly("name", &Benchmark::Name) + .def_readonly("blocks", &Benchmark::Blocks) + .def("__repr__", [](const Benchmark &Bench) { + return ""; }); + py::class_>(Mod, "Logs") + .def_readonly("benchmarks", &Logs::Benchmarks) + .def("benchmark", + [](const ev::Logs &Logs, const std::string_view BenchName) { + auto It = + std::find_if(Logs.Benchmarks.begin(), Logs.Benchmarks.end(), + [BenchName](const auto &Bench) { + return Bench->Name == BenchName; + }); - py::class_(Mod, "Logs"); - py::class_(Mod, "Benchmark"); + if (It == Logs.Benchmarks.end()) { + throw py::key_error("No benchmark `" + std::string(BenchName) + + "` in this Logs"); + } else { + return It->get(); + } + }) + .def("__repr__", [](const ev::Logs &Logs) { + std::string Result = "Name; + } + + return Result + ")>"; + }); } From 0ab7b0e200505b388c3e678e481519516d42e883 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 19 Aug 2021 14:36:47 -0700 Subject: [PATCH 04/17] Clean up types --- .gitignore | 4 +-- extras/analyze/include/types.hpp | 55 +++++++++++--------------------- 2 files changed, 20 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index be2ee7fd..6f35c36b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/build -/.vscode +build/ 
+.vscode/ __pycache__ *.pyc diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index a27b3a96..f8ad04b8 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -41,7 +41,6 @@ struct EventSchema { std::vector ParamTypes; bool operator==(const EventSchema &) const = default; - bool operator==(EventId Rhs) const { return Id == Rhs; } }; struct Event { @@ -49,10 +48,21 @@ struct Event { const std::deque *Values; std::size_t Start; // Indices into values. std::size_t End; - - bool operator==(EventId Rhs) const { return Id == Rhs; } }; +inline EventId getId(EventId Id) { return Id; } + +template +requires requires(const T &It) { + { It.Id } -> std::convertible_to; +} +EventId getId(const T &It) { return It.Id; } + +template EventId getId(const std::vector &Vec) { + assert(!Vec.empty()); + return getId(Vec.front()); +} + struct EventIdHash { using is_transparent = void; @@ -64,48 +74,19 @@ struct EventIdHash { return (*this)(Id.Value); } - std::size_t operator()(const EventSchema &Schema) const noexcept { - return (*this)(Schema.Id); - } - - std::size_t operator()(const std::vector &Events) const noexcept { - assert(!Events.empty()); - return (*this)(Events.front()); - } - - std::size_t operator()(const Event &Event) const noexcept { - return (*this)(Event.Id); + template std::size_t operator()(const T &It) const noexcept { + return (*this)(getId(It)); } }; struct EventIdEq { using is_transparent = void; - template - bool operator()(const T &Lhs, const U &Rhs) const { - return Lhs == Rhs; - } - - template - bool operator()(const T &Lhs, const std::vector &Rhs) const { - assert(!Rhs.empty()); - return (*this)(Lhs, Rhs.front()); - } - - template - bool operator()(const std::vector &Lhs, const U &Rhs) const { - assert(!Lhs.empty()); - return (*this)(Lhs.front(), Rhs); - } + bool operator()(EventId Lhs, EventId Rhs) const { return Lhs == Rhs; } template - bool operator()(const std::vector &Lhs, const std::vector 
&Rhs) const { - assert(!Lhs.empty() && !Rhs.empty()); - return (*this)(Lhs.front(), Rhs.front()); - } - - bool operator()(const Event &Lhs, const Event &Rhs) const { - return (*this)(Lhs.Id, Rhs.Id); + bool operator()(const T &Lhs, const U &Rhs) const { + return getId(Lhs) == getId(Rhs); } }; From 5d412b6257ea506a7ddd0fd636ff6598ec5d693f Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 19 Aug 2021 15:18:54 -0700 Subject: [PATCH 05/17] Clean up parsing --- extras/analyze/include/types.hpp | 14 +++---- extras/analyze/src/parse.cpp | 71 +++++++++++++++++++++----------- extras/analyze/src/types.cpp | 6 ++- 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index f8ad04b8..28fa23b6 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -5,13 +5,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include namespace ev { @@ -99,9 +99,7 @@ struct Benchmark; struct Block { std::string_view Name; BlockEventMap Events; - // Offset & size into the mmapped file. - std::size_t Offset; - std::size_t Size; + std::string_view RawLog; std::string UniqueId; @@ -113,9 +111,7 @@ struct Block { struct Benchmark { std::string Name; std::vector Blocks; - // Offset & size into the mmapped file. - std::size_t Offset; - std::size_t Size; + std::string_view RawLog; // Keep the memory around so that we can detect if the Logs object was // destroyed, giving the Python user a good error message. 
@@ -123,8 +119,8 @@ struct Benchmark { }; struct Logs { - std::string LogFile; - mio::mmap_source MMap; + std::filesystem::path LogFile; + std::string RawLog; std::vector> Benchmarks; }; diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp index 0d739f03..c246fb6c 100644 --- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -1,10 +1,13 @@ #include "parse.hpp" #include +#include #include #include #include #include +#include +#include #include #include #include @@ -22,15 +25,37 @@ using namespace std::literals; using namespace ev; namespace py = pybind11; +namespace fs = std::filesystem; thread_local std::deque Values; thread_local std::unordered_set Strings; +// Read a whole file in at once, keeping only the bytes each read() delivered +std::string slurp(const fs::path &Path) { + // Open first to help ensure that we get the correct file size + std::ifstream File(Path); + + std::string Result; + Result.resize(fs::file_size(Path)); + + File.read(Result.data(), Result.size()); + Result.resize(File.gcount()); // Drop the tail if the read came up short + while (File) { // In case there's anything left over + static constexpr std::size_t BufSize = 1024; + std::array Buffer; + File.read(Buffer.data(), Buffer.size()); + Result.insert(Result.end(), Buffer.begin(), Buffer.begin() + File.gcount()); + } + + return Result; +} + static constexpr std::string_view RegionNameEv = R"("event_id": "ProcessDag", "name": ")"; static const std::boyer_moore_horspool_searcher BlockNameSearcher(RegionNameEv.begin(), RegionNameEv.end()); +// Extracts the name of the block static std::string_view parseName(const std::string_view BlockLog) { auto It = std::search(BlockLog.begin(), BlockLog.end(), BlockNameSearcher); It += RegionNameEv.size(); @@ -39,7 +64,8 @@ static std::string_view parseName(const std::string_view BlockLog) { return std::string_view(It, End); } -static EventSchema parse_event_schema( +// Parses out an EventSchema, which is shared for all events of that EventId. 
+static EventSchema parseEventSchema( EventId Id, const std::vector> &Init) { EventSchema Result; @@ -64,11 +90,14 @@ static EventSchema parse_event_schema( return Result; } +// Schemas are globally loaded. +// This static/thread_local dance is to make it appropriately thread safe but +// still fast. static std::unordered_set MasterSchemas; static std::mutex MasterSchemaMutex; thread_local std::unordered_set Schemas; -static void update_schema_structures(EventId Id, EventSchema schema) { +static void updateSchemaStructures(EventId Id, EventSchema schema) { std::scoped_lock Lock(MasterSchemaMutex); if (MasterSchemas.find(Id) == MasterSchemas.end()) MasterSchemas.emplace_hint(MasterSchemas.end(), std::move(schema)); @@ -112,8 +141,8 @@ static Event parseEvent(const std::string_view Event) { auto It = Schemas.find(Id); if (It == Schemas.end()) { - auto Sch = ::parse_event_schema(Id, Result); - ::update_schema_structures(Id, std::move(Sch)); + auto Sch = ::parseEventSchema(Id, Result); + ::updateSchemaStructures(Id, std::move(Sch)); It = Schemas.find(Id); } @@ -195,8 +224,7 @@ static BlockEventMap parseEvents(const std::string_view BlockLog) { std::make_move_iterator(Vals.end())); } -static Block parseBlock(ev::Benchmark *Bench, const std::size_t Offset, - const std::string_view BlockLog) { +static Block parseBlock(ev::Benchmark *Bench, const std::string_view BlockLog) { std::string_view Name = ::parseName(BlockLog); BlockEventMap Events = ::parseEvents(BlockLog); std::string UniqueId = Bench->Name + ':' + std::string(Name); @@ -209,15 +237,14 @@ static Block parseBlock(ev::Benchmark *Bench, const std::size_t Offset, return Block{ .Name = std::move(Name), .Events = std::move(Events), - .Offset = Offset, - .Size = BlockLog.size(), + .RawLog = BlockLog, .UniqueId = std::move(UniqueId), .Bench = Bench, .File = "", // TODO: Get this information too }; } -static std::vector split_blocks(const std::string_view file) { +static std::vector splitBlocks(const std::string_view 
file) { static constexpr std::string_view RegionDelimiter = "********** Opt Scheduling **********"; const std::boyer_moore_horspool_searcher searcher(RegionDelimiter.begin(), @@ -252,23 +279,21 @@ enum class BenchmarkRE : int { static std::shared_ptr parse(std::weak_ptr Logs, const std::string_view File, BenchmarkRegion Bench) { - const auto RawBlocks = ::split_blocks(File); + const auto RawBlocks = ::splitBlocks(File); std::vector Blocks(RawBlocks.size()); auto Result = std::make_shared(); Result->Name = Bench.BenchmarkName; Result->Logs = std::move(Logs); - Result->Offset = Bench.Offset; - Result->Size = File.size(); + Result->RawLog = File; std::transform( #if HAS_TBB std::execution::par_unseq, #endif RawBlocks.begin(), RawBlocks.end(), Blocks.begin(), - [File, Bench = Result.get()](std::string_view Blk) { - return ::parseBlock(Bench, Bench->Offset + (Blk.begin() - File.begin()), - Blk); + [Bench = Result.get()](std::string_view Blk) { + return ::parseBlock(Bench, Blk); }); Result->Blocks = std::move(Blocks); @@ -307,7 +332,7 @@ void ev::defParse(py::module &Mod) { // static constexpr std::string_view BenchmarkRE Mod.attr("SPEC_BENCH_RE") = (int)BenchmarkRE::Spec; - Mod.def("parse_blocks", [](const std::string &Path, + Mod.def("parse_blocks", [](const fs::path &Path, // One of the RE types. 
int REChoice) { if (REChoice != (int)BenchmarkRE::Spec) { @@ -316,8 +341,8 @@ void ev::defParse(py::module &Mod) { } auto Logs = std::make_shared(); Logs->LogFile = std::move(Path); - Logs->MMap = mio::mmap_source(Logs->LogFile); - const std::string_view File(Logs->MMap.data(), Logs->MMap.size()); + Logs->RawLog = ::slurp(Logs->LogFile); + const std::string_view File = Logs->RawLog; const std::vector BenchmarkSections = [&]() -> std::vector { @@ -341,21 +366,19 @@ void ev::defParse(py::module &Mod) { ::parse(Logs, Section, std::move(BenchmarkSections[Index]))); } - Logs->MMap.unmap(); // Usually we don't need this around return Logs; }); - Mod.def("parse_blocks", [](const std::string &Path, + Mod.def("parse_blocks", [](const fs::path &Path, // A single benchmark name for the whole logs. std::string_view BenchmarkName) { auto Logs = std::make_shared(); Logs->LogFile = std::move(Path); - Logs->MMap = mio::mmap_source(Logs->LogFile); - const std::string_view File(Logs->MMap.data(), Logs->MMap.size()); + Logs->RawLog = ::slurp(Logs->LogFile); + const std::string_view File = Logs->RawLog; + Logs->Benchmarks.push_back( ::parse(Logs, File, BenchmarkRegion{std::string(BenchmarkName), 0})); - Logs->MMap.unmap(); // Usually we don't need this around - return Logs; }); } diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp index d9de0de9..e5284b53 100644 --- a/extras/analyze/src/types.cpp +++ b/extras/analyze/src/types.cpp @@ -27,7 +27,7 @@ void ev::defTypes(py::module &Mod) { switch (Schema->ParamTypes[Index]) { case Type::Number: return std::visit( - [](T x)->py::object { + [](T x) -> py::object { if constexpr (std::same_as) return py::float_(x); else @@ -74,7 +74,7 @@ void ev::defTypes(py::module &Mod) { py::class_(Mod, "Block") .def_readonly("name", &Block::Name) - // .def_readonly("raw_log", &Block::RawLog) + .def_readonly("raw_log", &Block::RawLog) .def("__getitem__", [](const Block &Blk, std::string_view EvId) -> const std::vector & { @@ -119,6 
+119,7 @@ void ev::defTypes(py::module &Mod) { py::class_>(Mod, "Benchmark") .def_readonly("name", &Benchmark::Name) + .def_readonly("raw_log", &Benchmark::RawLog) .def_readonly("blocks", &Benchmark::Blocks) .def("__repr__", [](const Benchmark &Bench) { return ">(Mod, "Logs") .def_readonly("benchmarks", &Logs::Benchmarks) + .def_readonly("raw_log", &Logs::RawLog) .def("benchmark", [](const ev::Logs &Logs, const std::string_view BenchName) { auto It = From 9dc3a7526b72a8e96146fe39331b60bc7dd239ed Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 19 Aug 2021 16:24:34 -0700 Subject: [PATCH 06/17] Accept pathlib.Path to parse --- extras/analyze/CMakeLists.txt | 15 +- extras/analyze/include/parse.hpp | 2 +- extras/analyze/include/py.hpp | 36 ++++ extras/analyze/include/types.hpp | 2 +- extras/analyze/src/module.cpp | 7 +- extras/analyze/src/parse.cpp | 3 +- extras/analyze/src/test.main.cpp | 309 ------------------------------- extras/analyze/src/types.cpp | 3 +- 8 files changed, 45 insertions(+), 332 deletions(-) create mode 100644 extras/analyze/include/py.hpp delete mode 100644 extras/analyze/src/test.main.cpp diff --git a/extras/analyze/CMakeLists.txt b/extras/analyze/CMakeLists.txt index 84310ad5..d64d1575 100644 --- a/extras/analyze/CMakeLists.txt +++ b/extras/analyze/CMakeLists.txt @@ -6,13 +6,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) include(FetchContent) -FetchContent_Declare( - mio - GIT_REPOSITORY https://github.com/mandreyel/mio.git - GIT_TAG 3f86a95c0784d73ce6815237ec33ed25f233b643 -) -FetchContent_MakeAvailable(mio) - FetchContent_Declare( pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11.git @@ -41,11 +34,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL GNU) add_compile_options(-Wall -Wextra) endif() -pybind11_add_module(eventanalyze src/module.cpp src/parse.cpp src/types.cpp) +file(GLOB_RECURSE sources CONFIGURE_DEPENDS "src/*.cpp") +pybind11_add_module(eventanalyze ${sources}) target_include_directories(eventanalyze PUBLIC include) 
-target_link_libraries(eventanalyze PRIVATE mio::mio) target_compile_features(eventanalyze PUBLIC cxx_std_20) - -add_executable(test src/test.main.cpp) -target_link_libraries(test PRIVATE mio::mio) -target_compile_features(test PRIVATE cxx_std_20) diff --git a/extras/analyze/include/parse.hpp b/extras/analyze/include/parse.hpp index 446b1393..e3a4f0b2 100644 --- a/extras/analyze/include/parse.hpp +++ b/extras/analyze/include/parse.hpp @@ -1,6 +1,6 @@ #pragma once -#include +#include "py.hpp" namespace ev { void defParse(pybind11::module &Mod); diff --git a/extras/analyze/include/py.hpp b/extras/analyze/include/py.hpp new file mode 100644 index 00000000..e445e080 --- /dev/null +++ b/extras/analyze/include/py.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace pybind11::detail { +template <> struct type_caster { +public: + PYBIND11_TYPE_CASTER(std::filesystem::path, _("pathlib.Path | str")); + + // Python -> C++ + bool load(handle Src, bool) { + // If !isinstance(Src, str): + if (!PyUnicode_Check(Src.ptr())) { + object PyPath = module::import("pathlib").attr("Path"); + + if (!PyObject_IsInstance(Src.ptr(), PyPath.ptr())) + return false; + } + this->value = std::filesystem::path(std::string(str(Src))); + return true; + } + + static handle cast(const std::filesystem::path &Path, return_value_policy, + handle) { + object PyPath = module::import("pathlib").attr("Path"); + return PyPath(str(Path.string())); + } +}; +} // namespace pybind11::detail diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index 28fa23b6..3af5d2a2 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -12,7 +12,7 @@ #include #include -#include +#include "py.hpp" namespace ev { using Number = std::variant; diff --git a/extras/analyze/src/module.cpp b/extras/analyze/src/module.cpp index 2df30cba..c0a0418f 100644 --- a/extras/analyze/src/module.cpp +++ 
b/extras/analyze/src/module.cpp @@ -1,14 +1,13 @@ -#include - -#include "types.hpp" #include "parse.hpp" +#include "py.hpp" +#include "types.hpp" namespace py = pybind11; PYBIND11_MODULE(eventanalyze, Mod) { Mod.doc() = "C++-accelerated event logging types and parser"; - Mod.attr("VERSION") = std::tuple(1, 0); + Mod.attr("VERSION") = std::tuple(1, 0, 0); ev::defTypes(Mod); ev::defParse(Mod); diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp index c246fb6c..3f4a90b9 100644 --- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -18,8 +18,7 @@ #include #include -#include - +#include "py.hpp" #include "types.hpp" using namespace std::literals; diff --git a/extras/analyze/src/test.main.cpp b/extras/analyze/src/test.main.cpp deleted file mode 100644 index 31573f86..00000000 --- a/extras/analyze/src/test.main.cpp +++ /dev/null @@ -1,309 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -using namespace std::literals; - -enum class Type { - Int, - NegInt, - Float, - String, - Bool, -}; - -union Value { - std::uint64_t I; - std::int64_t SI; - double F; - std::string_view S; - bool B; -}; - -struct EventId { - std::string_view value; - - bool operator==(const EventId &) const = default; -}; - -struct EventSchema { - EventId id; - std::vector parameters; - std::vector param_types; - - bool operator==(const EventSchema &) const = default; - bool operator==(EventId rhs) const { return id == rhs; } -}; - -namespace std { -template <> struct hash { - std::size_t operator()(EventId id) const noexcept { - return std::hash()(id.value); - } -}; -template <> struct hash { - std::size_t operator()(const EventSchema &schema) const noexcept { - return std::hash()(schema.id); - } -}; -} // namespace std - -struct TransparentHash { - using is_transparent = void; - - template - std::size_t operator()(const T &it) const - noexcept(noexcept(std::hash{}(it))) { - 
return std::hash{}(it); - } -}; - -thread_local std::deque Values; - -struct Event { - EventId id; - std::deque const *values; - std::size_t start; // Indices into Values. - std::size_t end; -}; - -struct Block { - std::string_view name; - std::unordered_map> events; - std::string_view raw_log; -}; - -static constexpr std::string_view RegionNameEv = - R"("event_id": "ProcessDag", "name": ")"; -static const std::boyer_moore_horspool_searcher - BlockNameSearcher(RegionNameEv.begin(), RegionNameEv.end()); - -std::string_view parse_name(const std::string_view block_log) { - auto it = std::search(block_log.begin(), block_log.end(), BlockNameSearcher); - it += RegionNameEv.size(); - auto end = std::find(it, block_log.end(), '"'); - - return std::string_view(it, end); -} - -EventSchema parse_event_schema( - EventId id, - const std::vector> &init) { - EventSchema result; - result.id = id; - result.param_types.reserve(init.size() - 1); - result.parameters.reserve(init.size() - 1); - - for (std::size_t index = 0; index < init.size() - 1; ++index) { - result.parameters.push_back(init[index + 1].first); - assert(!init[index + 1].second.empty()); - if (init[index + 1].second.front() == '"') { - result.param_types.push_back(Type::String); - } else if (init[index + 1].second == "true"sv || - init[index + 1].second == "false"sv) { - result.param_types.push_back(Type::Bool); - } else if (init[index + 1].second.front() == '-') { - result.param_types.push_back(Type::NegInt); - } else { - result.param_types.push_back(Type::Int); - } - } - - return result; -} - -static std::unordered_set> - MasterSchemas; -std::mutex MasterSchemaMutex; -thread_local std::unordered_set> - Schemas; - -void update_schema_structures(EventId id, EventSchema schema) { - std::scoped_lock sl(MasterSchemaMutex); - if (MasterSchemas.find(id) == MasterSchemas.end()) - MasterSchemas.emplace_hint(MasterSchemas.end(), std::move(schema)); - Schemas = MasterSchemas; -} - -Event parse_event(const std::string_view 
event) { - const auto end = event.rfind('}'); - auto begin = event.find('{'); - - std::vector> result; - - while (begin < end) { - const auto key_f = event.find('"', begin + 1) + 1; - if (key_f == std::string_view::npos) - break; - const auto key_e = event.find('"', key_f); - if (key_e == std::string_view::npos) - break; - const std::string_view key = event.substr(key_f, key_e - key_f); - const auto val_f = - event.find_first_not_of(" \t\n", event.find(':', key_e + 1) + 1); - if (val_f == std::string_view::npos) - break; - const auto val_e = event[val_f] == '"' - ? event.find('"', val_f + 1) + 1 - : event.find_first_of(",} \t\n", val_f + 1); - if (val_e == std::string_view::npos) - break; - std::string_view val = event.substr(val_f, val_e - val_f); - - result.emplace_back(key, val); - begin = event.find_first_of(",}", val_e + 1); - if (begin == std::string_view::npos) - break; - begin += 1; - } - - assert(result[0].first == "event_id"sv); - EventId id(result[0].second); - - auto it = Schemas.find(id); - if (it == Schemas.end()) { - auto sch = ::parse_event_schema(id, result); - ::update_schema_structures(id, std::move(sch)); - it = Schemas.find(id); - } - - assert(it->param_types.size() == result.size() - 1); - std::size_t start = Values.size(); - for (std::size_t index = 0; index < result.size() - 1; ++index) { - Values.push_back([&]() -> Value { - switch (it->param_types[index]) { - case Type::Int: { - std::uint64_t val; - [[maybe_unused]] const auto r = std::from_chars( - result[index + 1].second.data(), - result[index + 1].second.data() + result[index + 1].second.size(), - val); - - assert(r.ptr == result[index + 1].second.data() + - result[index + 1].second.size()); - assert(r.ec == std::errc()); - - return Value{.I = val}; - } - case Type::NegInt: { - std::int64_t val; - [[maybe_unused]] const auto r = std::from_chars( - result[index + 1].second.data(), - result[index + 1].second.data() + result[index + 1].second.size(), - val); - - assert(r.ptr == 
result[index + 1].second.data() + - result[index + 1].second.size()); - assert(r.ec == std::errc()); - - return Value{.SI = val}; - } - case Type::Float: { - double val; - [[maybe_unused]] const auto r = std::from_chars( - result[index + 1].second.data(), - result[index + 1].second.data() + result[index + 1].second.size(), - val); - - assert(r.ptr == result[index + 1].second.data() + - result[index + 1].second.size()); - assert(r.ec == std::errc()); - - return Value{.F = val}; - } - case Type::String: - return Value{.S = result[index + 1].second.substr( - 1, result[index + 1].second.size() - 2)}; - case Type::Bool: - return Value{.B = result[index + 1].second == "true"sv}; - } - std::abort(); - }()); - } - std::size_t iend = Values.size(); - - return Event{ - .id = id, - .values = &Values, - .start = start, - .end = iend, - }; -} - -static constexpr std::string_view EventTag = R"(EVENT: {)"; -static const std::boyer_moore_horspool_searcher - EventTagSearcher(EventTag.begin(), EventTag.end()); -std::unordered_map> -parse_events(const std::string_view block_log) { - std::unordered_map> result; - - const auto e = block_log.end(); - auto b = std::search(block_log.begin(), e, EventTagSearcher); - while (b != e) { - auto line_end = std::find(b + EventTag.size() - 1, e, '\n'); - - std::string_view event(b, line_end); - - Event ev = ::parse_event(event); - result[ev.id].push_back(ev); - - b = std::search(line_end, e, EventTagSearcher); - } - - return result; -} - -Block parse_block(const std::string_view block_log) { - return Block{ - .name = ::parse_name(block_log), - .events = ::parse_events(block_log), - .raw_log = block_log, - }; -} - -std::vector split_blocks(const std::string_view file) { - static constexpr std::string_view RegionDelimiter = - "********** Opt Scheduling **********"; - const std::boyer_moore_horspool_searcher searcher(RegionDelimiter.begin(), - RegionDelimiter.end()); - - std::vector result; - - const auto e = file.end(); - auto b = 
std::search(file.begin(), e, searcher); - while (b != e) { - auto it = std::search(b + RegionDelimiter.size(), e, searcher); - result.emplace_back(file.data() + std::distance(file.begin(), b), - std::distance(b, it)); - b = it; - } - - return result; -} - -int main(int argc, char **argv) { - mio::mmap_source mmap(argv[1]); - std::string_view file(mmap.data(), mmap.size()); - const auto raw_blocks = ::split_blocks(file); - std::vector blocks(raw_blocks.size()); - std::transform( -#if HAS_TBB - std::execution::par_unseq, -#endif - raw_blocks.begin(), raw_blocks.end(), blocks.begin(), - [](std::string_view blk) { return ::parse_block(blk); }); - - std::cout << "done " << blocks.size() << std::endl; - std::cout << "done " << MasterSchemas.size() << std::endl; -} diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp index e5284b53..03420390 100644 --- a/extras/analyze/src/types.cpp +++ b/extras/analyze/src/types.cpp @@ -2,9 +2,8 @@ #include -#include - #include "parse.hpp" +#include "py.hpp" using namespace std::literals; namespace py = pybind11; From 2ae7a2b7ea2188678e0b9d7eb55e5839f8b3989e Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 19 Aug 2021 16:59:15 -0700 Subject: [PATCH 07/17] Expose the C++ parser on the Python side --- util/analyze/_main.py | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/util/analyze/_main.py b/util/analyze/_main.py index e34f92ed..68258d36 100644 --- a/util/analyze/_main.py +++ b/util/analyze/_main.py @@ -1,11 +1,13 @@ -import pickle import argparse -import json import fnmatch +import json +from pathlib import Path +import pickle +import sys from typing import Callable -from .imports import * from ._types import Block, Logs +from .imports import * def __load_file(file): @@ -66,12 +68,44 @@ def parse_args(parser: argparse.ArgumentParser, *names, args=None): type=json.loads, help='Keep blocks matching (JSON format)', ) + parser.add_argument( + 
'--use-c++', + dest='use_cpp', + action='store_true', + help='Use the accelerated C++ parser. The eventparser module is expected to be on the PYTHONPATH', + ) + parser.add_argument( + '--c++-module', + dest='cpp_module', + type=Path, + default=None, + help='The path to the accelerated C++ parser module. --use-c++ is unnecessary if this is supplied.', + ) args = parser.parse_args(args) + use_cpp = bool(args.use_cpp or args.cpp_module) + + if use_cpp and args.benchsuite != 'spec': + print(f'WARNING: Unable to use the C++-accelerated parser for {args.benchsuite}', file=sys.stderr) + + def cpp_parse_blocks_fn(): + if args.cpp_module: + import importlib + import importlib.util + spec = importlib.util.spec_from_file_location('eventanalyze', args.cpp_module) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + else: + mod = __import__('eventanalyze') + + def parse(file): + return mod.parse_blocks(file, mod.SPEC_BENCH_RE) + return parse + FILE_PARSERS = { 'pickle': __load_filepath, - 'spec': import_cpu2006.parse, + 'spec': cpp_parse_blocks_fn() if use_cpp else import_cpu2006.parse, 'plaidml': import_plaidml.parse, 'shoc': import_shoc.parse, } From 7e84b902888fb06aab4ffc21d79febb759d46048 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 19 Aug 2021 17:30:36 -0700 Subject: [PATCH 08/17] Optimize benchmark and block access Previously, accessing a single element required creating the entire array. Adding these reference types solves that issue. 
--- extras/analyze/include/types.hpp | 4 +-- extras/analyze/src/parse.cpp | 25 +++++---------- extras/analyze/src/types.cpp | 53 +++++++++++++++++++++++++++++--- 3 files changed, 57 insertions(+), 25 deletions(-) diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index 3af5d2a2..922976b7 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -45,9 +45,7 @@ struct EventSchema { struct Event { EventId Id; - const std::deque *Values; - std::size_t Start; // Indices into values. - std::size_t End; + std::vector Values; }; inline EventId getId(EventId Id) { return Id; } diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp index 3f4a90b9..4d00e6b0 100644 --- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -26,9 +26,6 @@ using namespace ev; namespace py = pybind11; namespace fs = std::filesystem; -thread_local std::deque Values; -thread_local std::unordered_set Strings; - // Read a whole file in at once std::string slurp(const fs::path &Path) { // Open first to help ensure that we get the correct file size @@ -68,13 +65,12 @@ static EventSchema parseEventSchema( EventId Id, const std::vector> &Init) { EventSchema Result; - Result.Id = EventId(*Strings.insert(std::string(Id.Value)).first); + Result.Id = EventId(Id.Value); Result.ParamTypes.reserve(Init.size() - 1); Result.Parameters.reserve(Init.size() - 1); for (std::size_t Index = 0; Index < Init.size() - 1; ++Index) { - Result.Parameters.push_back( - *Strings.insert(std::string(Init[Index + 1].first)).first); + Result.Parameters.push_back(Init[Index + 1].first); assert(!Init[Index + 1].second.empty()); if (Init[Index + 1].second.front() == '"') { Result.ParamTypes.push_back(Type::String); @@ -148,7 +144,8 @@ static Event parseEvent(const std::string_view Event) { Id = It->Id; // Update to the non-dangling Id. 
assert(It->ParamTypes.size() == Result.size() - 1); - std::size_t start = Values.size(); + std::vector Values; + for (std::size_t Index = 0; Index < Result.size() - 1; ++Index) { const std::string_view Data = Result[Index + 1].second; Values.push_back([&]() -> Value { @@ -177,23 +174,15 @@ static Event parseEvent(const std::string_view Event) { std::abort(); } case Type::String: - return Value{ - .Str = *Strings.insert(std::string(Data.substr(1, Data.size() - 2))) - .first}; + return Value{.Str = Data.substr(1, Data.size() - 2)}; case Type::Bool: return Value{.Bool = Data == "true"sv}; } std::abort(); }()); } - std::size_t iend = Values.size(); - return ev::Event{ - .Id = Id, - .Values = &Values, - .Start = start, - .End = iend, - }; + return ev::Event{.Id = Id, .Values = std::move(Values)}; } static constexpr std::string_view EventTag = R"(EVENT: {)"; @@ -231,7 +220,7 @@ static Block parseBlock(ev::Benchmark *Bench, const std::string_view BlockLog) { if (PF != Events.end()) { UniqueId += ",pass=" + - std::to_string(std::get(Values[PF->front().Start].Num)); + std::to_string(std::get(PF->front().Values.front().Num)); } return Block{ .Name = std::move(Name), diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp index 03420390..c86911ed 100644 --- a/extras/analyze/src/types.cpp +++ b/extras/analyze/src/types.cpp @@ -1,5 +1,6 @@ #include "types.hpp" +#include #include #include "parse.hpp" @@ -8,6 +9,21 @@ using namespace std::literals; namespace py = pybind11; +namespace { +template +const T &index_into(std::span Span, std::int64_t index) { + if (index < 0) { + // Negative index indexes from the end + index += Span.size(); + } + if (index < 0 || static_cast(index) >= Span.size()) { + throw py::index_error("Index out of bounds: " + std::to_string(index) + + "/" + std::to_string(Span.size())); + } + return Span[index]; +} +} // namespace + void ev::defTypes(py::module &Mod) { py::class_(Mod, "_Event") .def("__getitem__", @@ -22,7 +38,7 @@ void 
ev::defTypes(py::module &Mod) { std::find(Schema->Parameters.begin(), Schema->Parameters.end(), Property)); - const Value Val = (*Event.Values)[Event.Start + Index]; + const Value Val = Event.Values[Index]; switch (Schema->ParamTypes[Index]) { case Type::Number: return std::visit( @@ -53,7 +69,7 @@ void ev::defTypes(py::module &Mod) { if (Index != 0) out << ", "; out << '\'' << Schema->Parameters[Index] << "': "; - const Value Val = (*Event.Values)[Event.Start + Index]; + const Value Val = Event.Values[Index]; switch (Schema->ParamTypes[Index]) { case Type::Number: std::visit([&out](auto x) { out << x; }, Val.Num); @@ -116,16 +132,45 @@ void ev::defTypes(py::module &Mod) { ", "s + std::to_string(Blk.Events.size()) + " events)>"; }); + struct BenchmarkBlocks { + std::span Blocks; + }; + + py::class_(Mod, "_Blocks") + .def("__getitem__", + [](const BenchmarkBlocks &Blocks, std::int64_t index) { + return ::index_into(Blocks.Blocks, index); + }) + .def("__len__", + [](const BenchmarkBlocks &Blocks) { return Blocks.Blocks.size(); }); + py::class_>(Mod, "Benchmark") .def_readonly("name", &Benchmark::Name) .def_readonly("raw_log", &Benchmark::RawLog) - .def_readonly("blocks", &Benchmark::Blocks) + .def_property_readonly( + "blocks", + [](const Benchmark &Bench) { return BenchmarkBlocks{Bench.Blocks}; }) .def("__repr__", [](const Benchmark &Bench) { return ""; }); + + struct LogsBenchmarks { + std::span> Benchmarks; + }; + py::class_(Mod, "_Benchmarks") + .def("__getitem__", + [](const LogsBenchmarks &Benchmarks, std::int64_t index) { + return ::index_into(Benchmarks.Benchmarks, index); + }) + .def("__len__", [](const LogsBenchmarks &Benchmarks) { + return Benchmarks.Benchmarks.size(); + }); + py::class_>(Mod, "Logs") - .def_readonly("benchmarks", &Logs::Benchmarks) + .def_property_readonly( + "benchmarks", + [](const ev::Logs &Logs) { return LogsBenchmarks{Logs.Benchmarks}; }) .def_readonly("raw_log", &Logs::RawLog) .def("benchmark", [](const ev::Logs &Logs, const 
std::string_view BenchName) { From 4463b07355ae5b30323bf246c08d21cf81a55e1d Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Fri, 20 Aug 2021 13:26:11 -0700 Subject: [PATCH 09/17] Fix .raw_log string decoding error Stray null bytes --- extras/analyze/src/parse.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp index 4d00e6b0..2a3e8e88 100644 --- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -43,6 +43,8 @@ std::string slurp(const fs::path &Path) { Result.insert(Result.end(), Buffer.begin(), Buffer.end()); } + Result.erase(Result.find('\0'), Result.size()); + return Result; } From b05893f781f2de7c9ca9b799455c438784d442c7 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Fri, 20 Aug 2021 14:27:32 -0700 Subject: [PATCH 10/17] Smoothly connect the C++ and Python side of things --- extras/analyze/src/types.cpp | 32 ++---- util/analyze/_cpp_types.py | 134 +++++++++++++++++++++++++ util/analyze/_main.py | 6 +- util/analyze/_types.py | 31 +++--- util/analyze/imports/import_plaidml.py | 5 +- util/analyze/imports/import_shoc.py | 5 +- 6 files changed, 171 insertions(+), 42 deletions(-) create mode 100644 util/analyze/_cpp_types.py diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp index c86911ed..a407631a 100644 --- a/extras/analyze/src/types.cpp +++ b/extras/analyze/src/types.cpp @@ -100,29 +100,18 @@ void ev::defTypes(py::module &Mod) { throw py::key_error(std::string(EvId)); } }) - .def("get", - [](const Block &Blk, std::string_view EvId, - py::object default_) -> py::object { - auto It = Blk.Events.find(EventId(EvId)); - if (It != Blk.Events.end()) { - return py::cast(*It); - } else { - return default_; - } - }) - .def("single", - [](const Block &Blk, std::string_view EvId) { - auto It = Blk.Events.find(EventId(EvId)); - if (It != Blk.Events.end()) { - if (It->size() != 1) { - throw std::invalid_argument("Multiple events for " + - std::string(EvId)); - } - 
return It->front(); - } else { - throw py::key_error(std::string(EvId)); + .def("_event_names", + [](const Block &Blk) { + std::vector Names; + Names.reserve(Blk.Events.size()); + + for (const auto &Events : Blk.Events) { + Names.push_back(ev::getId(Events).Value); } + + return Names; }) + .def_readonly("uniqueid", &Block::UniqueId) .def("__contains__", [](const Block &Blk, std::string_view EvId) { return Blk.Events.contains(EventId(EvId)); @@ -187,6 +176,7 @@ void ev::defTypes(py::module &Mod) { return It->get(); } }) + .def("__iter__", [](py::handle Logs) { return Logs.attr("benchmarks"); }) .def("__repr__", [](const ev::Logs &Logs) { std::string Result = " bool: + return event_name in self.__cpp + + def __iter__(self) -> Iterator[str]: + return iter(self.__cpp._event_names()) + + def __repr__(self): + return repr(self.__cpp) + + def uniqueid(self): + return self.__cpp.uniqueid + + class _BenchmarkBlocks: + def __init__(self, blocks: cpp._Blocks): + self.__cpp = blocks + + def __getitem__(self, index: int) -> _types.Block: + return Block(self.__cpp[index]) + + def __len__(self) -> int: + return len(self.__cpp) + + def __repr__(self): + return repr(self.__cpp) + + class Benchmark(_types.Benchmark): + def __init__(self, benchmark: cpp.Benchmark): + self.__cpp = benchmark + self.name = self.__cpp.name + + @property + def blocks(self): + return _BenchmarkBlocks(self.__cpp.blocks) + + @property + def raw_log(self): + return self.__cpp.raw_log + + # Inherit __iter__ + + # Inherit .benchmarks + + def __repr__(self): + return repr(self.__cpp) + + def keep_blocks_if(self, p): + return _types.Benchmark( + {'name': self.name}, + list(filter(p, self)), + ) + + class Logs(_types.Logs): + def __init__(self, logs: cpp.Logs): + self.__cpp = logs + self.benchmarks = list(Benchmark(bench) for bench in logs.benchmarks) + + @property + def raw_log(self): + return self.__cpp.raw_log + + def benchmark(self, name: str) -> _types.Benchmark: + for bench in self.benchmarks: + if 
bench.name == name: + return bench + + raise KeyError(f'No benchmark `{name}` in this Logs') + + def __iter__(self): + for bench in self.benchmarks: + yield from bench + + def __repr__(self): + return repr(self.__cpp) + + def keep_blocks_if(self, p): + return _types.Logs([b.keep_blocks_if(p) for b in self.benchmarks]) + + return { + 'Logs': Logs, + 'Benchmark': Benchmark, + 'Block': Block, + } + + +class _M: + def __init__(self): + self.__cpp = None + + @property + def VERSION(self): + return self.__cpp.VERSION + + @property + def __doc__(self): + return self.__cpp.__doc__ + + def load_module(self, cpp): + self.__cpp = cpp + classes = _make_classes(self.__cpp) + self.Logs = classes['Logs'] + self.Benchmark = classes['Benchmark'] + self.Block = classes['Block'] + + def parse_blocks(self, file, benchspec: Union[str, int]) -> _types.Logs: + return self.Logs(self.__cpp.parse_blocks(file, benchspec)) + + +sys.modules[__name__] = _M() diff --git a/util/analyze/_main.py b/util/analyze/_main.py index 68258d36..be88bd93 100644 --- a/util/analyze/_main.py +++ b/util/analyze/_main.py @@ -7,6 +7,7 @@ from typing import Callable from ._types import Block, Logs +from . import _cpp_types from .imports import * @@ -99,8 +100,11 @@ def cpp_parse_blocks_fn(): else: mod = __import__('eventanalyze') + _cpp_types.load_module(mod) + def parse(file): - return mod.parse_blocks(file, mod.SPEC_BENCH_RE) + return _cpp_types.parse_blocks(file, mod.SPEC_BENCH_RE) + return parse FILE_PARSERS = { diff --git a/util/analyze/_types.py b/util/analyze/_types.py index 8151bdc6..01339f16 100644 --- a/util/analyze/_types.py +++ b/util/analyze/_types.py @@ -1,3 +1,17 @@ +def merge_logs(lhs, rhs): + ''' + Merges the logs from the rhs into the lhs. 
+ + The rhs must have different benchmarks from the lhs + ''' + in_both = set(lhs.benchmarks) & set(rhs.benchmarks) + if in_both: + raise ValueError( + 'Cannot merge Logs which share common benchmarks', in_both) + + lhs.benchmarks += rhs.benchmarks + + class Logs: ''' Abstracts a log file as a collection of benchmarks @@ -9,21 +23,6 @@ class Logs: def __init__(self, benchmarks): self.benchmarks = benchmarks - def merge(self, rhs): - ''' - Merges the logs from the rhs into this. - - The rhs must have different benchmarks from this Logs - ''' - in_both = set(self.benchmarks) & set(rhs.benchmarks) - if in_both: - raise ValueError( - 'Cannot merge Logs which share common benchmarks', in_both) - - self.benchmarks += rhs.benchmarks - - return self - def benchmark(self, name): ''' Gets the benchmark with the specified name @@ -103,7 +102,7 @@ def single(self, event_name): raises AssertionError if there is not exactly one event with the specified name ''' - result = self.events[event_name] + result = self[event_name] if len(result) != 1: raise AssertionError(f'Multiple events for {event_name}') diff --git a/util/analyze/imports/import_plaidml.py b/util/analyze/imports/import_plaidml.py index dd890d75..b14af55a 100644 --- a/util/analyze/imports/import_plaidml.py +++ b/util/analyze/imports/import_plaidml.py @@ -3,7 +3,7 @@ import os import pathlib -from .._types import Logs +from .._types import Logs, merge_logs from . import import_utils @@ -22,7 +22,8 @@ def parse(path): with logfiles[0].open('r') as f: benchname = benchmark_dir.stem - result.merge( + merge_logs( + result, import_utils.parse_single_bench_file( f.read(), benchname=benchname) ) diff --git a/util/analyze/imports/import_shoc.py b/util/analyze/imports/import_shoc.py index 5174724d..874d2b10 100644 --- a/util/analyze/imports/import_shoc.py +++ b/util/analyze/imports/import_shoc.py @@ -4,7 +4,7 @@ import re import pathlib -from .._types import Logs +from .._types import Logs, merge_logs from . 
import import_utils @@ -25,7 +25,8 @@ def parse(path): for benchmark in benchmarks: with benchmark.open('r') as f: benchname = benchname_re.search(benchmark.stem).group(1) - result.merge( + merge_logs( + result, import_utils.parse_single_bench_file( f.read(), benchname=benchname) ) From 116b12beaf915303a883b8c83a2907cda8271549 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Fri, 20 Aug 2021 14:42:10 -0700 Subject: [PATCH 11/17] Revert back to using mmap to read the files --- extras/analyze/CMakeLists.txt | 8 ++++++++ extras/analyze/include/types.hpp | 5 ++++- extras/analyze/src/parse.cpp | 31 ++++--------------------------- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/extras/analyze/CMakeLists.txt b/extras/analyze/CMakeLists.txt index d64d1575..2a7382c2 100644 --- a/extras/analyze/CMakeLists.txt +++ b/extras/analyze/CMakeLists.txt @@ -6,6 +6,13 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) include(FetchContent) +FetchContent_Declare( + mio + GIT_REPOSITORY https://github.com/mandreyel/mio.git + GIT_TAG 3f86a95c0784d73ce6815237ec33ed25f233b643 +) +FetchContent_MakeAvailable(mio) + FetchContent_Declare( pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11.git @@ -38,3 +45,4 @@ file(GLOB_RECURSE sources CONFIGURE_DEPENDS "src/*.cpp") pybind11_add_module(eventanalyze ${sources}) target_include_directories(eventanalyze PUBLIC include) target_compile_features(eventanalyze PUBLIC cxx_std_20) +target_link_libraries(eventanalyze PRIVATE mio::mio) diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index 922976b7..5b966aa8 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -14,6 +14,8 @@ #include "py.hpp" +#include + namespace ev { using Number = std::variant; @@ -118,7 +120,8 @@ struct Benchmark { struct Logs { std::filesystem::path LogFile; - std::string RawLog; + mio::mmap_source MMap; + std::string_view RawLog; std::vector> Benchmarks; }; diff --git a/extras/analyze/src/parse.cpp 
b/extras/analyze/src/parse.cpp index 2a3e8e88..45d90659 100644 --- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -4,11 +4,8 @@ #include #include #include -#include #include #include -#include -#include #include #include #include @@ -26,28 +23,6 @@ using namespace ev; namespace py = pybind11; namespace fs = std::filesystem; -// Read a whole file in at once -std::string slurp(const fs::path &Path) { - // Open first to help ensure that we get the correct file size - std::ifstream File(Path); - - std::string Result; - Result.resize(fs::file_size(Path)); - - File.read(Result.data(), Result.size()); - // In case there's anything left over - while (File) { - static constexpr std::size_t BufSize = 1024; - std::array Buffer; - File.read(Buffer.data(), Buffer.size()); - Result.insert(Result.end(), Buffer.begin(), Buffer.end()); - } - - Result.erase(Result.find('\0'), Result.size()); - - return Result; -} - static constexpr std::string_view RegionNameEv = R"("event_id": "ProcessDag", "name": ")"; static const std::boyer_moore_horspool_searcher @@ -331,7 +306,8 @@ void ev::defParse(py::module &Mod) { } auto Logs = std::make_shared(); Logs->LogFile = std::move(Path); - Logs->RawLog = ::slurp(Logs->LogFile); + Logs->MMap = mio::mmap_source(Logs->LogFile.string()); + Logs->RawLog = std::string_view(Logs->MMap.data(), Logs->MMap.size()); const std::string_view File = Logs->RawLog; const std::vector BenchmarkSections = @@ -363,7 +339,8 @@ void ev::defParse(py::module &Mod) { std::string_view BenchmarkName) { auto Logs = std::make_shared(); Logs->LogFile = std::move(Path); - Logs->RawLog = ::slurp(Logs->LogFile); + Logs->MMap = mio::mmap_source(Logs->LogFile.string()); + Logs->RawLog = std::string_view(Logs->MMap.data(), Logs->MMap.size()); const std::string_view File = Logs->RawLog; Logs->Benchmarks.push_back( From 1fd40af3f1192e8830f36ed337ea2961b913a8c1 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 24 Aug 2021 11:02:50 -0700 Subject: [PATCH 
12/17] Slight perf benefit from using swiss tables absl::flat_hash_{set,map} are called "swiss tables", and they provide a small perf win for us. --- extras/analyze/CMakeLists.txt | 11 ++++++++++- extras/analyze/include/types.hpp | 4 +++- extras/analyze/src/parse.cpp | 12 ++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/extras/analyze/CMakeLists.txt b/extras/analyze/CMakeLists.txt index 2a7382c2..9bcd8c81 100644 --- a/extras/analyze/CMakeLists.txt +++ b/extras/analyze/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.20.3) project(EventAnalyze) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) include(FetchContent) @@ -20,6 +21,14 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(pybind11) +FetchContent_Declare( + abseil + GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git + GIT_TAG f39e6ad4753e06d4a0d6a9bf6310478757479984 +) +set(BUILD_TESTING OFF) +FetchContent_MakeAvailable(abseil) + FetchContent_Declare( FindTBB GIT_REPOSITORY https://github.com/justusc/FindTBB.git @@ -45,4 +54,4 @@ file(GLOB_RECURSE sources CONFIGURE_DEPENDS "src/*.cpp") pybind11_add_module(eventanalyze ${sources}) target_include_directories(eventanalyze PUBLIC include) target_compile_features(eventanalyze PUBLIC cxx_std_20) -target_link_libraries(eventanalyze PRIVATE mio::mio) +target_link_libraries(eventanalyze PRIVATE mio::mio absl::base absl::flat_hash_map) diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index 5b966aa8..b3422a7a 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -14,6 +14,8 @@ #include "py.hpp" +#include +#include #include namespace ev { @@ -91,7 +93,7 @@ struct EventIdEq { }; using BlockEventMap = - std::unordered_set, EventIdHash, EventIdEq>; + absl::flat_hash_set, EventIdHash, EventIdEq>; struct Logs; struct Benchmark; diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp index 45d90659..0f8692c5 100644 
--- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -10,8 +10,6 @@ #include #include #include -#include -#include #include #include @@ -65,9 +63,9 @@ static EventSchema parseEventSchema( // Schemas are globally loaded. // This static/thread_local dance is to make it appropriately thread safe but // still fast. -static std::unordered_set MasterSchemas; +static absl::flat_hash_set MasterSchemas; static std::mutex MasterSchemaMutex; -thread_local std::unordered_set Schemas; +thread_local absl::flat_hash_set Schemas; static void updateSchemaStructures(EventId Id, EventSchema schema) { std::scoped_lock Lock(MasterSchemaMutex); @@ -167,7 +165,7 @@ static const std::boyer_moore_horspool_searcher EventTagSearcher(EventTag.begin(), EventTag.end()); static BlockEventMap parseEvents(const std::string_view BlockLog) { - std::unordered_map, EventIdHash, EventIdEq> + absl::flat_hash_map, EventIdHash, EventIdEq> Result; const auto E = BlockLog.end(); @@ -205,7 +203,9 @@ static Block parseBlock(ev::Benchmark *Bench, const std::string_view BlockLog) { .RawLog = BlockLog, .UniqueId = std::move(UniqueId), .Bench = Bench, - .File = "", // TODO: Get this information too + // Extracting file info costs quite a bit of time, and we never use it + // anyway. + .File = "", }; } From 9bb6d0f3c8e35e4bb5046145c36604d66302f949 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 24 Aug 2021 11:31:06 -0700 Subject: [PATCH 13/17] Fix parsing skipped elements --- extras/analyze/src/parse.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/extras/analyze/src/parse.cpp b/extras/analyze/src/parse.cpp index 0f8692c5..7ffc250d 100644 --- a/extras/analyze/src/parse.cpp +++ b/extras/analyze/src/parse.cpp @@ -60,6 +60,8 @@ static EventSchema parseEventSchema( return Result; } +#include + // Schemas are globally loaded. // This static/thread_local dance is to make it appropriately thread safe but // still fast. 
@@ -92,22 +94,34 @@ static Event parseEvent(const std::string_view Event) { Event.find_first_not_of(" \t\n", Event.find(':', KeyE + 1) + 1); if (ValF == std::string_view::npos) break; - const auto ValE = Event[ValF] == '"' - ? Event.find('"', ValF + 1) + 1 - : Event.find_first_of(",} \t\n", ValF + 1); + const auto ValE = [&] { + if (Event[ValF] == '"') { + // Find the end of the string + return Event.find('"', ValF + // start after the open quote + + 1) + // include the end quote + + 1; + } else { + // Find the end of the number/bool/etc; either the next whitespace, the + // separating comma, or the end of the JSON object: + return Event.find_first_of(",} \t\n", ValF + 1); + } + }(); if (ValE == std::string_view::npos) break; std::string_view Val = Event.substr(ValF, ValE - ValF); Result.emplace_back(Key, Val); - Begin = Event.find_first_of(",}", ValE + 1); + // Find the start of the next element (if there is a next) + Begin = Event.find_first_of(",}", ValE); if (Begin == std::string_view::npos) break; Begin += 1; } assert(Result[0].first == "event_id"sv); - EventId Id(Result[0].second); + EventId Id(Result[0].second.substr(1, Result[0].second.size() - 2)); auto It = Schemas.find(Id); if (It == Schemas.end()) { From ab11924e5c8c362dafb4ae16aab4347d00fe6532 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 24 Aug 2021 13:15:51 -0700 Subject: [PATCH 14/17] Appease clang-format-9 --- extras/analyze/include/types.hpp | 2 ++ extras/analyze/src/types.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/extras/analyze/include/types.hpp b/extras/analyze/include/types.hpp index b3422a7a..fdb8997d 100644 --- a/extras/analyze/include/types.hpp +++ b/extras/analyze/include/types.hpp @@ -54,11 +54,13 @@ struct Event { inline EventId getId(EventId Id) { return Id; } +// clang-format off template requires requires(const T &It) { { It.Id } -> std::convertible_to; } EventId getId(const T &It) { return It.Id; } +// clang-format on template EventId getId(const std::vector 
&Vec) { assert(!Vec.empty()); diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp index a407631a..ec0ccc46 100644 --- a/extras/analyze/src/types.cpp +++ b/extras/analyze/src/types.cpp @@ -42,7 +42,9 @@ void ev::defTypes(py::module &Mod) { switch (Schema->ParamTypes[Index]) { case Type::Number: return std::visit( + // clang-format off [](T x) -> py::object { + // clang-format on if constexpr (std::same_as) return py::float_(x); else From cab001ec3b9f11aa473352fc2655d4aaae56e723 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 24 Aug 2021 13:41:52 -0700 Subject: [PATCH 15/17] Add GitHub script to build python module Manually triggered script which uploads the artifact. --- .../build-extras-cpp-eventanalyze.yml | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 .github/workflows/build-extras-cpp-eventanalyze.yml diff --git a/.github/workflows/build-extras-cpp-eventanalyze.yml b/.github/workflows/build-extras-cpp-eventanalyze.yml new file mode 100644 index 00000000..641a2a97 --- /dev/null +++ b/.github/workflows/build-extras-cpp-eventanalyze.yml @@ -0,0 +1,80 @@ +name: Build extras/analyze + +on: + workflow_dispatch: + inputs: + python-version: + description: 'Python version to use' + required: true + default: '3.8-dev' + ref: + description: 'The OptSched git ref to checkout to build' + required: true + default: 'master' + build_type: + description: 'CMAKE_BUILD_TYPE' + required: true + default: 'Release' + +jobs: + build: + runs-on: ubuntu-20.04 + + steps: + - name: Install APT dependencies + run: | + # For parallel STL + sudo apt-get install libtbb-dev + + # For latest C++ features + sudo add-apt-repository ppa:ubuntu-toolchain-r/test + sudo apt-get update + + sudo apt-get install g++-11 + + # For pyenv python + sudo apt-get install make build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev 
liblzma-dev + + - name: Install Python version + run: | + curl https://pyenv.run | bash + eval "$(pyenv init --path)" + echo "PYENV_ROOT=$HOME/.pyenv" >> $GITHUB_ENV + echo "$HOME/.pyenv/bin" >> $GITHUB_PATH + + export PYENV_ROOT="$HOME/.pyenv" + export PATH="$PYENV_ROOT/bin:$PATH" + + pyenv install ${{ github.event.inputs.python-version }} + pyenv global ${{ github.event.inputs.python-version }} + + python3 --version + + - uses: actions/checkout@v2 + with: + ref: ${{ github.event.inputs.ref }} + + - name: Configure + run: | + eval "$(pyenv init --path)" + + cmake -S extras/analyze -B build \ + -DCMAKE_BUILD_TYPE=${{ github.event.inputs.build_type }} \ + -DPython_FIND_UNVERSIONED_NAMES=FIRST \ + -DPYBIND11_FINDPYTHON=ON \ + -DCMAKE_CXX_COMPILER=g++-11 + + - name: Build + run: | + eval "$(pyenv init --path)" + + cmake --build build -j 2 + + - name: Upload Artifact + uses: actions/upload-artifact@v2 + with: + name: Python-${{ github.event.inputs.python-version }} ${{ github.event.inputs.build_type }} Module + path: build/eventanalyze.*.so + if-no-files-found: error From a61e63756e01a25a8ad9781abeb0b3afefde52e8 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 24 Aug 2021 15:50:03 -0700 Subject: [PATCH 16/17] Add `info' member to C++ Benchmark type --- extras/analyze/src/types.cpp | 6 ++++++ util/analyze/_cpp_types.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/extras/analyze/src/types.cpp b/extras/analyze/src/types.cpp index ec0ccc46..b849f5b9 100644 --- a/extras/analyze/src/types.cpp +++ b/extras/analyze/src/types.cpp @@ -141,6 +141,12 @@ void ev::defTypes(py::module &Mod) { .def_property_readonly( "blocks", [](const Benchmark &Bench) { return BenchmarkBlocks{Bench.Blocks}; }) + .def_property_readonly("info", + [](const Benchmark &Bench) -> py::dict { + using namespace pybind11::literals; + + return py::dict("name"_a = Bench.Name); + }) .def("__repr__", [](const Benchmark &Bench) { return ""; diff --git a/util/analyze/_cpp_types.py 
b/util/analyze/_cpp_types.py index 7e99f8c3..24593bc1 100644 --- a/util/analyze/_cpp_types.py +++ b/util/analyze/_cpp_types.py @@ -62,6 +62,10 @@ def blocks(self): def raw_log(self): return self.__cpp.raw_log + @property + def info(self): + return self.__cpp.info + # Inherit __iter__ # Inherit .benchmarks From debc241ce931d067a0be6f5d077fa0efca0166fb Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 24 Aug 2021 17:04:10 -0700 Subject: [PATCH 17/17] Bundle the shared objects with the python module This way, by placing all of these modules in a directory on the LD_LIBRARY_PATH, g++-11, libtbb, etc. do not need to be installed to use the Python module, as the needed libraries are bundled here. --- .github/workflows/build-extras-cpp-eventanalyze.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-extras-cpp-eventanalyze.yml b/.github/workflows/build-extras-cpp-eventanalyze.yml index 641a2a97..48db3d70 100644 --- a/.github/workflows/build-extras-cpp-eventanalyze.yml +++ b/.github/workflows/build-extras-cpp-eventanalyze.yml @@ -72,9 +72,17 @@ jobs: cmake --build build -j 2 + - name: Bundle Shared Objects + run: | + cd build + + # Copy the shared object dependencies of this Python module to the current directory + ldd eventanalyze.*.so | sed -E 's/^.*=> (\S+).*$|(\S+) .*$/\1/g' | xargs -I {} cp {} . + chmod +x lib*.so* + - name: Upload Artifact uses: actions/upload-artifact@v2 with: name: Python-${{ github.event.inputs.python-version }} ${{ github.event.inputs.build_type }} Module - path: build/eventanalyze.*.so + path: build/*.so* if-no-files-found: error