diff --git a/llvm/docs/Telemetry.rst b/llvm/docs/Telemetry.rst new file mode 100644 index 0000000000000..e9d0d2cf95220 --- /dev/null +++ b/llvm/docs/Telemetry.rst @@ -0,0 +1,257 @@ +=========================== +Telemetry framework in LLVM +=========================== + +.. contents:: + :local: + +.. toctree:: + :hidden: + +Objective +========= + +Provides a common framework in LLVM for collecting various usage and performance +metrics. +It is located at ``llvm/Telemetry/Telemetry.h``. + +Characteristics +--------------- +* Configurable and extensible by: + + * Tools: any tool that wants to use Telemetry can extend and customize it. + * Vendors: Toolchain vendors can also provide a custom implementation of the + library, which could either override or extend the given tool's upstream + implementation, to best fit their organization's usage and privacy models. + * End users of such tools can also configure Telemetry (as allowed by their + vendor). + +Important notes +--------------- + +* There is no concrete implementation of a Telemetry library in upstream LLVM. + We only provide the abstract API here. Any tool that wants telemetry will + implement one. + + The rationale for this is that all the tools in LLVM are very different in + what they care about (what/where/when to instrument data). Hence, it might not + be practical to have a single implementation. + However, in the future, if we see enough common patterns, we can extract them + into a shared place. This is TBD - contributions are welcome. + +* No implementation of Telemetry in upstream LLVM shall store any of the + collected data due to privacy and security reasons: + + * Different organizations have different privacy models: + + * Which data is sensitive, which is not? + * Whether it is acceptable for instrumented data to be stored anywhere + (e.g., to a local file or elsewhere)? 
+ + * Data ownership and data-collection consent are hard to accommodate from + the LLVM developers' point of view: + + * E.g., data collected by Telemetry is not necessarily owned by the user + of an LLVM tool with Telemetry enabled, hence the user's consent to data + collection is not meaningful. On the other hand, LLVM developers have no + reasonable way to request consent from the "real" owners. + + +High-level design +================= + +Key components +-------------- + +The framework consists of four important classes: + +* ``llvm::telemetry::Manager``: The class responsible for collecting and + transmitting telemetry data. This is the main point of interaction between the + framework and any tool that wants to enable telemetry. +* ``llvm::telemetry::TelemetryInfo``: Data courier. +* ``llvm::telemetry::Destination``: Data sink to which the Telemetry framework + sends data. + Its implementation is transparent to the framework. + It is up to the vendor to decide which pieces of data to forward and where + to forward them for their final storage. +* ``llvm::telemetry::Config``: Configurations for the ``Manager``. + +.. image:: llvm_telemetry_design.png + +How to implement and interact with the API +------------------------------------------ + +To use Telemetry in your tool, you need to provide a concrete implementation of the ``Manager`` class and ``Destination``. + +1) Define a custom ``Serializer``, ``Manager``, ``Destination`` and optionally a subclass of ``TelemetryInfo`` + +.. code-block:: c++ + + class JsonSerializer : public Serializer { + public: + json::Object *getOutputObject() { return Out.get(); } + + Error init() override { + if (Started) + return createStringError("Serializer already in use"); + Started = true; + Out = std::make_unique<json::Object>(); + return Error::success(); + } + + // Serialize the given value. 
+ void write(StringRef KeyName, bool Value) override { + writeHelper(KeyName, Value); + } + + void write(StringRef KeyName, int Value) override { + writeHelper(KeyName, Value); + } + + void write(StringRef KeyName, unsigned int Value) override { + writeHelper(KeyName, Value); + } + + void write(StringRef KeyName, unsigned long Value) override { + writeHelper(KeyName, Value); + } + + void write(StringRef KeyName, long Value) override { + writeHelper(KeyName, Value); + } + + void write(StringRef KeyName, long long Value) override { + writeHelper(KeyName, Value); + } + + void write(StringRef KeyName, unsigned long long Value) override { + writeHelper(KeyName, Value); + } + + void write(StringRef KeyName, StringRef Value) override { + writeHelper(KeyName, Value); + } + + void beginObject(StringRef KeyName) override { + Children.push_back(json::Object()); + ChildrenNames.push_back(KeyName.str()); + } + + void endObject() override { + assert(!Children.empty() && !ChildrenNames.empty()); + json::Value Val = json::Value(std::move(Children.back())); + std::string Name = ChildrenNames.back(); + + Children.pop_back(); + ChildrenNames.pop_back(); + writeHelper(Name, std::move(Val)); + } + + Error finalize() override { + if (!Started) + return createStringError("Serializer not currently in use"); + Started = false; + return Error::success(); + } + + private: + template <typename T> void writeHelper(StringRef Name, T Value) { + assert(Started && "serializer not started"); + if (Children.empty()) + Out->try_emplace(Name, Value); + else + Children.back().try_emplace(Name, Value); + } + bool Started = false; + std::unique_ptr<json::Object> Out; + std::vector<json::Object> Children; + std::vector<std::string> ChildrenNames; + }; + + class MyManager : public telemetry::Manager { + public: + static std::unique_ptr<MyManager> createInstance(telemetry::Config *Config) { + // If Telemetry is not enabled, then just return null; + if (!Config->EnableTelemetry) + return nullptr; + return std::make_unique<MyManager>(); + } + MyManager() = default; + + Error 
+ preDispatch(TelemetryInfo *Entry) override { + Entry->SessionId = SessionId; + return Error::success(); + } + + // You can also define additional instrumentation points. + void logStartup(TelemetryInfo *Entry) { + // Add some additional data to entry. + Entry->Msg = "Some message"; + dispatch(Entry); + } + + void logAdditionalPoint(TelemetryInfo *Entry) { + // .... code here + } + + private: + const std::string SessionId; + }; + + class MyDestination : public telemetry::Destination { + public: + Error receiveEntry(const TelemetryInfo *Entry) override { + if (Error Err = Serializer.init()) + return Err; + + Entry->serialize(Serializer); + if (Error Err = Serializer.finalize()) + return Err; + + json::Object Copied = *Serializer.getOutputObject(); + // Send the `Copied` object wherever it needs to go. + return Error::success(); + } + + StringLiteral name() const override { return "MyDestination"; } + + private: + JsonSerializer Serializer; + }; + + // This defines a custom TelemetryInfo that has an additional Msg field. + struct MyTelemetryInfo : public telemetry::TelemetryInfo { + std::string Msg; + + void serialize(Serializer &Serializer) const override { + TelemetryInfo::serialize(Serializer); + Serializer.write("MyMsg", Msg); + } + + // Note: implement getKind() and classof() to support dyn_cast operations. + }; + + +2) Use the library in your tool. + +Logging the tool init-process: + +.. code-block:: c++ + + // In tool's initialization code. + auto StartTime = std::chrono::steady_clock::now(); + telemetry::Config MyConfig = makeConfig(); // Build up the appropriate Config struct here. + auto Manager = MyManager::createInstance(&MyConfig); + + + // Any other tool's init code can go here. + // ... + + // Finally, take a snapshot of the time now so we know how long it took the + // init process to finish. + auto EndTime = std::chrono::steady_clock::now(); + MyTelemetryInfo Entry; + + Entry.Start = StartTime; + Entry.End = EndTime; + Manager->logStartup(&Entry); + +Similar code can be used for logging the tool's exit. 
diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index 0b204d512876a..6eee564713d6d 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -72,6 +72,7 @@ intermediate LLVM representation. SupportLibrary TableGen/index TableGenFundamentals + Telemetry Vectorizers WritingAnLLVMPass WritingAnLLVMNewPMPass @@ -293,3 +294,6 @@ Additional Topics :doc:`Sandbox IR ` This document describes the design and usage of Sandbox IR, a transactional layer over LLVM IR. + +:doc:`Telemetry` + This document describes the Telemetry framework in LLVM. diff --git a/llvm/docs/llvm_telemetry_design.png b/llvm/docs/llvm_telemetry_design.png new file mode 100644 index 0000000000000..48fbc003da18e Binary files /dev/null and b/llvm/docs/llvm_telemetry_design.png differ diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h new file mode 100644 index 0000000000000..ed519448315c2 --- /dev/null +++ b/llvm/include/llvm/Telemetry/Telemetry.h @@ -0,0 +1,162 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides the basic framework for Telemetry. +/// Refer to its documentation at llvm/docs/Telemetry.rst for more details. 
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_TELEMETRY_TELEMETRY_H
+#define LLVM_TELEMETRY_TELEMETRY_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+namespace llvm {
+namespace telemetry {
+
+/// Interface through which a TelemetryInfo writes out its fields as
+/// key/value pairs.
+///
+/// One serialization pass is bracketed by init() and finalize(); a nested
+/// group of keys is bracketed by beginObject() and endObject().
+class Serializer {
+public:
+  // Serializers are implemented by subclasses, so destruction through a
+  // base-class pointer must be well-defined.
+  virtual ~Serializer() = default;
+
+  virtual Error init() = 0;
+  virtual void write(StringRef KeyName, bool Value) = 0;
+  virtual void write(StringRef KeyName, StringRef Value) = 0;
+
+  /// Writes any integral value by forwarding it to writeSigned() or
+  /// writeUnsigned(), so implementations only have to provide those two
+  /// hooks.
+  template <typename T>
+  std::enable_if_t<std::is_integral_v<T>> write(StringRef KeyName, T Value) {
+    if constexpr (std::is_signed_v<T>)
+      writeSigned(KeyName, Value);
+    else
+      writeUnsigned(KeyName, Value);
+  }
+
+  /// Writes a map-like container as a nested object named \p KeyName, with
+  /// one key/value entry per element.
+  // The defaulted template parameter restricts this overload to types that
+  // expose `mapped_type`; without it a std::string argument would pick this
+  // template instead of converting to the StringRef overload.
+  template <typename T, typename = typename T::mapped_type>
+  void write(StringRef KeyName, const T &Map) {
+    static_assert(std::is_convertible_v<typename T::key_type, StringRef>,
+                  "KeyType must be convertible to string");
+    beginObject(KeyName);
+    for (const auto &KeyVal : Map)
+      write(KeyVal.first, KeyVal.second);
+    endObject();
+  }
+
+  virtual void beginObject(StringRef KeyName) = 0;
+  virtual void endObject() = 0;
+
+  virtual Error finalize() = 0;
+
+private:
+  // Implementation hooks behind the integral write() template above.
+  virtual void writeUnsigned(StringRef KeyName, unsigned long long) = 0;
+  virtual void writeSigned(StringRef KeyName, long long) = 0;
+};
+
+/// Configuration for the Manager class.
+/// This stores configurations from both users and vendors and is passed
+/// to the Manager upon construction. (Any changes to the config after
+/// the Manager's construction will not have any effect on it).
+///
+/// This struct can be extended as needed to add additional configuration
+/// points specific to a vendor's implementation.
+struct Config {
+  // If true, telemetry will be enabled.
+  const bool EnableTelemetry;
+  Config(bool E) : EnableTelemetry(E) {}
+  // Config is designed to be subclassed (see makeSessionId()), so it needs
+  // a virtual destructor.
+  virtual ~Config() = default;
+
+  /// Returns the session ID to use, if any. The base implementation
+  /// returns std::nullopt.
+  virtual std::optional<std::string> makeSessionId() { return std::nullopt; }
+};
+
+/// For isa, dyn_cast, etc operations on TelemetryInfo.
+using KindType = unsigned;
+/// This struct is used by TelemetryInfo to support isa<>, dyn_cast<>
+/// operations.
+/// It is defined as a struct (rather than an enum) because it is
+/// expected to be extended by subclasses which may have
+/// additional TelemetryInfo types defined to describe different events.
+struct EntryKind {
+  // constexpr (implicitly inline in C++17) so that odr-uses of Base, such as
+  // binding it to a const reference, do not need an out-of-line definition.
+  static constexpr KindType Base = 0;
+};
+
+/// TelemetryInfo is the data courier, used to move instrumented data
+/// from the tool being monitored to the Telemetry framework.
+///
+/// This base class contains only the basic set of telemetry data.
+/// Downstream implementations can define more subclasses with
+/// additional fields to describe different events and concepts.
+///
+/// For example, The LLDB debugger can define a DebugCommandInfo subclass
+/// which has additional fields about the debug-command being instrumented,
+/// such as `CommandArguments` or `CommandName`.
+struct TelemetryInfo {
+  // This represents a unique-id, conventionally corresponding to
+  // a tool's session - i.e., every time the tool starts until it exits.
+  //
+  // Note: a tool could have multiple sessions running at once, in which
+  // case, these shall be multiple sets of TelemetryInfo with multiple unique
+  // IDs.
+  //
+  // Different usages can assign different types of IDs to this field.
+  std::string SessionId;
+
+  TelemetryInfo() = default;
+  virtual ~TelemetryInfo() = default;
+
+  /// Writes this entry's fields to \p serializer. The base implementation
+  /// writes only SessionId.
+  virtual void serialize(Serializer &serializer) const;
+
+  // For isa, dyn_cast, etc, operations.
+  virtual KindType getKind() const { return EntryKind::Base; }
+  static bool classof(const TelemetryInfo *T) {
+    return T->getKind() == EntryKind::Base;
+  }
+};
+
+/// This class presents a data sink to which the Telemetry framework
+/// sends data.
+///
+/// Its implementation is transparent to the framework.
+/// It is up to the vendor to decide which pieces of data to forward
+/// and where to forward them.
+class Destination {
+public:
+  virtual ~Destination() = default;
+  /// Consumes one entry. Manager::dispatch() calls this on every registered
+  /// Destination and joins the returned errors.
+  virtual Error receiveEntry(const TelemetryInfo *Entry) = 0;
+  virtual StringLiteral name() const = 0;
+};
+
+/// This class is the main interaction point between any LLVM tool
+/// and this framework.
+/// It is responsible for collecting telemetry data from the tool being
+/// monitored and transmitting the data elsewhere.
+class Manager {
+public:
+  // Managers are always subclassed (preDispatch() is pure virtual), so
+  // destruction through a Manager pointer must be well-defined.
+  virtual ~Manager() = default;
+
+  // Hook invoked by dispatch() before the entry reaches any Destination.
+  // Subclasses must implement it; returning a failure makes dispatch()
+  // return that error without forwarding the entry.
+  virtual Error preDispatch(TelemetryInfo *Entry) = 0;
+
+  // Dispatch Telemetry data to the Destination(s).
+  // The argument is non-const because the Manager may add or remove
+  // data from the entry.
+  virtual Error dispatch(TelemetryInfo *Entry);
+
+  // Register a Destination. The Manager takes ownership of it.
+  void addDestination(std::unique_ptr<Destination> Destination);
+
+private:
+  std::vector<std::unique_ptr<Destination>> Destinations;
+};
+
+} // namespace telemetry
+} // namespace llvm
+
+#endif // LLVM_TELEMETRY_TELEMETRY_H
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index 503c77cb13bd0..f6465612d30c0 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -41,6 +41,7 @@ add_subdirectory(ProfileData)
 add_subdirectory(Passes)
 add_subdirectory(TargetParser)
 add_subdirectory(TextAPI)
+add_subdirectory(Telemetry)
 add_subdirectory(ToolDrivers)
 add_subdirectory(XRay)
 if (LLVM_INCLUDE_TESTS)
diff --git a/llvm/lib/Telemetry/CMakeLists.txt b/llvm/lib/Telemetry/CMakeLists.txt
new file mode 100644
index 0000000000000..8208bdadb05e9
--- /dev/null
+++ b/llvm/lib/Telemetry/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_component_library(LLVMTelemetry
+  Telemetry.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  "${LLVM_MAIN_INCLUDE_DIR}/llvm/Telemetry"
+)
diff --git a/llvm/lib/Telemetry/Telemetry.cpp b/llvm/lib/Telemetry/Telemetry.cpp
new file mode 100644
index 0000000000000..de8e77d52623c
--- /dev/null
+++ b/llvm/lib/Telemetry/Telemetry.cpp
@@ -0,0 +1,26 @@
+#include "llvm/Telemetry/Telemetry.h"
+
+namespace
llvm {
+namespace telemetry {
+
+// The base entry carries a single field, the session ID.
+void TelemetryInfo::serialize(Serializer &serializer) const {
+  serializer.write("SessionId", SessionId);
+}
+
+// Runs preDispatch() and, if it succeeds, hands the entry to every
+// registered Destination. A failing destination does not stop the loop;
+// all failures are joined into the returned Error.
+Error Manager::dispatch(TelemetryInfo *Entry) {
+  // A named local is implicitly moved on return, so no std::move is needed.
+  if (Error Err = preDispatch(Entry))
+    return Err;
+
+  Error AllErrs = Error::success();
+  for (auto &Dest : Destinations)
+    AllErrs = joinErrors(std::move(AllErrs), Dest->receiveEntry(Entry));
+  return AllErrs;
+}
+
+// Takes ownership of Dest; it will receive every subsequently dispatched
+// entry.
+void Manager::addDestination(std::unique_ptr<Destination> Dest) {
+  Destinations.push_back(std::move(Dest));
+}
+
+} // namespace telemetry
+} // namespace llvm
diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt
index 8892f3e75729a..81abce51b8939 100644
--- a/llvm/unittests/CMakeLists.txt
+++ b/llvm/unittests/CMakeLists.txt
@@ -63,6 +63,7 @@ add_subdirectory(Support)
 add_subdirectory(TableGen)
 add_subdirectory(Target)
 add_subdirectory(TargetParser)
+add_subdirectory(Telemetry)
 add_subdirectory(Testing)
 add_subdirectory(TextAPI)
 add_subdirectory(Transforms)
diff --git a/llvm/unittests/Telemetry/CMakeLists.txt b/llvm/unittests/Telemetry/CMakeLists.txt
new file mode 100644
index 0000000000000..a40ae4b2f5560
--- /dev/null
+++ b/llvm/unittests/Telemetry/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  Telemetry
+  Core
+  Support
+  )
+
+add_llvm_unittest(TelemetryTests
+  TelemetryTest.cpp
+  )
diff --git a/llvm/unittests/Telemetry/TelemetryTest.cpp b/llvm/unittests/Telemetry/TelemetryTest.cpp
new file mode 100644
index 0000000000000..05523f1bcfaa2
--- /dev/null
+++ b/llvm/unittests/Telemetry/TelemetryTest.cpp
@@ -0,0 +1,242 @@
+//===- llvm/unittest/Telemetry/TelemetryTest.cpp - Telemetry unittests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Telemetry/Telemetry.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "gtest/gtest.h"
+#include <optional>
+#include <vector>
+
+namespace llvm {
+namespace telemetry {
+// Testing parameters.
+//
+// These are set by each test to force certain outcomes.
+struct TestContext {
+  // Controlling whether there is vendor plugin.  In "real" implementation, the
+  // plugin-registration framework will handle the overrides but for tests, we
+  // just use a bool flag to decide which function to call.
+  bool HasVendorPlugin = false;
+
+  // This field contains data emitted by the framework for later
+  // verification by the tests.
+  std::string Buffer = "";
+
+  // The expected Uuid generated by the fake tool.
+  std::string ExpectedUuid = "";
+};
+
+// Serializer that renders every key/value pair as a "Key:Value\n" line in a
+// string buffer. A nested object is rendered as its own buffer (starting
+// with "\n") and written as the value of its key when it is closed.
+class StringSerializer : public Serializer {
+public:
+  const std::string &getString() { return Buffer; }
+
+  Error init() override {
+    if (Started)
+      return createStringError("Serializer already in use");
+    Started = true;
+    Buffer.clear();
+    return Error::success();
+  }
+
+  void write(StringRef KeyName, bool Value) override {
+    writeHelper(KeyName, Value);
+  }
+
+  void write(StringRef KeyName, StringRef Value) override {
+    writeHelper(KeyName, Value);
+  }
+
+  void beginObject(StringRef KeyName) override {
+    Children.push_back(std::string("\n"));
+    ChildrenNames.push_back(KeyName.str());
+  }
+
+  void endObject() override {
+    assert(!Children.empty() && !ChildrenNames.empty());
+    std::string ChildBuff = Children.back();
+    std::string Name = ChildrenNames.back();
+    // Pop before writing so the finished child lands in its parent (or in
+    // the top-level buffer) rather than in itself.
+    Children.pop_back();
+    ChildrenNames.pop_back();
+    writeHelper(Name, ChildBuff);
+  }
+
+  Error finalize() override {
+    assert(Children.empty() && ChildrenNames.empty());
+    if (!Started)
+      return createStringError("Serializer not currently in use");
+    Started = false;
+    return Error::success();
+  }
+
+private:
+  // Appends "Name:Value\n" to the innermost open object, or to the
+  // top-level buffer when no object is open.
+  template <typename T> void writeHelper(StringRef Name, T Value) {
+    assert(Started && "serializer not started");
+    if (Children.empty())
+      Buffer.append((Name + ":" + Twine(Value) + "\n").str());
+    else
+      Children.back().append((Name + ":" + Twine(Value) + "\n").str());
+  }
+
+  void writeUnsigned(StringRef KeyName, unsigned long long Value) override {
+    writeHelper(KeyName, Value);
+  }
+
+  void writeSigned(StringRef KeyName, long long Value) override {
+    writeHelper(KeyName, Value);
+  }
+
+  bool Started = false;
+  std::string Buffer;
+  std::vector<std::string> Children;
+  std::vector<std::string> ChildrenNames;
+};
+
+namespace vendor {
+struct VendorConfig : public Config {
+  VendorConfig(bool Enable) : Config(Enable) {}
+  // Hands out "0", "1", ... from a process-wide counter, so the value seen
+  // by a test depends on how many IDs were generated before it.
+  std::optional<std::string> makeSessionId() override {
+    static int seed = 0;
+    return std::to_string(seed++);
+  }
+};
+
+std::shared_ptr<Config> getTelemetryConfig(const TestContext &Ctxt) {
+  return std::make_shared<VendorConfig>(/*EnableTelemetry=*/true);
+}
+
+// Destination that serializes each entry with StringSerializer and appends
+// the result to the TestContext buffer for later verification.
+class TestStorageDestination : public Destination {
+public:
+  TestStorageDestination(TestContext *Ctxt) : CurrentContext(Ctxt) {}
+
+  Error receiveEntry(const TelemetryInfo *Entry) override {
+    if (Error Err = serializer.init())
+      return Err;
+
+    Entry->serialize(serializer);
+    if (Error Err = serializer.finalize())
+      return Err;
+
+    CurrentContext->Buffer.append(serializer.getString());
+    return Error::success();
+  }
+
+  StringLiteral name() const override { return "TestDestination"; }
+
+private:
+  TestContext *CurrentContext;
+  StringSerializer serializer;
+};
+
+struct StartupInfo : public TelemetryInfo {
+  std::string ToolName;
+  std::map<std::string, std::string> MetaData;
+
+  void serialize(Serializer &serializer) const override {
+    TelemetryInfo::serialize(serializer);
+    serializer.write("ToolName", ToolName);
+    serializer.write("MetaData", MetaData);
+  }
+};
+
+struct ExitInfo : public TelemetryInfo {
+  // Initialized so that serializing a default-constructed entry never reads
+  // an indeterminate value.
+  int ExitCode = 0;
+  std::string ExitDesc;
+  void serialize(Serializer &serializer) const override {
+    TelemetryInfo::serialize(serializer);
+    serializer.write("ExitCode", ExitCode);
+    serializer.write("ExitDesc", ExitDesc);
+  }
+};
+
+class TestManager : public Manager {
+public:
+  // Returns null when telemetry is disabled; otherwise builds a manager
+  // with a fresh session ID and a single TestStorageDestination.
+  static std::unique_ptr<TestManager>
+  createInstance(Config *Config, TestContext *CurrentContext) {
+    if (!Config->EnableTelemetry)
+      return nullptr;
+    // The base Config returns std::nullopt here, so do not dereference the
+    // optional unconditionally; fall back to an empty ID instead.
+    CurrentContext->ExpectedUuid = Config->makeSessionId().value_or("");
+    std::unique_ptr<TestManager> Ret = std::make_unique<TestManager>(
+        CurrentContext, CurrentContext->ExpectedUuid);
+
+    // Add a destination.
+    Ret->addDestination(
+        std::make_unique<TestStorageDestination>(CurrentContext));
+
+    return Ret;
+  }
+
+  TestManager(TestContext *Ctxt, std::string Id)
+      : CurrentContext(Ctxt), SessionId(Id) {}
+
+  // Stamps every outgoing entry with this manager's session ID.
+  Error preDispatch(TelemetryInfo *Entry) override {
+    Entry->SessionId = SessionId;
+    return Error::success();
+  }
+
+  std::string getSessionId() { return SessionId; }
+
+private:
+  TestContext *CurrentContext;
+  const std::string SessionId;
+};
+} // namespace vendor
+
+// Stand-in for plugin lookup: returns the vendor's (enabled) config when a
+// vendor plugin is present, and a disabled base Config otherwise.
+std::shared_ptr<Config> getTelemetryConfig(const TestContext &Ctxt) {
+  if (Ctxt.HasVendorPlugin)
+    return vendor::getTelemetryConfig(Ctxt);
+
+  return std::make_shared<Config>(false);
+}
+
+TEST(TelemetryTest, TelemetryDisabled) {
+  TestContext Context;
+  Context.HasVendorPlugin = false;
+
+  std::shared_ptr<Config> Config = getTelemetryConfig(Context);
+  auto Manager = vendor::TestManager::createInstance(Config.get(), &Context);
+  EXPECT_EQ(nullptr, Manager);
+}
+
+TEST(TelemetryTest, TelemetryEnabled) {
+  const std::string ToolName = "TelemetryTestTool";
+
+  // Preset some params.
+  TestContext Context;
+  Context.HasVendorPlugin = true;
+  Context.Buffer.clear();
+
+  std::shared_ptr<Config> Config = getTelemetryConfig(Context);
+  auto Manager = vendor::TestManager::createInstance(Config.get(), &Context);
+  // Everything below dereferences Manager, so stop here if it is null.
+  ASSERT_NE(nullptr, Manager);
+
+  EXPECT_EQ(Manager->getSessionId(), Context.ExpectedUuid);
+
+  vendor::StartupInfo S;
+  S.ToolName = ToolName;
+  S.MetaData["a"] = "A";
+  S.MetaData["b"] = "B";
+
+  Error startupEmitStatus = Manager->dispatch(&S);
+  EXPECT_FALSE(startupEmitStatus);
+  // The session ID comes from a static counter, so build the expectation
+  // from the ID actually handed out instead of hard-coding "0".
+  std::string ExpectedBuffer =
+      "SessionId:" + Context.ExpectedUuid +
+      "\nToolName:TelemetryTestTool\nMetaData:\na:A\nb:B\n\n";
+  EXPECT_EQ(ExpectedBuffer, Context.Buffer);
+  Context.Buffer.clear();
+
+  vendor::ExitInfo E;
+  E.ExitCode = 0;
+  E.ExitDesc = "success";
+  Error exitEmitStatus = Manager->dispatch(&E);
+  EXPECT_FALSE(exitEmitStatus);
+  ExpectedBuffer = "SessionId:" + Context.ExpectedUuid +
+                   "\nExitCode:0\nExitDesc:success\n";
+  EXPECT_EQ(ExpectedBuffer, Context.Buffer);
+}
+
+} // namespace telemetry
+} // namespace llvm