llvm · oontvoo · Dec 20, 2024 · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024
diff --git a/llvm/docs/Telemetry.rst b/llvm/docs/Telemetry.rst
@@ -0,0 +1,214 @@
+===========================
+Telemetry framework in LLVM
+===========================
+
+.. contents::
+   :local:
+
+.. toctree::
+   :hidden:
+
+===========================
+Telemetry framework in LLVM
+===========================
+
+Objective
+=========
+
+Provides a common framework in LLVM for collecting various usage and performance
+metrics.
+It is located at `llvm/Telemetry/Telemetry.h`
+
+Characteristics
+---------------
+* Configurable and extensible by:
+
+  * Tools: any tool that wants to use Telemetry can extend and customize it.
+  * Vendors: Toolchain vendors can also provide custom implementation of the
+    library, which could either override or extend the given tool's upstream
+    implementation, to best fit their organization's usage and privacy models.
+  * End users of such tool can also configure Telemetry (as allowed by their
+    vendor).
+
+
+Important notes
+----------------
+
+* There is no concrete implementation of a Telemetry library in upstream LLVM.
+  We only provide the abstract API here. Any tool that wants telemetry will
+  implement one.
+
+  The rationale for this is that, all the tools in LLVM are very different in
+  what they care about (what/where/when to instrument data). Hence, it might not
+  be practical to have a single implementation.
+  However, in the future, if we see enough common pattern, we can extract them
+  into a shared place. This is TBD - contributions are welcomed.
+
+* No implementation of Telemetry in upstream LLVM shall store any of the
+  collected data due to privacy and security reasons:
+
+  * Different organizations have different privacy models:
+
+    * Which data is sensitive, which is not?
+    * Whether it is acceptable for instrumented data to be stored anywhere?
+      (to a local file, what not?)
+
+  * Data ownership and data collection consents are hard to accommodate from
+    LLVM developers' point of view:
+
+    * Eg., data collected by Telemetry is not neccessarily owned by the user
+      of an LLVM tool with Telemetry enabled, hence the user's consent to data
+      collection is not meaningful. On the other hand, LLVM developers have no
+      reasonable ways to request consent from the "real" owners.
+
+
+High-level design
+=================
+
+Key components
+--------------
+
+The framework consists of four important classes:
+
+* `llvm::telemetry::Telemeter`: The class responsible for collecting and
+  transmitting telemetry data. This is the main point of interaction between the
+  framework and any tool that wants to enable telemery.
+* `llvm::telemetry::TelemetryInfo`: Data courier
+* `llvm::telemetry::Destination`: Data sink to which the Telemetry framework
+  sends data.
+  Its implementation is transparent to the framework.
+  It is up to the vendor to decide which pieces of data to forward and where
+  to forward them to their final storage.
+* `llvm::telemetry::Config`: Configurations on the `Telemeter`.
+
+.. image:: llvm_telemetry_design.png
+
+How to implement and interact with the API
+------------------------------------------
+
+To use Telemetry in your tool, you need to provide a concrete implementation of the `Telemeter` class and `Destination`.
+
+1) Define a custom `Telemeter` and `Destination`
+
+.. code-block:: c++
+
+    // This destination just prints the given entry to a stdout.
+    // In "real life", this would be where you forward the data to your
+    // custom data storage.
+    class MyStdoutDestination : public llvm::telemetry::Destination {
+    public:
+      Error emitEntry(const TelemetryInfo* Entry) override {
+         return sendToBlackBox(Entry);
+
+      }
+
+    private:
+      Error sendToBlackBox(const TelemetryInfo* Entry) {
+          // This could send the data anywhere.
+          // But we're simply sending it to stdout for the example.
+          llvm::outs() << entryToString(Entry) << "\n";
+          return llvm::success();
+      }
+
+      std::string entryToString(const TelemetryInfo* Entry) {
+        // make a string-representation of the given entry.
+      }
+    };
+
+    // This defines a custom TelemetryInfo that has an addition Msg field.
+    struct MyTelemetryInfo : public llvm::telemetry::TelemetryInfo {
+      std::string Msg;
+
+      json::Object serializeToJson() const {
+        json::Object Ret = TelemeteryInfo::serializeToJson();
+        Ret.emplace_back("MyMsg", Msg);
+        return std::move(Ret);
+      }
+
+      // TODO: implement getKind() and classof() to support dyn_cast operations.
+    };
+
+    class MyTelemeter : public llvm::telemery::Telemeter {
+    public:
+      static std::unique_ptr<MyTelemeter> createInstatnce(llvm::telemetry::Config* config) {
+        // If Telemetry is not enabled, then just return null;
+        if (!config->EnableTelemetry) return nullptr;
+
+        std::make_unique<MyTelemeter>();
+      }
+      MyTelemeter() = default;
+
+      void logStartup(llvm::StringRef ToolName, TelemetryInfo* Entry) override {
+        if (MyTelemetryInfo* M = dyn_cast<MyTelemetryInfo>(Entry)) {
+          M->Msg = "Starting up tool with name: " + ToolName;
+          emitToAllDestinations(M);
+        } else {
+          emitToAllDestinations(Entry);
+        }
+      }
+
+      void logExit(llvm::StringRef ToolName, TelemetryInfo* Entry) override {
+        if (MyTelemetryInfo* M = dyn_cast<MyTelemetryInfo>(Entry)) {
+          M->Msg = "Exitting tool with name: " + ToolName;
+          emitToAllDestinations(M);
+        } else {
+          emitToAllDestinations(Entry);
+        }
+      }
+
+      void addDestination(Destination* dest) override {
+        destinations.push_back(dest);
+      }
+
+      // You can also define additional instrumentation points.)
+      void logAdditionalPoint(TelemetryInfo* Entry) {
+          // .... code here
+      }
+    private:
+      void emitToAllDestinations(const TelemetryInfo* Entry) {
+        // Note: could do this in parallel, if needed.
+        for (Destination* Dest : Destinations)
+          Dest->emitEntry(Entry);
+      }
+      std::vector<Destination> Destinations;
+    };
+
+2) Use the library in your tool.
+
+Logging the tool init-process:
+
+.. code-block:: c++
+
+  // At tool's init code
+  auto StartTime = std::chrono::time_point<std::chrono::steady_clock>::now();
+  llvm::telemetry::Config MyConfig = makeConfig(); // build up the appropriate Config struct here.
+  auto Telemeter = MyTelemeter::createInstance(&MyConfig);
+  std::string CurrentSessionId = ...; // Make some unique ID corresponding to the current session here.
+
+  // Any other tool's init code can go here
+  // ...
+
+  // Finally, take a snapshot of the time now so we know how long it took the
+  // init process to finish
+  auto EndTime = std::chrono::time_point<std::chrono::steady_clock>::now();
+  MyTelemetryInfo Entry;
+  Entry.SessionId = CurrentSessionId ; // Assign some unique ID here.
+  Entry.Stats = {StartTime, EndTime};
+  Telemeter->logStartup("MyTool", &Entry);
+
+Similar code can be used for logging the tool's exit.
+
+Additionally, at any other point in the tool's lifetime, it can also log telemetry:
+
+.. code-block:: c++
+
+   // At some execution point:
+   auto StartTime = std::chrono::time_point<std::chrono::steady_clock>::now();
+
+   // ... other events happening here
+
+   auto EndTime = std::chrono::time_point<std::chrono::steady_clock>::now();
+  MyTelemetryInfo Entry;
+  Entry.SessionId = CurrentSessionId ; // Assign some unique ID here.
+  Entry.Stats = {StartTime, EndTime};
+  Telemeter->logAdditionalPoint(&Entry);
diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
@@ -72,6 +72,7 @@ intermediate LLVM representation.
    SupportLibrary
    TableGen/index
    TableGenFundamentals
+   Telemetry
    Vectorizers
    WritingAnLLVMPass
    WritingAnLLVMNewPMPass
@@ -293,3 +294,6 @@ Additional Topics
 
 :doc:`Sandbox IR <SandboxIR>`
    This document describes the design and usage of Sandbox IR, a transactional layer over LLVM IR.
+
+:doc:`Telemetry`
+   This document describes the Telemetry framework in LLVM.
diff --git a/llvm/docs/llvm_telemetry_design.png b/llvm/docs/llvm_telemetry_design.png
diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h
@@ -0,0 +1,136 @@
+//===- llvm/Telemetry/Telemetry.h - Telemetry -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides the basic framework for Telemetry
+///
+/// It comprises of three important structs/classes:
+///
+/// - Telemeter: The class responsible for collecting and forwarding
+///              telemery data.
+/// - TelemetryInfo: data courier
+/// - TelemetryConfig: this stores configurations on Telemeter.
+///
+/// Refer to its documentation at llvm/docs/Telemetry.rst for more details.
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_TELEMETRY_TELEMETRY_H
+#define LLVM_TELEMETRY_TELEMETRY_H
+
+#include <chrono>
+#include <ctime>
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace telemetry {
+
+/// Configuration for the Telemeter class.
+/// This stores configurations from both users and vendors and is passed
+/// to the Telemeter upon construction. (Any changes to the config after
+/// the Telemeter's construction will not have effect on it).
-/// the Telemeter's construction will not have effect on it).
+/// the Telemeter's construction will not have effect any on it).
-/// the Telemeter's construction will not have effect on it).
+/// the Telemeter's construction will not have effect any on it).
+///
+/// This struct can be extended as needed to add additional configuration
+/// points specific to a vendor's implementation.
+struct Config {
+  // If true, telemetry will be enabled.
+  bool EnableTelemetry;
+
+  // Implementation-defined names of additional destinations to send
+  // telemetry data (Could be stdout, stderr, or some local paths, etc).
+  //
+  // These strings will be interpreted by the vendor's code.
+  // So the users must pick the from their vendor's pre-defined
+  // set of Destinations.
+  std::vector<std::string> AdditionalDestinations;
+};
+
+/// For isa, dyn_cast, etc operations on TelemetryInfo.
+typedef unsigned KindType;
+/// This struct is used by TelemetryInfo to support isa<>, dyn_cast<>
+/// operations.
+/// It is defined as a struct (rather than an enum) because it is
+/// expectend to be extended by subclasses which may have
-/// expectend to be extended by subclasses which may have
+/// expected to be extended by subclasses which may have
-/// expectend to be extended by subclasses which may have
+/// expected to be extended by subclasses which may have
+/// additional TelemetryInfo types defined to describe different events.
+struct EntryKind {
+  static const KindType Base = 0;
+};
+
+/// TelemetryInfo is the data courier, used to move instrumented data
+/// from the tool being monitored to the Telemery framework.
-/// from the tool being monitored to the Telemery framework.
+/// from the tool being monitored to the Telemetry framework.
-/// from the tool being monitored to the Telemery framework.
+/// from the tool being monitored to the Telemetry framework.
+///
+/// This base class contains only the basic set of telemetry data.
+/// Downstream implementations can add more fields as needed to describe
+/// additional events.
-/// Downstream implementations can add more fields as needed to describe
-/// additional events.
+/// Downstream implementations can define subclasses to provide different bits
+/// of information.
-/// Downstream implementations can add more fields as needed to describe
-/// additional events.
+/// Downstream implementations can define subclasses to provide different bits
+/// of information.
+///
+/// For example, The LLDB debugger can define a DebugCommandInfo subclass
+/// which has additional fields about the debug-command being instrumented,
+/// such as `CommandArguments` or `CommandName`.
+struct TelemetryInfo {
+  // This represents a unique-id, conventionally corresponding to
+  // a tool's session - i.e., every time the tool starts until it exits.
+  //
+  // Note: a tool could have multiple sessions running at once, in which
+  // case, these shall be multiple sets of TelemetryInfo with multiple unique
+  // ids.
+  //
+  // Different usages can assign different types of IDs to this field.
+  std::string SessionId;
+
+  TelemetryInfo() = default;
+  virtual ~TelemetryInfo() = default;
+
+  virtual json::Object serializeToJson() const;
+
+  // For isa, dyn_cast, etc, operations.
+  virtual KindType getKind() const { return EntryKind::Base; }
+  static bool classof(const TelemetryInfo *T) {
+    if (T == nullptr)
+      return false;
+    return T->getKind() == EntryKind::Base;
+  }
+};
+
+/// This class presents a data sink to which the Telemetry framework
+/// sends data.
+///
+/// Its implementation is transparent to the framework.
+/// It is up to the vendor to decide which pieces of data to forward
+/// and where to forward them.
+class Destination {
+public:
+  virtual ~Destination() = default;
+  virtual Error emitEntry(const TelemetryInfo *Entry) = 0;
+  virtual llvm::StringLiteral name() const = 0;
+};
+
+/// This class is the main interaction point between any LLVM tool
+/// and this framework.
+/// It is responsible for collecting telemetry data from the tool being
+/// monitored and transmitting the data elsewhere.
+class Telemeter {
+public:
+  // Invoked upon tool startup
+  virtual void atStartup(llvm::StringRef ToolPath, TelemetryInfo *Entry) = 0;
+
+  // Invoked upon tool exit.
+  virtual void atExit(llvm::StringRef ToolPath, TelemetryInfo *Entry) = 0;
+
+  virtual void addDestination(std::unique_ptr<Destination> Destination) = 0;
+};
+
+} // namespace telemetry
+} // namespace llvm
+
+#endif // LLVM_TELEMETRY_TELEMETRY_H
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
@@ -41,6 +41,7 @@ add_subdirectory(ProfileData)
 add_subdirectory(Passes)
 add_subdirectory(TargetParser)
 add_subdirectory(TextAPI)
+add_subdirectory(Telemetry)
 add_subdirectory(ToolDrivers)
 add_subdirectory(XRay)
 if (LLVM_INCLUDE_TESTS)

diff --git a/llvm/lib/Telemetry/CMakeLists.txt b/llvm/lib/Telemetry/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_component_library(LLVMTelemetry
+  Telemetry.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  "${LLVM_MAIN_INCLUDE_DIR}/llvm/Telemetry"
+)
diff --git a/llvm/lib/Telemetry/Telemetry.cpp b/llvm/lib/Telemetry/Telemetry.cpp
@@ -0,0 +1,13 @@
+#include "llvm/Telemetry/Telemetry.h"
+
+namespace llvm {
+namespace telemetry {
+
+llvm::json::Object TelemetryInfo::serializeToJson() const {
+  return json::Object{
+      {"SessionId", SessionId},
+  };
+};
+
+} // namespace telemetry
+} // namespace llvm