Skip to content

Commit 6fb4e61

Browse files
add benchmarks (#32)
* Add benchmarks pipeline * Basic CMakeLists.txt * Do 10 repetitions of benchmarks * Make runall.sh executable * Increase exec time of 1 benchmark * Add PR comments * benchmark based on the example/ * touch up in preparation for sync with benchmarking-platform * remove libcurl's libidn2 dependency * rewrite benchmark/README.md * GitHub doesn't like links to submodules * benchmark links the static library --------- Co-authored-by: Dmytro Yurchenko <[email protected]>
1 parent 2bcff73 commit 6fb4e61

File tree

16 files changed

+750
-11
lines changed

16 files changed

+750
-11
lines changed

.gitlab-ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
stages:
2+
- benchmarks
3+
4+
include: ".gitlab/benchmarks.yml"

.gitlab/benchmarks.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
variables:
2+
BASE_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-cpp
3+
4+
benchmarks:
5+
stage: benchmarks
6+
when: on_success
7+
tags: ["runner:apm-k8s-tweaked-metal"]
8+
image: $BASE_CI_IMAGE
9+
interruptible: true
10+
timeout: 15m
11+
script:
12+
- export ARTIFACTS_DIR="$(pwd)/reports" && (mkdir "${ARTIFACTS_DIR}" || :)
13+
- git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/"
14+
- git clone --branch dd-trace-cpp https://github.com/DataDog/benchmarking-platform /platform && cd /platform
15+
- ./steps/capture-hardware-software-info.sh
16+
- ./steps/run-benchmarks.sh
17+
- ./steps/analyze-results.sh
18+
- "./steps/upload-results-to-s3.sh || :"
19+
- "./steps/post-pr-comment.sh || :"
20+
artifacts:
21+
name: "reports"
22+
paths:
23+
- reports/
24+
expire_in: 3 months
25+
variables:
26+
UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance.
27+
UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # "dd-trace-cpp"
28+
UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built.
29+
UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for.
30+
31+
KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: dd-trace-cpp
32+
FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"

.gitmodules

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[submodule "benchmark/google-benchmark"]
2+
path = benchmark/google-benchmark
3+
url = https://github.com/google/benchmark.git
4+
[submodule "benchmark/tinycc"]
5+
path = benchmark/tinycc
6+
url = https://github.com/TinyCC/tinycc.git

CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ ExternalProject_Add(curl
6161
BUILD_IN_SOURCE 1
6262
DOWNLOAD_EXTRACT_TIMESTAMP 0
6363
SOURCE_DIR ${CMAKE_BINARY_DIR}/curl
64-
CONFIGURE_COMMAND ${CMAKE_BINARY_DIR}/curl/configure --prefix=${CMAKE_BINARY_DIR} --disable-ftp --disable-ldap --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-smtp --disable-gopher --without-ssl --disable-crypto-auth --without-axtls --without-zlib --disable-rtsp --enable-shared=no --enable-static=yes --with-pic --without-brotli
64+
CONFIGURE_COMMAND ${CMAKE_BINARY_DIR}/curl/configure --prefix=${CMAKE_BINARY_DIR} --disable-ftp --disable-ldap --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-smtp --disable-gopher --without-ssl --disable-crypto-auth --without-axtls --without-zlib --disable-rtsp --enable-shared=no --enable-static=yes --with-pic --without-brotli --without-libidn2
6565
BUILD_COMMAND make -j${MAKE_JOB_COUNT}
6666
INSTALL_COMMAND make install
6767
)
@@ -228,3 +228,7 @@ endif()
228228

229229
# Each example has its own build flag.
230230
add_subdirectory(examples)
231+
232+
if(BUILD_BENCHMARK)
233+
add_subdirectory(benchmark)
234+
endif()

benchmark/CMakeLists.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# This defines an executable, `dd_trace_cpp-benchmark`, that's a Google
2+
# Benchmark based program that is a microbenchmark for dd-trace-cpp.
3+
#
4+
# It's intended to be used as part of Datadog's internal benchmarking platform.
5+
# See `../.gitlab/benchmarks.yml`.
6+
7+
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
8+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
9+
set(CMAKE_CXX_STANDARD 17)
10+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
11+
set(CMAKE_CXX_EXTENSIONS OFF)
12+
13+
add_executable(dd_trace_cpp-benchmark
14+
benchmark.cpp
15+
hasher.cpp
16+
)
17+
18+
# Google Benchmark is included as a git submodule.
19+
# It depends on Google Test, which it will download if this option is set.
20+
set(BENCHMARK_DOWNLOAD_DEPENDENCIES ON)
21+
# Don't build Google Benchmark's unit tests.
22+
set(BENCHMARK_ENABLE_TESTING OFF)
23+
add_subdirectory(google-benchmark)
24+
25+
target_link_libraries(dd_trace_cpp-benchmark benchmark::benchmark dd_trace_cpp-static)

benchmark/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
Microbenchmarks
2+
===============
3+
This directory contains the definition of a program that measures the timing and
4+
resource consumption of a test tracing scenario.
5+
6+
The benchmark uses [Google Benchmark][1], whose source is included as a git
7+
submodule under `./google-benchmark`.
8+
9+
The scenario that's measured is similar to the [../example][3] setup. A trace
10+
is created whose structure reflects that of a particular file directory
11+
structure. The directory structure, in this case, is the source tree of the
12+
[Tiny C Compiler][4], whose source is included as a git submodule under
13+
`./tinycc`.
14+
15+
The scenario does not use the network, spawn any threads, or read/write
16+
any files. The operations that are implicitly covered by the scenario are:
17+
18+
- configuring and initializing a tracer,
19+
- creating a trace,
20+
- adding spans to a trace,
21+
- setting tags on spans,
22+
- finishing spans,
23+
- finalizing a trace and making a sampling decision,
24+
- serializing a trace as MessagePack.
25+
26+
[../bin/benchmark][6] is a script that builds dd-trace-cpp, this benchmark, and
27+
then runs the benchmark.
28+
29+
This benchmark is intended to be driven by Datadog's internal benchmarking
30+
platform. See [../.gitlab/benchmarks.yml][7].
31+
32+
[1]: https://github.com/google/benchmark
33+
[3]: ../example
34+
[4]: https://bellard.org/tcc/
35+
[6]: ../bin/benchmark
36+
[7]: ../.gitlab/benchmarks.yml

benchmark/benchmark.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include <benchmark/benchmark.h>
2+
3+
#include <datadog/collector.h>
4+
#include <datadog/json.hpp>
5+
#include <datadog/logger.h>
6+
#include <datadog/span_data.h>
7+
#include <datadog/tracer.h>
8+
#include <datadog/tracer_config.h>
9+
10+
#include <memory>
11+
12+
#include "hasher.h"
13+
14+
namespace {
15+
16+
namespace dd = datadog::tracing;
17+
18+
// `NullLogger` doesn't log. It avoids `log_startup` spam in the benchmark.
19+
struct NullLogger : public dd::Logger {
20+
void log_error(const LogFunc&) override {}
21+
void log_startup(const LogFunc&) override {}
22+
void log_error(const dd::Error&) override {}
23+
void log_error(dd::StringView) override {}
24+
};
25+
26+
// `SerializingCollector` immediately MessagePack-serializes spans sent to it.
27+
// This allows us to track the overhead of the serialization code, without
28+
// having to use HTTP as is done in the default collector, `DatadogAgent`.
29+
struct SerializingCollector : public dd::Collector {
30+
dd::Expected<void> send(
31+
std::vector<std::unique_ptr<dd::SpanData>>&& spans,
32+
const std::shared_ptr<dd::TraceSampler>& /*response_handler*/) override {
33+
std::string buffer;
34+
return dd::msgpack_encode(buffer, spans);
35+
}
36+
37+
nlohmann::json config_json() const override {
38+
return nlohmann::json::object({
39+
{"type", "SerializingCollector"}
40+
});
41+
}
42+
};
43+
44+
// The benchmark `BM_TraceTinyCCSource`, for each iteration over `state`,
45+
// creates a trace whose shape is the same as the file system tree under
46+
// `./tinycc`. It's similar to what is done in `../example`.
47+
void BM_TraceTinyCCSource(benchmark::State& state) {
48+
for (auto _ : state) {
49+
dd::TracerConfig config;
50+
config.defaults.service = "benchmark";
51+
config.logger = std::make_shared<NullLogger>();
52+
config.collector = std::make_shared<SerializingCollector>();
53+
const auto valid_config = dd::finalize_config(config);
54+
dd::Tracer tracer{*valid_config};
55+
// Note: This assumes that the benchmark is run from the repository root.
56+
sha256_traced("benchmark/tinycc", tracer);
57+
}
58+
}
59+
BENCHMARK(BM_TraceTinyCCSource);
60+
61+
} // namespace
62+
63+
BENCHMARK_MAIN();

benchmark/google-benchmark

Submodule google-benchmark added at 2dd015d

benchmark/hasher.cpp

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
// `sha256_traced` accepts a file system path and a tracer.
2+
//
3+
// If the path does not exist, record an error on the root span.
4+
//
5+
// If the path exists and is a regular file, calculate the SHA256 digest of the
6+
// file's contents. Produce a single tracing span indicating the calculation.
7+
//
8+
// If the path exists and is a directory, calculate the SHA256 digest of the
9+
// directory from the names and digests of its children, combined in some
10+
// canonical format. Produce a trace whose structure reflects the directory
11+
// structure.
12+
//
13+
// Files that are neither regular files nor directories are ignored.
14+
15+
#include "hasher.h"
16+
17+
#include <datadog/span_config.h>
18+
#include <datadog/tags.h>
19+
#include <datadog/tracer.h>
20+
#include <datadog/tracer_config.h>
21+
22+
#include <algorithm>
23+
#include <array>
24+
#include <cstddef>
25+
#include <cstdint>
26+
#include <cstdio>
27+
#include <iostream>
28+
#include <string>
29+
#include <vector>
30+
31+
#include "picosha2.h"
32+
33+
namespace fs = std::filesystem;
34+
namespace dd = datadog::tracing;
35+
36+
using Digest = std::array<char, picosha2::k_digest_size>;
37+
38+
// Return the specified `digest` formatted as a lower case hexadecimal string.
39+
std::string hex(const Digest &digest) {
40+
std::string result;
41+
for (std::size_t i = 0; i < digest.size(); ++i) {
42+
char buf[2 + 1];
43+
std::snprintf(buf, sizeof buf, "%02x",
44+
static_cast<unsigned char>(digest[i]));
45+
result.append(buf, 2);
46+
}
47+
return result;
48+
}
49+
50+
// Store into the specified `digest` the SHA256 digest of the contents of the
51+
// specified `file`. Return zero on success, or a nonzero value if an error
52+
// occurs.
53+
int sha256(Digest &digest, const fs::path &file) {
54+
std::ifstream in(file);
55+
if (!in) {
56+
return 1;
57+
}
58+
picosha2::hash256(in, digest.begin(), digest.end());
59+
return 0;
60+
}
61+
62+
// Return the SHA256 digest of a directory having the specified `children`.
63+
// This function will sort `children` in place.
64+
Digest sha256(std::vector<std::pair<fs::path, Digest>> &children) {
65+
std::sort(children.begin(), children.end());
66+
67+
std::vector<char> descriptor;
68+
for (const auto &record : children) {
69+
const std::string path = record.first.filename().u8string();
70+
const Digest &hash = record.second;
71+
descriptor.insert(descriptor.end(), path.begin(), path.end());
72+
descriptor.insert(descriptor.end(), hash.begin(), hash.end());
73+
}
74+
75+
Digest digest;
76+
picosha2::hash256(descriptor, digest);
77+
return digest;
78+
}
79+
80+
// Calculate into the specified `digest` the SHA256 digest of the file system
// entry at the specified `path`, recording the work as a child span of the
// specified `active_span`.
//
// - A directory yields a "sha256.directory" span; each regular file or
//   directory directly inside it is processed recursively beneath that span,
//   and the directory's digest is derived from its children.
// - A regular file yields a "sha256.file" span.
// - Anything else yields no span.
//
// Return zero on success. Return a nonzero value if `path` is neither a
// directory nor a regular file, if any digest cannot be calculated, or if a
// file system or stream error is thrown along the way.
int sha256_traced(Digest &digest, const fs::path &path,
                  const dd::Span &active_span) try {
  if (fs::is_directory(path)) {
    dd::SpanConfig span_config;
    span_config.name = "sha256.directory";
    auto dir_span = active_span.create_child(span_config);
    dir_span.set_tag("path", path.u8string());
    dir_span.set_tag("file_name", path.u8string());
    dir_span.set_tag("directory_name", path.u8string());

    std::vector<std::pair<fs::path, Digest>> entries;
    const fs::directory_iterator listing(
        path, fs::directory_options::skip_permission_denied);
    for (const fs::directory_entry &item : listing) {
      // Only regular files and directories contribute to the digest.
      const bool hashable = item.is_regular_file() || item.is_directory();
      if (!hashable) {
        continue;
      }

      const fs::path &child_path = item.path();
      Digest child_digest;
      if (sha256_traced(child_digest, child_path, dir_span) != 0) {
        // One failed child fails the whole directory.
        dir_span.set_error_message("unable to calculate digest of " +
                                   child_path.u8string());
        return 1;
      }
      entries.emplace_back(child_path, child_digest);
    }

    dir_span.set_tag("number_of_children_included",
                     std::to_string(entries.size()));
    digest = sha256(entries);
    dir_span.set_tag("sha256_hex", hex(digest));
    return 0;
  }

  if (!fs::is_regular_file(path)) {
    // Neither a directory nor a regular file: nothing to hash.
    return 1;
  }

  dd::SpanConfig span_config;
  span_config.name = "sha256.file";
  auto file_span = active_span.create_child(span_config);
  file_span.set_tag("path", path.u8string());
  file_span.set_tag("file_name", path.u8string());
  file_span.set_tag("file_size_bytes", std::to_string(fs::file_size(path)));

  if (const int rc = sha256(digest, path)) {
    file_span.set_error_message("Unable to calculate sha256 hash.");
    return rc;
  }
  file_span.set_tag("sha256_hex", hex(digest));
  return 0;
} catch (const fs::filesystem_error &) {
  return 1;
} catch (const std::ios_base::failure &) {
  return 1;
}
135+
136+
// Use the specified `tracer` to produce one trace describing the SHA256
// calculation for the file system tree rooted at the specified `path`. The
// root span is named "sha256.request" and is tagged with the path. On success
// it is also tagged with the resulting digest in hexadecimal; otherwise an
// error message is set on it.
void sha256_traced(const fs::path &path, dd::Tracer &tracer) {
  dd::SpanConfig request_config;
  request_config.name = "sha256.request";
  auto request_span = tracer.create_span(request_config);
  request_span.set_tag("path", path.u8string());

  if (!fs::exists(path)) {
    request_span.set_error_message("The file does not exist.");
    return;
  }

  Digest result;
  const bool failed = sha256_traced(result, path, request_span) != 0;
  if (failed) {
    request_span.set_error_message("Unable to calculate sha256 hash.");
    return;
  }
  request_span.set_tag("sha256_hex", hex(result));
}

benchmark/hasher.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#pragma once

// This header declares `sha256_traced`, the entry point of the traced
// directory hasher used by the benchmark. `#pragma once` guards against the
// header being included more than once in a translation unit.

#include <filesystem>

// Forward declaration, so that this header does not need to include the
// tracer's own header.
namespace datadog {
namespace tracing {
class Tracer;
}  // namespace tracing
}  // namespace datadog

// Use the specified `tracer` to create a trace whose structure resembles the
// file system tree rooted at the specified `path`.
void sha256_traced(const std::filesystem::path &path,
                   datadog::tracing::Tracer &tracer);

0 commit comments

Comments
 (0)