Skip to content

Commit 6a724a4

Browse files
authored
feat: support origin detection (#214)
Origin Detection makes it possible to detect which container traces come from, and to automatically add container tags to the local root span.
1 parent e14a5a4 commit 6a724a4

File tree

14 files changed

+300
-17
lines changed

14 files changed

+300
-17
lines changed

CMakePresets.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,16 @@
1919
"cacheVariables": {
2020
"CMAKE_POLICY_VERSION_MINIMUM": "3.5"
2121
}
22+
},
23+
{
24+
"name": "dev",
25+
"displayName": "Development",
26+
"cacheVariables": {
27+
"CMAKE_BUILD_TYPE": "Debug",
28+
"DD_TRACE_ENABLE_SANITIZE": "ON",
29+
"DD_TRACE_BUILD_TESTING": "ON",
30+
"DD_TRACE_BUILD_EXAMPLES": "ON"
31+
}
2232
}
2333
]
2434
}

examples/http-server/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ from ubuntu:22.04
33
WORKDIR /dd-trace-cpp
44

55
ARG DEBIAN_FRONTEND=noninteractive
6-
ARG BRANCH=v0.2.1
6+
ARG BRANCH=v1.0.0
77

88
run apt update -y \
99
&& apt install -y g++ make git wget sed \

examples/http-server/docker-compose.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,4 @@ services:
5353
- DD_APM_ENABLED=true
5454
- DD_LOG_LEVEL=ERROR
5555
- DOCKER_HOST
56+
- DD_SITE

include/datadog/datadog_agent_config.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ class FinalizedDatadogAgentConfig {
8787
std::chrono::steady_clock::duration shutdown_timeout;
8888
std::chrono::steady_clock::duration remote_configuration_poll_interval;
8989
std::unordered_map<ConfigName, ConfigMetadata> metadata;
90+
91+
// Origin detection
92+
Optional<std::string> admission_controller_uid;
9093
};
9194

9295
Expected<FinalizedDatadogAgentConfig> finalize_config(

include/datadog/environment.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ namespace environment {
5959
MACRO(DD_TELEMETRY_LOG_COLLECTION_ENABLED) \
6060
MACRO(DD_INSTRUMENTATION_INSTALL_ID) \
6161
MACRO(DD_INSTRUMENTATION_INSTALL_TYPE) \
62-
MACRO(DD_INSTRUMENTATION_INSTALL_TIME)
62+
MACRO(DD_INSTRUMENTATION_INSTALL_TIME) \
63+
MACRO(DD_EXTERNAL_ENV)
6364

6465
#define WITH_COMMA(ARG) ARG,
6566

src/datadog/datadog_agent.cpp

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "collector_response.h"
1818
#include "json.hpp"
1919
#include "msgpack.h"
20+
#include "platform_util.h"
2021
#include "span_data.h"
2122
#include "telemetry_metrics.h"
2223
#include "trace_sampler.h"
@@ -156,10 +157,36 @@ DatadogAgent::DatadogAgent(
156157
flush_interval_(config.flush_interval),
157158
request_timeout_(config.request_timeout),
158159
shutdown_timeout_(config.shutdown_timeout),
159-
remote_config_(tracer_signature, rc_listeners, logger),
160-
tracer_signature_(tracer_signature) {
160+
remote_config_(tracer_signature, rc_listeners, logger) {
161161
assert(logger_);
162162

163+
// Set HTTP headers
164+
headers_.emplace("Content-Type", "application/msgpack");
165+
headers_.emplace("Datadog-Meta-Lang", "cpp");
166+
headers_.emplace("Datadog-Meta-Lang-Version",
167+
tracer_signature.library_language_version);
168+
headers_.emplace("Datadog-Meta-Tracer-Version",
169+
tracer_signature.library_version);
170+
171+
// Origin Detection headers are not necessary when Unix Domain Socket (UDS)
172+
// is used to communicate with the Datadog Agent.
173+
if (!contains(config.url.scheme, "unix")) {
174+
if (auto container_id = container::get_id()) {
175+
if (container_id->type == container::ContainerID::Type::container_id) {
176+
headers_.emplace("Datadog-Container-ID", container_id->value);
177+
headers_.emplace("Datadog-Entity-Id", "ci-" + container_id->value);
178+
} else if (container_id->type ==
179+
container::ContainerID::Type::cgroup_inode) {
180+
headers_.emplace("Datadog-Entity-Id", "in-" + container_id->value);
181+
}
182+
}
183+
184+
if (config.admission_controller_uid) {
185+
headers_.emplace("Datadog-External-Env",
186+
*config.admission_controller_uid);
187+
}
188+
}
189+
163190
tasks_.emplace_back(event_scheduler_->schedule_recurring_event(
164191
config.flush_interval, [this]() { flush(); }));
165192

@@ -252,14 +279,11 @@ void DatadogAgent::flush() {
252279

253280
// This is the callback for setting request headers.
254281
// It's invoked synchronously (before `post` returns).
255-
auto set_request_headers = [&](DictWriter& headers) {
256-
headers.set("Content-Type", "application/msgpack");
257-
headers.set("Datadog-Meta-Lang", "cpp");
258-
headers.set("Datadog-Meta-Lang-Version",
259-
tracer_signature_.library_language_version);
260-
headers.set("Datadog-Meta-Tracer-Version",
261-
tracer_signature_.library_version);
262-
headers.set("X-Datadog-Trace-Count", std::to_string(trace_chunks.size()));
282+
auto set_request_headers = [&](DictWriter& writer) {
283+
writer.set("X-Datadog-Trace-Count", std::to_string(trace_chunks.size()));
284+
for (const auto& [key, value] : headers_) {
285+
writer.set(key, value);
286+
}
263287
};
264288

265289
// This is the callback for the HTTP response. It's invoked

src/datadog/datadog_agent.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ class DatadogAgent : public Collector {
4949
std::chrono::steady_clock::duration shutdown_timeout_;
5050

5151
remote_config::Manager remote_config_;
52-
TracerSignature tracer_signature_;
52+
53+
std::unordered_map<std::string, std::string> headers_;
5354

5455
void flush();
5556

src/datadog/datadog_agent_config.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "default_http_client.h"
99
#include "parse_util.h"
10+
#include "platform_util.h"
1011
#include "threaded_event_scheduler.h"
1112

1213
namespace datadog {
@@ -144,6 +145,12 @@ Expected<FinalizedDatadogAgentConfig> finalize_config(
144145
result.metadata[ConfigName::AGENT_URL] =
145146
ConfigMetadata(ConfigName::AGENT_URL, url, origin);
146147

148+
/// Starting Agent X, the admission controller injects a unique identifier
149+
/// through `DD_EXTERNAL_ENV`. This uid is used for origin detection.
150+
if (auto external_env = lookup(environment::DD_EXTERNAL_ENV)) {
151+
result.admission_controller_uid = std::string(*external_env);
152+
}
153+
147154
return result;
148155
}
149156

src/datadog/platform_util.cpp

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "platform_util.h"
22

3+
#include <cstdint>
4+
#include <fstream>
5+
36
// clang-format off
47
#if defined(__x86_64__) || defined(_M_X64)
58
# define DD_SDK_CPU_ARCH "x86_64"
@@ -24,11 +27,13 @@
2427
# define DD_SDK_OS "GNU/Linux"
2528
# define DD_SDK_KERNEL "Linux"
2629
# include "string_util.h"
30+
# include <errno.h>
2731
# include <fstream>
32+
# include <fcntl.h>
2833
# include <sys/types.h>
2934
# include <sys/mman.h>
30-
# include <fcntl.h>
31-
# include <errno.h>
35+
# include <sys/stat.h>
36+
# include <sys/statfs.h>
3237
# endif
3338
#elif defined(_MSC_VER)
3439
# include <windows.h>
@@ -281,5 +286,126 @@ Expected<InMemoryFile> InMemoryFile::make(StringView) {
281286
}
282287
#endif
283288

289+
namespace container {
290+
namespace {
291+
#if defined(__linux__) || defined(__unix__)
292+
/// Magic numbers from linux/magic.h:
293+
/// <https://github.com/torvalds/linux/blob/ca91b9500108d4cf083a635c2e11c884d5dd20ea/include/uapi/linux/magic.h#L71>
294+
constexpr uint64_t CGROUP_SUPER_MAGIC = 0x27e0eb;
295+
constexpr uint64_t CGROUP2_SUPER_MAGIC = 0x63677270;
296+
297+
/// Magic number from linux/proc_ns.h:
298+
/// <https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49>
299+
constexpr ino_t HOST_CGROUP_NAMESPACE_INODE = 0xeffffffb;
300+
301+
/// Represents the cgroup version of the current process.
302+
enum class Cgroup : char { v1, v2 };
303+
304+
/// Look up the inode number of the file located at `path`.
///
/// @param path Filesystem path to inspect. It does not need to be
///             null-terminated.
/// @return The inode number reported by `stat`, or `nullopt` if `stat` fails.
Optional<ino_t> get_inode(std::string_view path) {
  // `stat` expects a null-terminated C string, but `std::string_view::data()`
  // gives no such guarantee. Copy the view into a `std::string` so that the
  // terminator is always present.
  const std::string terminated_path(path);

  struct stat buf;
  if (stat(terminated_path.c_str(), &buf) != 0) {
    return nullopt;
  }

  return buf.st_ino;
}
312+
313+
// Host namespace inode number are hardcoded, which allows for dectection of
314+
// whether the binary is running in host or not. However, it does not work when
315+
// running in a Docker in Docker environment.
316+
bool is_running_in_host_namespace() {
317+
// linux procfs file that represents the cgroup namespace of the current
318+
// process.
319+
if (auto inode = get_inode("/proc/self/ns/cgroup")) {
320+
return *inode == HOST_CGROUP_NAMESPACE_INODE;
321+
}
322+
323+
return false;
324+
}
325+
326+
/// Determine the cgroup version by comparing the filesystem type mounted at
/// `/sys/fs/cgroup` with the cgroup magic numbers.
///
/// @return `Cgroup::v1` or `Cgroup::v2` on a match; `nullopt` if `statfs`
///         fails or the filesystem type matches neither magic number.
Optional<Cgroup> get_cgroup_version() {
  struct statfs fs_info;
  if (statfs("/sys/fs/cgroup", &fs_info) != 0) {
    return nullopt;
  }

  if (fs_info.f_type == CGROUP_SUPER_MAGIC) {
    return Cgroup::v1;
  }

  if (fs_info.f_type == CGROUP2_SUPER_MAGIC) {
    return Cgroup::v2;
  }

  return nullopt;
}
340+
341+
/// Open `/proc/self/cgroup` and search it for a Docker container ID.
///
/// @return The result of `find_docker_container_id` on that file, or
///         `nullopt` if the file could not be opened.
Optional<std::string> find_docker_container_id_from_cgroup() {
  std::ifstream cgroup_file("/proc/self/cgroup", std::ios::in);
  if (!cgroup_file.is_open()) {
    return nullopt;
  }

  return find_docker_container_id(cgroup_file);
}
347+
#endif
348+
} // namespace
349+
350+
/// Scan `source` line by line and extract the text found between the first
/// `docker-` marker of a line and the following `.scope` suffix.
///
/// Example of a matching line:
///   `0::/system.slice/docker-abcdef0123456789abcdef0123456789.scope`
///
/// @param source Stream to read lines from.
/// @return The first non-empty identifier found, or `nullopt` if no line
///         matches.
Optional<std::string> find_docker_container_id(std::istream& source) {
  constexpr std::string_view marker = "docker-";

  for (std::string entry; std::getline(source, entry);) {
    const auto marker_pos = entry.find(marker);
    if (marker_pos == std::string::npos) {
      continue;
    }

    // The identifier starts right after the marker and stops at `.scope`.
    const auto id_begin = marker_pos + marker.size();
    const auto id_end = entry.find(".scope", id_begin);
    if (id_end == std::string::npos || id_end == id_begin) {
      // No suffix on this line, or nothing between marker and suffix.
      continue;
    }

    return entry.substr(id_begin, id_end - id_begin);
  }

  return nullopt;
}
371+
372+
/// Retrieve an identifier for the container the current process runs in.
///
/// On Linux/Unix builds:
///   - returns `nullopt` when the process is in the host cgroup namespace or
///     when the cgroup version cannot be determined;
///   - for cgroup v1, first tries to read the Docker container ID from
///     `/proc/self/cgroup`;
///   - for cgroup v2, or when the v1 lookup fails, uses the inode of
///     `/sys/fs/cgroup`;
///   - returns `nullopt` when neither identifier could be obtained, so that
///     callers never receive a `ContainerID` with an unset `type`.
///
/// On other platforms this always returns `nullopt`.
Optional<ContainerID> get_id() {
#if defined(__linux__) || defined(__unix__)
  if (is_running_in_host_namespace()) {
    // Not in a container, no need to continue.
    return nullopt;
  }

  auto maybe_cgroup = get_cgroup_version();
  if (!maybe_cgroup) return nullopt;

  if (*maybe_cgroup == Cgroup::v1) {
    if (auto maybe_id = find_docker_container_id_from_cgroup()) {
      ContainerID id;
      id.type = ContainerID::Type::container_id;
      id.value = *maybe_id;
      return id;
    }
    // NOTE(@dmehala): failed to find the container ID, try getting the cgroup
    // inode below.
  }

  if (auto maybe_inode = get_inode("/sys/fs/cgroup")) {
    ContainerID id;
    id.type = ContainerID::Type::cgroup_inode;
    id.value = std::to_string(*maybe_inode);
    return id;
  }

  // Neither a container ID nor a cgroup inode is available. Report "unknown"
  // instead of handing back a partially initialized `ContainerID`.
  return nullopt;
#else
  return nullopt;
#endif
}
407+
408+
} // namespace container
409+
284410
} // namespace tracing
285411
} // namespace datadog

src/datadog/platform_util.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,33 @@ std::string get_process_name();
7272

7373
int at_fork_in_child(void (*on_fork)());
7474

75+
namespace container {
76+
77+
/// Identifier of the container in which a process runs.
struct ContainerID final {
  /// Kind of unique ID stored in `value`.
  enum class Type : char { container_id, cgroup_inode };

  /// Tells how `value` must be interpreted.
  Type type;

  /// Identifier of the container. It _mostly_ depends on the cgroup version:
  ///   - For cgroup v1, it contains the container ID.
  ///   - For cgroup v2, it contains the "container" inode.
  std::string value;
};
86+
87+
/// Find the docker container ID from a given source.
88+
/// This function is exposed mainly for testing purposes.
89+
///
90+
/// @param source The input from which to read the Docker container ID.
91+
/// @return An Optional containing the Docker container ID if found, otherwise
92+
/// nothing.
93+
Optional<std::string> find_docker_container_id(std::istream& source);
94+
95+
/// Function to retrieve the container metadata.
96+
///
97+
/// @return A `ContainerID` object containing id of the container in
98+
/// which the current process is running.
99+
Optional<ContainerID> get_id();
100+
101+
} // namespace container
102+
75103
} // namespace tracing
76104
} // namespace datadog

0 commit comments

Comments
 (0)