Skip to content

Commit 320a217

Browse files
Omri Zeneva, yuvalif
authored and committed
tracer/osd/librados/build/rgw: rgw and osd end2end tracing using opentelemetry
* build: add opentelemetry to cmake system. crimson targets that use Message.cc/h are built before opentelemetry (o-tel), so we need to build o-tel earlier, so we also add the library to the include path earlier. this should work for the WITH_JAEGER flag in both the ON/OFF cases, and for librados where the compilation flag is ignored * msg/tracer: add o-tel trace to Messages with decode/encode functions in tracer.h. some files that use Message.cc/h just need the encode/decode functions and not all the other functions. some crimson targets do not link with ceph_context (common), which is required for the tracer.cc file, so we just need to include those functions * librados: Add opentelemetry trace param for aio_operate and operate methods. in order to propagate the trace info I added the otel-trace as an extra param. in some places, there already was a blkin trace info, and since it is not used in other places we can safely change it to o-tel trace info. this will be done in another commit, so the cleanup of blkin trace will be in a dedicated commit * osd: use the o-tel trace of the msg as a parent span of the osd trace. if there is a valid span in the msg, we will add this op to the request trace, otherwise it will start a new trace for the OSD op * rgw: pass put obj trace info to librados. in order to make it possible, I saved the trace info inside the sal::Object, so we can use it later when writing the object to rados. it could also be used later for read ops. note the trace field of req_state is initialized only in rgw_process, so it's also required in the librgw request flow * prevent breaking changes to kSize. make sure that changes between components built with different versions of OTEL do not break message compatibility Signed-off-by: Omri Zeneva <[email protected]>
1 parent ce47bb1 commit 320a217

31 files changed

+281
-130
lines changed

cmake/modules/BuildOpentelemetry.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,5 @@ function(build_opentelemetry)
8282
PROPERTIES
8383
INTERFACE_LINK_LIBRARIES "${opentelemetry_deps}"
8484
INTERFACE_INCLUDE_DIRECTORIES "${opentelemetry_include_dir}")
85+
include_directories(SYSTEM "${opentelemetry_include_dir}")
8586
endfunction()

src/CMakeLists.txt

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,15 @@ if (WITH_BLKIN)
301301
add_subdirectory(blkin/blkin-lib)
302302
endif(WITH_BLKIN)
303303

304+
if(WITH_JAEGER)
305+
find_package(thrift 0.13.0 REQUIRED)
306+
include(BuildOpentelemetry)
307+
build_opentelemetry()
308+
add_library(jaeger_base INTERFACE)
309+
target_link_libraries(jaeger_base INTERFACE opentelemetry::libopentelemetry
310+
thrift::libthrift)
311+
endif()
312+
304313
set(mds_files)
305314
list(APPEND mds_files
306315
mds/MDSMap.cc
@@ -441,12 +450,6 @@ target_compile_definitions(common-objs PRIVATE
441450
add_dependencies(common-objs legacy-option-headers)
442451

443452
if(WITH_JAEGER)
444-
find_package(thrift 0.13.0 REQUIRED)
445-
include(BuildOpentelemetry)
446-
build_opentelemetry()
447-
add_library(jaeger_base INTERFACE)
448-
target_link_libraries(jaeger_base INTERFACE opentelemetry::libopentelemetry
449-
thrift::libthrift)
450453
add_dependencies(common-objs jaeger_base)
451454
target_link_libraries(common-objs jaeger_base)
452455
endif()

src/common/tracer.cc

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@ jspan_ptr Tracer::start_trace(opentelemetry::nostd::string_view trace_name, bool
6060
}
6161

6262
jspan_ptr Tracer::add_span(opentelemetry::nostd::string_view span_name, const jspan_ptr& parent_span) {
63-
if (parent_span && parent_span->IsRecording()) {
64-
ceph_assert(tracer);
63+
if (is_enabled() && parent_span && parent_span->IsRecording()) {
6564
opentelemetry::trace::StartSpanOptions span_opts;
6665
span_opts.parent = parent_span->GetContext();
6766
ldout(cct, 20) << "adding span " << span_name << " " << dendl;
@@ -85,41 +84,6 @@ bool Tracer::is_enabled() const {
8584
return cct->_conf->jaeger_tracing_enable;
8685
}
8786

88-
void encode(const jspan_context& span_ctx, bufferlist& bl, uint64_t f) {
89-
ENCODE_START(1, 1, bl);
90-
using namespace opentelemetry;
91-
using namespace trace;
92-
auto is_valid = span_ctx.IsValid();
93-
encode(is_valid, bl);
94-
if (is_valid) {
95-
encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.trace_id().Id().data()), TraceId::kSize), bl);
96-
encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.span_id().Id().data()), SpanId::kSize), bl);
97-
encode(span_ctx.trace_flags().flags(), bl);
98-
}
99-
ENCODE_FINISH(bl);
100-
}
101-
102-
void decode(jspan_context& span_ctx, bufferlist::const_iterator& bl) {
103-
using namespace opentelemetry;
104-
using namespace trace;
105-
DECODE_START(1, bl);
106-
bool is_valid;
107-
decode(is_valid, bl);
108-
if (is_valid) {
109-
std::array<uint8_t, TraceId::kSize> trace_id;
110-
std::array<uint8_t, SpanId::kSize> span_id;
111-
uint8_t flags;
112-
decode(trace_id, bl);
113-
decode(span_id, bl);
114-
decode(flags, bl);
115-
span_ctx = SpanContext(
116-
TraceId(nostd::span<uint8_t, TraceId::kSize>(trace_id)),
117-
SpanId(nostd::span<uint8_t, SpanId::kSize>(span_id)),
118-
TraceFlags(flags),
119-
true);
120-
}
121-
DECODE_FINISH(bl);
122-
}
12387
} // namespace tracing
12488

12589
#endif // HAVE_JAEGER

src/common/tracer.h

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#pragma once
55

66
#include "acconfig.h"
7-
#include "include/buffer.h"
7+
#include "include/encoding.h"
88

99
#ifdef HAVE_JAEGER
1010
#include "opentelemetry/trace/provider.h"
@@ -16,6 +16,11 @@ using jspan_attribute = opentelemetry::common::AttributeValue;
1616

1717
namespace tracing {
1818

19+
static constexpr int TraceIdkSize = 16;
20+
static constexpr int SpanIdkSize = 8;
21+
static_assert(TraceIdkSize == opentelemetry::trace::TraceId::kSize);
22+
static_assert(SpanIdkSize == opentelemetry::trace::SpanId::kSize);
23+
1924
class Tracer {
2025
private:
2126
const static opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer> noop_tracer;
@@ -24,6 +29,7 @@ class Tracer {
2429
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer> tracer;
2530

2631
public:
32+
2733
Tracer() = default;
2834

2935
void init(CephContext* _cct, opentelemetry::nostd::string_view service_name);
@@ -46,8 +52,41 @@ class Tracer {
4652

4753
};
4854

49-
void encode(const jspan_context& span, ceph::buffer::list& bl, uint64_t f = 0);
50-
void decode(jspan_context& span_ctx, ceph::buffer::list::const_iterator& bl);
55+
inline void encode(const jspan_context& span_ctx, bufferlist& bl, uint64_t f = 0) {
56+
ENCODE_START(1, 1, bl);
57+
using namespace opentelemetry;
58+
using namespace trace;
59+
auto is_valid = span_ctx.IsValid();
60+
encode(is_valid, bl);
61+
if (is_valid) {
62+
encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.trace_id().Id().data()), TraceIdkSize), bl);
63+
encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.span_id().Id().data()), SpanIdkSize), bl);
64+
encode(span_ctx.trace_flags().flags(), bl);
65+
}
66+
ENCODE_FINISH(bl);
67+
}
68+
69+
inline void decode(jspan_context& span_ctx, bufferlist::const_iterator& bl) {
70+
using namespace opentelemetry;
71+
using namespace trace;
72+
DECODE_START(1, bl);
73+
bool is_valid;
74+
decode(is_valid, bl);
75+
if (is_valid) {
76+
std::array<uint8_t, TraceIdkSize> trace_id;
77+
std::array<uint8_t, SpanIdkSize> span_id;
78+
uint8_t flags;
79+
decode(trace_id, bl);
80+
decode(span_id, bl);
81+
decode(flags, bl);
82+
span_ctx = SpanContext(
83+
TraceId(nostd::span<uint8_t, TraceIdkSize>(trace_id)),
84+
SpanId(nostd::span<uint8_t, SpanIdkSize>(span_id)),
85+
TraceFlags(flags),
86+
true);
87+
}
88+
DECODE_FINISH(bl);
89+
}
5190

5291
} // namespace tracing
5392

@@ -63,10 +102,20 @@ class Value {
63102

64103
using jspan_attribute = Value;
65104

66-
struct jspan_context {
67-
jspan_context() {}
68-
jspan_context(bool sampled_flag, bool is_remote) {}
105+
namespace opentelemetry {
106+
inline namespace v1 {
107+
namespace trace {
108+
class SpanContext {
109+
public:
110+
SpanContext() = default;
111+
SpanContext(bool sampled_flag, bool is_remote) {}
112+
bool IsValid() const { return false;}
69113
};
114+
} // namespace trace
115+
} // namespace v1
116+
} // namespace opentelemetry
117+
118+
using jspan_context = opentelemetry::v1::trace::SpanContext;
70119

71120
class jspan {
72121
jspan_context _ctx;
@@ -76,7 +125,7 @@ class jspan {
76125
void AddEvent(std::string_view) {}
77126
void AddEvent(std::string_view, std::initializer_list<std::pair<std::string_view, jspan_attribute>> fields) {}
78127
template <typename T> void AddEvent(std::string_view name, const T& fields = {}) {}
79-
const jspan_context& GetContext() { return _ctx; }
128+
jspan_context GetContext() const { return _ctx; }
80129
void UpdateName(std::string_view) {}
81130
bool IsRecording() { return false; }
82131
};

src/include/rados/librados.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,10 +1169,12 @@ inline namespace v14_2_0 {
11691169
// compound object operations
11701170
int operate(const std::string& oid, ObjectWriteOperation *op);
11711171
int operate(const std::string& oid, ObjectWriteOperation *op, int flags);
1172+
int operate(const std::string& oid, ObjectWriteOperation *op, int flags, const jspan_context *trace_info);
11721173
int operate(const std::string& oid, ObjectReadOperation *op, bufferlist *pbl);
11731174
int operate(const std::string& oid, ObjectReadOperation *op, bufferlist *pbl, int flags);
11741175
int aio_operate(const std::string& oid, AioCompletion *c, ObjectWriteOperation *op);
11751176
int aio_operate(const std::string& oid, AioCompletion *c, ObjectWriteOperation *op, int flags);
1177+
int aio_operate(const std::string& oid, AioCompletion *c, ObjectWriteOperation *op, int flags, const jspan_context *trace_info);
11761178
/**
11771179
* Schedule an async write operation with explicit snapshot parameters
11781180
*

src/include/rados/librados_fwd.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,18 @@
33

44
struct blkin_trace_info;
55

6+
namespace opentelemetry {
7+
inline namespace v1 {
8+
namespace trace {
9+
10+
class SpanContext;
11+
12+
} // namespace trace
13+
} // inline namespace v1
14+
} // namespace opentelemetry
15+
16+
using jspan_context = opentelemetry::v1::trace::SpanContext;
17+
618
namespace libradosstriper {
719

820
class RadosStriper;

src/librados/IoCtxImpl.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ int librados::IoCtxImpl::writesame(const object_t& oid, bufferlist& bl,
637637
}
638638

639639
int librados::IoCtxImpl::operate(const object_t& oid, ::ObjectOperation *o,
640-
ceph::real_time *pmtime, int flags)
640+
ceph::real_time *pmtime, int flags, const jspan_context* otel_trace)
641641
{
642642
ceph::real_time ut = (pmtime ? *pmtime :
643643
ceph::real_clock::now());
@@ -664,7 +664,7 @@ int librados::IoCtxImpl::operate(const object_t& oid, ::ObjectOperation *o,
664664
oid, oloc,
665665
*o, snapc, ut,
666666
flags | extra_op_flags,
667-
oncommit, &ver);
667+
oncommit, &ver, osd_reqid_t(), nullptr, otel_trace);
668668
objecter->op_submit(objecter_op);
669669

670670
{
@@ -753,7 +753,7 @@ int librados::IoCtxImpl::aio_operate(const object_t& oid,
753753
::ObjectOperation *o, AioCompletionImpl *c,
754754
const SnapContext& snap_context,
755755
const ceph::real_time *pmtime, int flags,
756-
const blkin_trace_info *trace_info)
756+
const blkin_trace_info *trace_info, const jspan_context *otel_trace)
757757
{
758758
FUNCTRACE(client->cct);
759759
OID_EVENT_TRACE(oid.name.c_str(), "RADOS_WRITE_OP_BEGIN");
@@ -779,7 +779,7 @@ int librados::IoCtxImpl::aio_operate(const object_t& oid,
779779
trace.event("init root span");
780780
Objecter::Op *op = objecter->prepare_mutate_op(
781781
oid, oloc, *o, snap_context, ut, flags | extra_op_flags,
782-
oncomplete, &c->objver, osd_reqid_t(), &trace);
782+
oncomplete, &c->objver, osd_reqid_t(), &trace, otel_trace);
783783
objecter->op_submit(op, &c->tid);
784784
trace.event("rados operate op submitted");
785785

src/librados/IoCtxImpl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ struct librados::IoCtxImpl {
154154
int getxattrs(const object_t& oid, std::map<std::string, bufferlist>& attrset);
155155
int rmxattr(const object_t& oid, const char *name);
156156

157-
int operate(const object_t& oid, ::ObjectOperation *o, ceph::real_time *pmtime, int flags=0);
157+
int operate(const object_t& oid, ::ObjectOperation *o, ceph::real_time *pmtime, int flags=0, const jspan_context *otel_trace = nullptr);
158158
int operate_read(const object_t& oid, ::ObjectOperation *o, bufferlist *pbl, int flags=0);
159159
int aio_operate(const object_t& oid, ::ObjectOperation *o,
160160
AioCompletionImpl *c, const SnapContext& snap_context,
161161
const ceph::real_time *pmtime, int flags,
162-
const blkin_trace_info *trace_info = nullptr);
162+
const blkin_trace_info *trace_info = nullptr, const jspan_context *otel_trace = nullptr);
163163
int aio_operate_read(const object_t& oid, ::ObjectOperation *o,
164164
AioCompletionImpl *c, int flags, bufferlist *pbl, const blkin_trace_info *trace_info = nullptr);
165165

src/librados/librados_asio.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ auto async_write(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
152152
template <typename ExecutionContext, typename CompletionToken>
153153
auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
154154
ObjectReadOperation *read_op, int flags,
155-
CompletionToken&& token)
155+
CompletionToken&& token, const jspan_context* trace_ctx = nullptr)
156156
{
157157
using Op = detail::AsyncOp<bufferlist>;
158158
using Signature = typename Op::Signature;
@@ -176,15 +176,15 @@ auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
176176
template <typename ExecutionContext, typename CompletionToken>
177177
auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
178178
ObjectWriteOperation *write_op, int flags,
179-
CompletionToken &&token)
179+
CompletionToken &&token, const jspan_context* trace_ctx = nullptr)
180180
{
181181
using Op = detail::AsyncOp<void>;
182182
using Signature = typename Op::Signature;
183183
boost::asio::async_completion<CompletionToken, Signature> init(token);
184184
auto p = Op::create(ctx.get_executor(), init.completion_handler);
185185
auto& op = p->user_data;
186186

187-
int ret = io.aio_operate(oid, op.aio_completion.get(), write_op, flags);
187+
int ret = io.aio_operate(oid, op.aio_completion.get(), write_op, flags, trace_ctx);
188188
if (ret < 0) {
189189
auto ec = boost::system::error_code{-ret, librados::detail::err_category()};
190190
ceph::async::post(std::move(p), ec);

src/librados/librados_cxx.cc

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,14 @@ int librados::IoCtx::operate(const std::string& oid, librados::ObjectWriteOperat
15251525
return io_ctx_impl->operate(obj, &o->impl->o, (ceph::real_time *)o->impl->prt, translate_flags(flags));
15261526
}
15271527

1528+
int librados::IoCtx::operate(const std::string& oid, librados::ObjectWriteOperation *o, int flags, const jspan_context* otel_trace)
1529+
{
1530+
object_t obj(oid);
1531+
if (unlikely(!o->impl))
1532+
return -EINVAL;
1533+
return io_ctx_impl->operate(obj, &o->impl->o, (ceph::real_time *)o->impl->prt, translate_flags(flags), otel_trace);
1534+
}
1535+
15281536
int librados::IoCtx::operate(const std::string& oid, librados::ObjectReadOperation *o, bufferlist *pbl)
15291537
{
15301538
object_t obj(oid);
@@ -1550,6 +1558,7 @@ int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
15501558
return io_ctx_impl->aio_operate(obj, &o->impl->o, c->pc,
15511559
io_ctx_impl->snapc, o->impl->prt, 0);
15521560
}
1561+
15531562
int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
15541563
ObjectWriteOperation *o, int flags)
15551564
{
@@ -1558,7 +1567,18 @@ int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
15581567
return -EINVAL;
15591568
return io_ctx_impl->aio_operate(obj, &o->impl->o, c->pc,
15601569
io_ctx_impl->snapc, o->impl->prt,
1561-
translate_flags(flags));
1570+
translate_flags(flags), nullptr);
1571+
}
1572+
1573+
int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
1574+
ObjectWriteOperation *o, int flags, const jspan_context* otel_trace)
1575+
{
1576+
object_t obj(oid);
1577+
if (unlikely(!o->impl))
1578+
return -EINVAL;
1579+
return io_ctx_impl->aio_operate(obj, &o->impl->o, c->pc,
1580+
io_ctx_impl->snapc, o->impl->prt,
1581+
translate_flags(flags), nullptr, otel_trace);
15621582
}
15631583

15641584
int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,

0 commit comments

Comments
 (0)