Skip to content

Commit 35bba0f

Browse files
committed
Fix user tracing of DO SQL queries and add system tracing of them
The user tracing of DO SQL queries was setting the rows_read tag too soon, before the cursor actually iterated over the result rows. This lead to under-reporting of rows_read in these trace spans. We can fix this by passing the span into the Cursor, allowing it to be finalized when the query itself is done. For some queries this will still be immediately because there aren't result rows to iterate over, but for others this will keep the span held open until either the result rows have all been read or the cursor has been canceled/closed. There's still one case here that's a little awkward: if the query throws an exception, we don't add any tags at all (either before this change or after it). We may want to add a try-catch here that adds an error span to the traces when running the sql statement throws an exception, but we can decide on that separately since it does come involve making the code more complex and adding a bit of runtime overhead.
1 parent 693b5e7 commit 35bba0f

File tree

2 files changed

+38
-9
lines changed

2 files changed

+38
-9
lines changed

src/workerd/api/sql.c++

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ SqlStorage::~SqlStorage() {}
2525

2626
jsg::Ref<SqlStorage::Cursor> SqlStorage::exec(
2727
jsg::Lock& js, jsg::JsString querySql, jsg::Arguments<BindingValue> bindings) {
28-
auto userSpan = IoContext::current().makeUserTraceSpan("durable_object_storage_exec"_kjc);
28+
auto& context = IoContext::current();
29+
auto span = context.makeTraceSpan("durable_object_storage_exec"_kjc);
30+
auto userSpan = context.makeUserTraceSpan("durable_object_storage_exec"_kjc);
2931
userSpan.setTag("db.system.name"_kjc, "cloudflare-durable-object-sql"_kjc);
3032
userSpan.setTag("db.operation.name"_kjc, "exec"_kjc);
3133
userSpan.setTag("db.query.text"_kjc, kj::str(querySql));
@@ -52,6 +54,20 @@ jsg::Ref<SqlStorage::Cursor> SqlStorage::exec(
5254
}
5355
statementCache.lru.add(*slot.get());
5456

57+
// In order to get accurate statistics, we have to keep the spans around until the query is
58+
// actually done, which for read queries that iterate over a cursor won't be until later.
59+
kj::Maybe<kj::Function<void(Cursor&)>> doneCallback;
60+
if (span.isObserved() || userSpan.isObserved()) {
61+
doneCallback = [span = kj::mv(span), userSpan = kj::mv(userSpan)](Cursor& cursor) mutable {
62+
int64_t rowsRead = cursor.getRowsRead();
63+
int64_t rowsWritten = cursor.getRowsWritten();
64+
span.setTag("rows_read"_kjc, rowsRead);
65+
span.setTag("rows_written"_kjc, rowsWritten);
66+
userSpan.setTag("cloudflare.durable_object.response.rows_read"_kjc, rowsRead);
67+
userSpan.setTag("cloudflare.durable_object.response.rows_written"_kjc, rowsWritten);
68+
};
69+
}
70+
5571
if (slot->isShared()) {
5672
// Oops, this CachedStatement is currently in-use (presumably by a Cursor).
5773
//
@@ -61,10 +77,11 @@ jsg::Ref<SqlStorage::Cursor> SqlStorage::exec(
6177
// In theory we could try to cache multiple copies of the statement, but as this is probably
6278
// exceedingly rare, it is not worth the added code complexity.
6379
SqliteDatabase::Regulator& regulator = *this;
64-
return js.alloc<Cursor>(js, db, regulator, js.toString(querySql), kj::mv(bindings));
80+
return js.alloc<Cursor>(
81+
js, kj::mv(doneCallback), db, regulator, js.toString(querySql), kj::mv(bindings));
6582
}
6683

67-
auto result = js.alloc<Cursor>(js, slot.addRef(), kj::mv(bindings));
84+
auto result = js.alloc<Cursor>(js, kj::mv(doneCallback), slot.addRef(), kj::mv(bindings));
6885

6986
// If the statement cache grew too big, drop the least-recently-used entry.
7087
while (statementCache.totalSize > SQL_STATEMENT_CACHE_MAX_SIZE) {
@@ -75,23 +92,26 @@ jsg::Ref<SqlStorage::Cursor> SqlStorage::exec(
7592
KJ_ASSERT(statementCache.map.eraseMatch(oldQuery));
7693
}
7794

78-
userSpan.setTag("cloudflare.durable_object.response.rows_read"_kjc,
79-
static_cast<int64_t>(result->getRowsRead()));
80-
userSpan.setTag("cloudflare.durable_object.response.rows_written"_kjc,
81-
static_cast<int64_t>(result->getRowsWritten()));
8295
return result;
8396
}
8497

8598
SqlStorage::IngestResult SqlStorage::ingest(jsg::Lock& js, kj::String querySql) {
86-
auto userSpan = IoContext::current().makeUserTraceSpan("durable_object_storage_ingest"_kjc);
99+
auto& context = IoContext::current();
100+
auto span = context.makeTraceSpan("durable_object_storage_ingest"_kjc);
101+
auto userSpan = context.makeUserTraceSpan("durable_object_storage_ingest"_kjc);
87102
SqliteDatabase::Regulator& regulator = *this;
88103
auto result = getDb(js).ingestSql(regulator, querySql);
104+
105+
span.setTag("rows_read"_kjc, static_cast<int64_t>(result.rowsRead));
106+
span.setTag("rows_written"_kjc, static_cast<int64_t>(result.rowsWritten));
107+
span.setTag("statement_count"_kjc, static_cast<int64_t>(result.statementCount));
89108
userSpan.setTag(
90109
"cloudflare.durable_object.response.rows_read"_kjc, static_cast<int64_t>(result.rowsRead));
91110
userSpan.setTag("cloudflare.durable_object.response.rows_written"_kjc,
92111
static_cast<int64_t>(result.rowsWritten));
93112
userSpan.setTag("cloudflare.durable_object.response.statement_count"_kjc,
94113
static_cast<int64_t>(result.statementCount));
114+
95115
return IngestResult(
96116
kj::str(result.remainder), result.rowsRead, result.rowsWritten, result.statementCount);
97117
}
@@ -376,6 +396,11 @@ void SqlStorage::Cursor::endQuery(State& stateRef) {
376396
rowsRead = stateRef.query.getRowsRead();
377397
rowsWritten = stateRef.query.getRowsWritten();
378398

399+
KJ_IF_SOME(cb, doneCallback) {
400+
cb(*this);
401+
doneCallback = kj::none;
402+
}
403+
379404
// Clean up the query proactively.
380405
state = kj::none;
381406
}

src/workerd/api/sql.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@ class SqlStorage final: public jsg::Object, private SqliteDatabase::Regulator {
179179
class SqlStorage::Cursor final: public jsg::Object {
180180
public:
181181
template <typename... Params>
182-
Cursor(jsg::Lock& js, Params&&... params) {
182+
Cursor(jsg::Lock& js, kj::Maybe<kj::Function<void(Cursor&)>> doneCb, Params&&... params)
183+
: doneCallback(kj::mv(doneCb)) {
183184
auto stateObj = kj::heap<State>(kj::fwd<Params>(params)...);
184185
initColumnNames(js, *stateObj);
185186
if (stateObj->query.isDone()) {
@@ -259,6 +260,9 @@ class SqlStorage::Cursor final: public jsg::Object {
259260
// Nulled out when query is done or canceled.
260261
kj::Maybe<IoOwn<State>> state;
261262

263+
// Called when the query is done or canceled.
264+
kj::Maybe<kj::Function<void(Cursor&)>> doneCallback;
265+
262266
// True if the cursor was canceled by a new call to the same statement. This is used only to
263267
// flag an error if the application tries to reuse the cursor.
264268
bool canceled = false;

0 commit comments

Comments
 (0)