Skip to content

Commit d231b49

Browse files
authored
[Feature][SIG-27426] Add hooks to fetch metadata for Databricks queries (#14)
To make debugging Databricks queries easier, add basic hooks that expose the metadata (e.g. session ID, query ID) that Databricks returns for each query. The intention is that Multiplex will register callbacks using `WithOpenSessionHook` and `WithOperationMetadataHook` that store the metadata Databricks returns as trace tags. I went with this approach instead of stuffing the metadata into the Databricks driver's `Rows` struct, mainly because `Rows` isn't always returned, e.g. when executing statements that don't return any data, or when there are errors. While the goal here is to simplify debugging failed queries, it's still possible for a Databricks query to fail without giving us a query ID, namely if `ExecuteStatement` itself returns an error; this happens when the query contains a syntax error, for example. In that case we'll at least have the session ID, and we can continue to iterate on fetching query IDs when `ExecuteStatement` fails. In the meantime, this change enables us to get query IDs for queries that fail at any later point. Signed-off-by: Eric Bannatyne <[email protected]>
1 parent de2f224 commit d231b49

File tree

5 files changed

+69
-0
lines changed

5 files changed

+69
-0
lines changed

connection.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ func (c *Conn) QueryContext(ctx context.Context, q string, args []driver.NamedVa
7979
if err != nil {
8080
return nil, hive.WithStack(err)
8181
}
82+
callOpenSessionHook(ctx, session.GetSessionId())
8283

8384
tmpl := template(q)
8485
stmt, err := statement(tmpl, args)
@@ -96,6 +97,7 @@ func (c *Conn) ExecContext(ctx context.Context, q string, args []driver.NamedVal
9697
if err != nil {
9798
return nil, hive.WithStack(err)
9899
}
100+
callOpenSessionHook(ctx, session.GetSessionId())
99101

100102
tmpl := template(q)
101103
stmt, err := statement(tmpl, args)

context.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package dbsql
2+
3+
import (
4+
"context"
5+
)
6+
7+
// contextKey is the unexported type used for the keys under which this
// package stores its hooks in a context.Context. Using a package-private type
// keeps these keys from colliding with context keys defined elsewhere.
type contextKey string

// Context keys for the hooks this package reads from a context.
const (
	// openSessionHook is the key for the callback registered with
	// WithOpenSessionHook.
	openSessionHook = contextKey("OPEN_SESSION_HOOK")
	// operationMetadataHook is the key for the callback registered with
	// WithOperationMetadataHook.
	operationMetadataHook = contextKey("OPERATION_METADATA_HOOK")
)
13+
14+
// WithOpenSessionHook registers a callback that will be executed with the
15+
// Databricks session ID as input when a session is acquired for running a query,
16+
// whether by reusing a cached session ID or by creating a new session.
17+
func WithOpenSessionHook(
18+
ctx context.Context,
19+
fn func(string),
20+
) context.Context {
21+
return context.WithValue(ctx, openSessionHook, fn)
22+
}
23+
24+
func callOpenSessionHook(ctx context.Context, sessionId string) {
25+
callContextHook(ctx, openSessionHook, sessionId)
26+
}
27+
28+
// OperationMetadata is the view of an operation that is handed to callbacks
// registered with WithOperationMetadataHook.
type OperationMetadata interface {
	// GetOperationId returns the operation's identifier as a string.
	GetOperationId() string
	// HasResultSet reports whether the operation has a result set.
	HasResultSet() bool
	// RowsAffected returns the affected-row count as a float64.
	// NOTE(review): the exact semantics are defined by the implementation;
	// confirm against it before relying on the value.
	RowsAffected() float64
}
33+
34+
// WithOperationMetadataHook registers a callback that will be executed after an
35+
// ExecuteStatement thrift request.
36+
func WithOperationMetadataHook(
37+
ctx context.Context,
38+
fn func(OperationMetadata),
39+
) context.Context {
40+
return context.WithValue(ctx, operationMetadataHook, fn)
41+
}
42+
43+
func callOperationMetadataHook(ctx context.Context, metadata OperationMetadata) {
44+
callContextHook(ctx, operationMetadataHook, metadata)
45+
}
46+
47+
func callContextHook[T any](ctx context.Context, key contextKey, input T) {
48+
val := ctx.Value(key)
49+
if val == nil {
50+
return
51+
}
52+
fn, ok := val.(func(T))
53+
if !ok {
54+
return
55+
}
56+
fn(input)
57+
}

hive/operation.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ type Operation struct {
1313
h *cli_service.TOperationHandle
1414
}
1515

16+
func (op *Operation) GetOperationId() string {
17+
return guid(op.h.GetOperationId().GUID)
18+
}
19+
1620
// HasResultSet return if operation has result set
1721
func (op *Operation) HasResultSet() bool {
1822
return op.h.GetHasResultSet()

hive/session.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ type Session struct {
1212
h *cli_service.TSessionHandle
1313
}
1414

15+
func (s *Session) GetSessionId() string {
16+
return guid(s.h.GetSessionId().GUID)
17+
}
18+
1519
// Ping checks the connection
1620
func (s *Session) Ping(ctx context.Context) error {
1721
req := cli_service.TGetInfoReq{

statement.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ func query(ctx context.Context, session *hive.Session, stmt string) (driver.Rows
121121
if err != nil {
122122
return nil, hive.WithStack(err)
123123
}
124+
callOperationMetadataHook(ctx, operation)
124125

125126
schema, err := operation.GetResultSetMetadata(ctx)
126127
if err != nil {
@@ -144,6 +145,7 @@ func exec(ctx context.Context, session *hive.Session, stmt string) (driver.Resul
144145
if err != nil {
145146
return nil, hive.WithStack(err)
146147
}
148+
callOperationMetadataHook(ctx, operation)
147149

148150
if err := operation.Close(ctx); err != nil {
149151
return nil, hive.WithStack(err)

0 commit comments

Comments
 (0)