Skip to content

Commit bbaf0ee

Browse files
committed
support tracing id in logs
1 parent a10e37d commit bbaf0ee

File tree

19 files changed

+442
-182
lines changed

19 files changed

+442
-182
lines changed

docs/internals/api-gateway/readme.md

Lines changed: 61 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,24 +26,66 @@ after:
2626
2727
```go
2828
type SubstrateGateway interface {
29-
CreateNode(node substrate.Node) (uint32, error)
30-
CreateTwin(relay string, pk []byte) (uint32, error)
31-
EnsureAccount(activationURL string, termsAndConditionsLink string, termsAndConditionsHash string) (info substrate.AccountInfo, err error)
32-
GetContract(id uint64) (substrate.Contract, SubstrateError)
33-
GetContractIDByNameRegistration(name string) (uint64, SubstrateError)
34-
GetFarm(id uint32) (substrate.Farm, error)
35-
GetNode(id uint32) (substrate.Node, error)
36-
GetNodeByTwinID(twin uint32) (uint32, SubstrateError)
37-
GetNodeContracts(node uint32) ([]types.U64, error)
38-
GetNodeRentContract(node uint32) (uint64, SubstrateError)
39-
GetNodes(farmID uint32) ([]uint32, error)
40-
GetPowerTarget(nodeID uint32) (power substrate.NodePower, err error)
41-
GetTwin(id uint32) (substrate.Twin, error)
42-
GetTwinByPubKey(pk []byte) (uint32, SubstrateError)
43-
Report(consumptions []substrate.NruConsumption) (types.Hash, error)
44-
SetContractConsumption(resources ...substrate.ContractResources) error
45-
SetNodePowerState(up bool) (hash types.Hash, err error)
46-
UpdateNode(node substrate.Node) (uint32, error)
47-
UpdateNodeUptimeV2(uptime uint64, timestampHint uint64) (hash types.Hash, err error)
29+
UpdateSubstrateGatewayConnection(ctx context.Context, manager substrate.Manager) (err error)
30+
CreateNode(ctx context.Context, node substrate.Node) (uint32, error)
31+
CreateTwin(ctx context.Context, relay string, pk []byte) (uint32, error)
32+
EnsureAccount(ctx context.Context, activationURL []string, termsAndConditionsLink string, termsAndConditionsHash string) (info substrate.AccountInfo, err error)
33+
GetContract(ctx context.Context, id uint64) (substrate.Contract, SubstrateError)
34+
GetContractIDByNameRegistration(ctx context.Context, name string) (uint64, SubstrateError)
35+
GetFarm(ctx context.Context, id uint32) (substrate.Farm, error)
36+
GetNode(ctx context.Context, id uint32) (substrate.Node, error)
37+
GetNodeByTwinID(ctx context.Context, twin uint32) (uint32, SubstrateError)
38+
GetNodeContracts(ctx context.Context, node uint32) ([]types.U64, error)
39+
GetNodeRentContract(ctx context.Context, node uint32) (uint64, SubstrateError)
40+
GetNodes(ctx context.Context, farmID uint32) ([]uint32, error)
41+
GetPowerTarget(ctx context.Context, nodeID uint32) (power substrate.NodePower, err error)
42+
GetTwin(ctx context.Context, id uint32) (substrate.Twin, error)
43+
GetTwinByPubKey(ctx context.Context, pk []byte) (uint32, SubstrateError)
44+
Report(ctx context.Context, consumptions []substrate.NruConsumption) (types.Hash, error)
45+
SetContractConsumption(ctx context.Context, resources ...substrate.ContractResources) error
46+
SetNodePowerState(ctx context.Context, up bool) (hash types.Hash, err error)
47+
UpdateNode(ctx context.Context, node substrate.Node) (uint32, error)
48+
UpdateNodeUptimeV2(ctx context.Context, uptime uint64, timestampHint uint64) (hash types.Hash, err error)
49+
GetTime(ctx context.Context) (time.Time, error)
50+
GetZosVersion(ctx context.Context) (string, error)
4851
}
4952
```
53+
54+
## Distributed Tracing
55+
56+
API Gateway implements distributed tracing to track requests across ZOS modules. Each request is assigned a unique trace ID that flows through the entire system.
57+
58+
### How It Works
59+
60+
1. **Context Propagation**: All interface methods accept `context.Context` as the first parameter
61+
2. **Trace ID Generation**: A unique trace ID (format: `trace-{uuid}`) is generated or extracted from the context
62+
3. **Automatic Logging**: All operations log the trace ID, enabling request correlation across modules
63+
64+
### Log Output
65+
66+
All logs include the `trace_id` field:
67+
68+
```json
69+
{"level":"debug","trace_id":"trace-abc123","method":"CreateNode","twin_id":1234,"message":"method called"}
70+
{"level":"debug","trace_id":"trace-abc123","message":"CreateNode failed, retrying"}
71+
{"level":"debug","trace_id":"trace-abc123","message":"CreateNode completed successfully"}
72+
```
73+
74+
### Searching Logs
75+
76+
To trace a complete request journey:
77+
78+
```bash
79+
# Find all logs for a specific trace ID
80+
zinit log | grep "trace-abc123"
81+
82+
# Or use journalctl
83+
journalctl -u api-gateway | grep "trace-abc123"
84+
```
85+
86+
### Benefits
87+
88+
- **Request Tracking**: Follow a provision request from arrival through flist mounting, disk preparation, and completion
89+
- **Cross-Module Correlation**: Same trace ID flows through api-gateway → provision → flist → storage modules
90+
- **Debugging**: Quickly identify which requests are causing issues
91+
- **Performance Analysis**: Track request duration across the entire system

pkg/api_gateway.go

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package pkg
22

33
import (
4+
"context"
45
"time"
56

67
"github.com/centrifuge/go-substrate-rpc-client/v4/types"
@@ -10,28 +11,28 @@ import (
1011
//go:generate zbusc -module api-gateway -version 0.0.1 -name api-gateway -package stubs github.com/threefoldtech/zosbase/pkg+SubstrateGateway stubs/api_gateway_stub.go
1112

1213
type SubstrateGateway interface {
13-
UpdateSubstrateGatewayConnection(manager substrate.Manager) (err error)
14-
CreateNode(node substrate.Node) (uint32, error)
15-
CreateTwin(relay string, pk []byte) (uint32, error)
16-
EnsureAccount(activationURL []string, termsAndConditionsLink string, termsAndConditionsHash string) (info substrate.AccountInfo, err error)
17-
GetContract(id uint64) (substrate.Contract, SubstrateError)
18-
GetContractIDByNameRegistration(name string) (uint64, SubstrateError)
19-
GetFarm(id uint32) (substrate.Farm, error)
20-
GetNode(id uint32) (substrate.Node, error)
21-
GetNodeByTwinID(twin uint32) (uint32, SubstrateError)
22-
GetNodeContracts(node uint32) ([]types.U64, error)
23-
GetNodeRentContract(node uint32) (uint64, SubstrateError)
24-
GetNodes(farmID uint32) ([]uint32, error)
25-
GetPowerTarget(nodeID uint32) (power substrate.NodePower, err error)
26-
GetTwin(id uint32) (substrate.Twin, error)
27-
GetTwinByPubKey(pk []byte) (uint32, SubstrateError)
28-
Report(consumptions []substrate.NruConsumption) (types.Hash, error)
29-
SetContractConsumption(resources ...substrate.ContractResources) error
30-
SetNodePowerState(up bool) (hash types.Hash, err error)
31-
UpdateNode(node substrate.Node) (uint32, error)
32-
UpdateNodeUptimeV2(uptime uint64, timestampHint uint64) (hash types.Hash, err error)
33-
GetTime() (time.Time, error)
34-
GetZosVersion() (string, error)
14+
UpdateSubstrateGatewayConnection(ctx context.Context, manager substrate.Manager) (err error)
15+
CreateNode(ctx context.Context, node substrate.Node) (uint32, error)
16+
CreateTwin(ctx context.Context, relay string, pk []byte) (uint32, error)
17+
EnsureAccount(ctx context.Context, activationURL []string, termsAndConditionsLink string, termsAndConditionsHash string) (info substrate.AccountInfo, err error)
18+
GetContract(ctx context.Context, id uint64) (substrate.Contract, SubstrateError)
19+
GetContractIDByNameRegistration(ctx context.Context, name string) (uint64, SubstrateError)
20+
GetFarm(ctx context.Context, id uint32) (substrate.Farm, error)
21+
GetNode(ctx context.Context, id uint32) (substrate.Node, error)
22+
GetNodeByTwinID(ctx context.Context, twin uint32) (uint32, SubstrateError)
23+
GetNodeContracts(ctx context.Context, node uint32) ([]types.U64, error)
24+
GetNodeRentContract(ctx context.Context, node uint32) (uint64, SubstrateError)
25+
GetNodes(ctx context.Context, farmID uint32) ([]uint32, error)
26+
GetPowerTarget(ctx context.Context, nodeID uint32) (power substrate.NodePower, err error)
27+
GetTwin(ctx context.Context, id uint32) (substrate.Twin, error)
28+
GetTwinByPubKey(ctx context.Context, pk []byte) (uint32, SubstrateError)
29+
Report(ctx context.Context, consumptions []substrate.NruConsumption) (types.Hash, error)
30+
SetContractConsumption(ctx context.Context, resources ...substrate.ContractResources) error
31+
SetNodePowerState(ctx context.Context, up bool) (hash types.Hash, err error)
32+
UpdateNode(ctx context.Context, node substrate.Node) (uint32, error)
33+
UpdateNodeUptimeV2(ctx context.Context, uptime uint64, timestampHint uint64) (hash types.Hash, err error)
34+
GetTime(ctx context.Context) (time.Time, error)
35+
GetZosVersion(ctx context.Context) (string, error)
3536
}
3637

3738
type SubstrateError struct {

pkg/gateway/gateway.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ type gatewayModule struct {
5555
volatile string
5656
cl zbus.Client
5757
resolver *net.Resolver
58-
substrateGateway *stubs.SubstrateGatewayStub
58+
substrateGateway *stubs.SubstrateGatewayClient
5959
// maps domain to workload id
6060
reservedDomains map[string]string
6161
domainLock sync.RWMutex
@@ -280,7 +280,7 @@ func New(ctx context.Context, cl zbus.Client, root string) (pkg.Gateway, error)
280280
if err != nil {
281281
return nil, errors.Wrap(err, "failed to load old domains")
282282
}
283-
substrateGateway := stubs.NewSubstrateGatewayStub(cl)
283+
substrateGateway := stubs.NewSubstrateGatewayClient(cl)
284284

285285
gw := &gatewayModule{
286286
cl: cl,

pkg/gateway_light/gateway.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ type gatewayModule struct {
5757
volatile string
5858
cl zbus.Client
5959
resolver *net.Resolver
60-
substrateGateway *stubs.SubstrateGatewayStub
60+
substrateGateway *stubs.SubstrateGatewayClient
6161
// maps domain to workload id
6262
reservedDomains map[string]string
6363
domainLock sync.RWMutex
@@ -282,7 +282,7 @@ func New(ctx context.Context, cl zbus.Client, root string) (pkg.Gateway, error)
282282
if err != nil {
283283
return nil, errors.Wrap(err, "failed to load old domains")
284284
}
285-
substrateGateway := stubs.NewSubstrateGatewayStub(cl)
285+
substrateGateway := stubs.NewSubstrateGatewayClient(cl)
286286

287287
gw := &gatewayModule{
288288
cl: cl,

pkg/perf/healthcheck/ntp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,6 @@ func getTimeChainWithZCL(zcl zbus.Client) (time.Time, error) {
165165
if zcl == nil {
166166
return time.Time{}, errors.New("zbus client is nil, cannot get time from chain")
167167
}
168-
gw := stubs.NewSubstrateGatewayStub(zcl)
168+
gw := stubs.NewSubstrateGatewayClient(zcl)
169169
return gw.GetTime(context.Background())
170170
}

pkg/perf/publicip/publicip_task.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func (p *publicIPValidationTask) Run(ctx context.Context) (interface{}, error) {
7979
return nil, fmt.Errorf("failed to get namespace %s: %w", testNamespace, err)
8080
}
8181
cl := perf.MustGetZbusClient(ctx)
82-
substrateGateway := stubs.NewSubstrateGatewayStub(cl)
82+
substrateGateway := stubs.NewSubstrateGatewayClient(cl)
8383
farmID := environment.MustGet().FarmID
8484

8585
shouldRun, err := isLeastValidNode(ctx, uint32(farmID), substrateGateway)
@@ -205,7 +205,7 @@ func (p *publicIPValidationTask) validateIPs(publicIPs []substrate.PublicIP, mac
205205
return report, nil
206206
}
207207

208-
func isLeastValidNode(ctx context.Context, farmID uint32, substrateGateway *stubs.SubstrateGatewayStub) (bool, error) {
208+
func isLeastValidNode(ctx context.Context, farmID uint32, substrateGateway *stubs.SubstrateGatewayClient) (bool, error) {
209209
env := environment.MustGet()
210210
gql, err := graphql.NewGraphQl(env.GraphQL...)
211211
if err != nil {

pkg/power/power.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import (
1919

2020
type PowerServer struct {
2121
consumer *events.RedisConsumer
22-
substrateGateway *stubs.SubstrateGatewayStub
22+
substrateGateway *stubs.SubstrateGatewayClient
2323

2424
// enabled means the node can power off!
2525
enabled bool
@@ -30,7 +30,7 @@ type PowerServer struct {
3030
}
3131

3232
func NewPowerServer(
33-
substrateGateway *stubs.SubstrateGatewayStub,
33+
substrateGateway *stubs.SubstrateGatewayClient,
3434
consumer *events.RedisConsumer,
3535
enabled bool,
3636
farm pkg.FarmID,

pkg/power/uptime.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ type Uptime struct {
2626
Mark utils.Mark
2727

2828
id substrate.Identity
29-
substrateGateway *stubs.SubstrateGatewayStub
29+
substrateGateway *stubs.SubstrateGatewayClient
3030
m sync.Mutex
3131
}
3232

33-
func NewUptime(substrateGateway *stubs.SubstrateGatewayStub, id substrate.Identity) (*Uptime, error) {
33+
func NewUptime(substrateGateway *stubs.SubstrateGatewayClient, id substrate.Identity) (*Uptime, error) {
3434
return &Uptime{
3535
id: id,
3636
substrateGateway: substrateGateway,

pkg/provision/auth.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ import (
1111
)
1212

1313
type substrateTwins struct {
14-
substrateGateway *stubs.SubstrateGatewayStub
14+
substrateGateway *stubs.SubstrateGatewayClient
1515
mem *lru.Cache
1616
}
1717

1818
// NewSubstrateTwins creates a substrate users db that implements the provision.Users interface.
19-
func NewSubstrateTwins(substrateGateway *stubs.SubstrateGatewayStub) (Twins, error) {
19+
func NewSubstrateTwins(substrateGateway *stubs.SubstrateGatewayClient) (Twins, error) {
2020
cache, err := lru.New(1024)
2121
if err != nil {
2222
return nil, err
@@ -50,7 +50,7 @@ type substrateAdmins struct {
5050

5151
// NewSubstrateAdmins creates a substrate twins db that implements the provision.Users interface.
5252
// but it also makes sure the user is an admin
53-
func NewSubstrateAdmins(substrateGateway *stubs.SubstrateGatewayStub, farmID uint32) (Twins, error) {
53+
func NewSubstrateAdmins(substrateGateway *stubs.SubstrateGatewayClient, farmID uint32) (Twins, error) {
5454
farm, err := substrateGateway.GetFarm(context.Background(), farmID)
5555
if err != nil {
5656
return nil, errors.Wrap(err, "failed to get farm")

pkg/provision/engine.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func WithStartupOrder(t ...gridtypes.WorkloadType) EngineOption {
5252
// WithAPIGateway sets the API Gateway. If set it will
5353
// be used by the engine to fetch (and validate) the deployment contract
5454
// then the contract will be available on the deployment context
55-
func WithAPIGateway(node uint32, substrateGateway *stubs.SubstrateGatewayStub) EngineOption {
55+
func WithAPIGateway(node uint32, substrateGateway *stubs.SubstrateGatewayClient) EngineOption {
5656
return &withAPIGateway{node, substrateGateway}
5757
}
5858

@@ -125,7 +125,7 @@ type NativeEngine struct {
125125
rerunAll bool
126126
// substrate specific attributes
127127
nodeID uint32
128-
substrateGateway *stubs.SubstrateGatewayStub
128+
substrateGateway *stubs.SubstrateGatewayClient
129129
callback Callback
130130
}
131131

@@ -152,7 +152,7 @@ func (o *withAdminsKeyGetter) apply(e *NativeEngine) {
152152

153153
type withAPIGateway struct {
154154
nodeID uint32
155-
substrateGateway *stubs.SubstrateGatewayStub
155+
substrateGateway *stubs.SubstrateGatewayClient
156156
}
157157

158158
func (o *withAPIGateway) apply(e *NativeEngine) {

0 commit comments

Comments
 (0)