Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions core/config/cre_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ type CRE interface {
UseLocalTimeProvider() bool
EnableDKGRecipient() bool
Linking() CRELinking
// DebugMode returns true if debug mode is enabled for workflow engines.
// When enabled, additional OTel tracing and logging are performed.
DebugMode() bool
}

// WorkflowFetcher defines configuration for fetching workflow files
Expand Down
1 change: 1 addition & 0 deletions core/config/pyroscope_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ type Pyroscope interface {
AuthToken() string
ServerAddress() string
Environment() string
LinkTracesToProfiles() bool
}
17 changes: 15 additions & 2 deletions core/config/toml/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1639,8 +1639,9 @@ func (p *AutoPprof) setFrom(f *AutoPprof) {
}

type Pyroscope struct {
ServerAddress *string
Environment *string
ServerAddress *string
Environment *string
LinkTracesToProfiles *bool
}

func (p *Pyroscope) setFrom(f *Pyroscope) {
Expand All @@ -1650,6 +1651,9 @@ func (p *Pyroscope) setFrom(f *Pyroscope) {
if v := f.Environment; v != nil {
p.Environment = v
}
if v := f.LinkTracesToProfiles; v != nil {
p.LinkTracesToProfiles = v
}
}

type Sentry struct {
Expand Down Expand Up @@ -1885,6 +1889,11 @@ type CreConfig struct {
UseLocalTimeProvider *bool `toml:",omitempty"`
EnableDKGRecipient *bool `toml:",omitempty"`
Linking *LinkingConfig `toml:",omitempty"`
// DebugMode enables additional tracing and logging for workflow engines.
// When enabled, OTel traces are created for workflow execution and syncer events.
// Requires [Telemetry].Enabled = true for traces to be exported.
// WARNING: This is not suitable for production use due to performance overhead.
DebugMode *bool `toml:",omitempty"`
}

// WorkflowFetcherConfig holds the configuration for fetching workflow files
Expand Down Expand Up @@ -1941,6 +1950,10 @@ func (c *CreConfig) setFrom(f *CreConfig) {
c.Linking.TLSEnabled = v
}
}

if f.DebugMode != nil {
c.DebugMode = f.DebugMode
}
}

func (w *WorkflowFetcherConfig) ValidateConfig() error {
Expand Down
19 changes: 14 additions & 5 deletions core/logger/pyroscope.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package logger

import (
"os"
"runtime"

"github.com/grafana/pyroscope-go"
Expand All @@ -19,6 +20,10 @@ func StartPyroscope(pyroConfig config.Pyroscope, pprofConfig PprofConfig) (*pyro
runtime.SetBlockProfileRate(pprofConfig.BlockProfileRate())
runtime.SetMutexProfileFraction(pprofConfig.MutexProfileFraction())

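// Memory profiling currently uses the runtime default sample rate:
// one sample per 512KB (524288 bytes) allocated.
// For better granularity, uncomment the line below and lower the value
// (a smaller value samples more often); 512 * 1024 is just the default.
// runtime.MemProfileRate = 512 * 1024 // 512KB per sample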

sha, ver := static.Short()

return pyroscope.Start(pyroscope.Config{
Expand All @@ -31,11 +36,15 @@ func StartPyroscope(pyroConfig config.Pyroscope, pprofConfig PprofConfig) (*pyro
// We disable logging of the profiling info; it will be in the Pyroscope instance anyway.
Logger: nil,

Tags: map[string]string{
"SHA": sha,
"Version": ver,
"Environment": pyroConfig.Environment(),
},
Tags: func() map[string]string {
hostname, _ := os.Hostname()
return map[string]string{
"SHA": sha,
"Version": ver,
"Environment": pyroConfig.Environment(),
"hostname": hostname, // set hostname, so we can distinguish between nodes in the same environment
}
}(),

ProfileTypes: []pyroscope.ProfileType{
// these profile types are enabled by default:
Expand Down
54 changes: 54 additions & 0 deletions core/scripts/cre/environment/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1736,6 +1736,60 @@ go run . env start --with-beholder
go run . env beholder start
```

### OTel Tracing Configuration

To enable OpenTelemetry (OTel) tracing for workflow engines and see traces in Tempo/Grafana, **multiple configuration toggles must be set**:

| Toggle | Location | Required Value | Purpose |
|--------|----------|----------------|---------|
| `Telemetry.Enabled` | Node TOML | `true` | Enables the OTel exporter |
| `Telemetry.TraceSampleRatio` | Node TOML | `> 0` (e.g., `1.0`) | Controls sampling rate (0 = no traces, 1 = 100%) |
| `CRE.DebugMode` | Node TOML | `true` | Enables detailed tracing in workflow engines and syncer |
| `OTEL_SERVICE_NAME` | Environment variable | e.g., `chainlink-node` | Sets the service name for traces in Tempo |
| `Pyroscope.LinkTracesToProfiles` | Node TOML | `true` | Optional: enables traces-to-profiles linking in Grafana (requires Pyroscope); not needed for tracing itself |

**Example TOML configuration:**

```toml
[Telemetry]
Enabled = true
Endpoint = 'host.docker.internal:4317'
InsecureConnection = true
TraceSampleRatio = 1.0 # 100% sampling - adjust for production

[CRE]
DebugMode = true # WARNING: Not suitable for production due to overhead

[Pyroscope]
ServerAddress = 'http://host.docker.internal:4040'
LinkTracesToProfiles = true # Enables traces-to-profiles in Grafana
```

**Example environment variable (in nodeset config):**

```toml
[[nodesets]]
env_vars = { OTEL_SERVICE_NAME = "chainlink-node" }
```

**Common issues:**

| Symptom | Likely Cause |
|---------|--------------|
| No traces at all | `Telemetry.Enabled = false` or `TraceSampleRatio = 0` |
| No workflow engine traces | `CRE.DebugMode = false` |
| Traces show `unknown_service:chainlink` | Missing `OTEL_SERVICE_NAME` env var |
| Traces not exported | Telemetry endpoint unreachable (check `go run . obs up -f`) |
| No traces-to-profiles link in Grafana | `Pyroscope.LinkTracesToProfiles = false` or Pyroscope not running |

**Important notes:**

- `CRE.DebugMode` adds performance overhead and should only be enabled during development/debugging, not in production environments.
- **Tracing is only implemented for V2 components:**
  - **V2 Syncer**: Only used when workflow registry contracts are v2.x. If you're using v1.x contracts, the V1 syncer is used and has no tracing.
  - **V2 Engine (NoDAG)**: Only used by V2/NoDAG workflows. V1/DAG workflows use the V1 engine, which has no tracing.
- To use tracing, ensure your environment is configured with **v2 workflow registry contracts** and you're deploying **V2 workflows**.

### Expected Error Messages

If these telemetry services are not running, you will see frequent "expected" error messages in the logs due to connection failures:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
override_mode = "all"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "write-evm-1337", "evm-2337"]

# Capability config overrides for this DON.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
supported_evm_chains = [1337]
supported_sol_chains = ["22222222222222222222222222222222222222222222"]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-trigger", "cron", "http-action", "http-trigger", "consensus", "don-time"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -75,7 +75,7 @@
supported_evm_chains = [1337]
supported_sol_chains = ["22222222222222222222222222222222222222222222"]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["web-api-target", "vault", "solana"]

[nodesets.db]
Expand All @@ -98,7 +98,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337]

[nodesets.db]
Expand Down
4 changes: 2 additions & 2 deletions core/scripts/cre/environment/configs/workflow-don-tron.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
override_mode = "all"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "write-evm-3360022319", "read-contract-1337", "read-contract-3360022319"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand All @@ -61,7 +61,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
# because bootstrap job for capability DON will be created on the bootstrap node from this DON
supported_evm_chains = [1337, 2337]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-trigger", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "read-contract-1337", "evm-1337"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -71,7 +71,7 @@
# to identify nodes in the gateway configuration (required by both web-api-target and vault capabilities)
supported_evm_chains = [1337, 2337]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["web-api-target", "vault", "write-evm-2337", "read-contract-2337", "evm-2337"]

[nodesets.db]
Expand All @@ -94,7 +94,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
override_mode = "all"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "cron", "http-action", "http-trigger", "consensus", "don-time"]

[nodesets.chain_capabilities]
Expand Down Expand Up @@ -79,7 +79,7 @@ Name = 'mock-private-registry'
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
12 changes: 7 additions & 5 deletions core/scripts/cre/environment/configs/workflow-gateway-don.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@
name = "workflow"
don_types = ["workflow"]
override_mode = "all"
http_port_range_start = 10100
http_port_range_start = 10000
p2p_port_range_start = 12000

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "write-evm-2337", "evm-1337", "evm-2337", "read-contract-1337", "read-contract-2337"]

[nodesets.db]
Expand All @@ -58,14 +59,15 @@
name = "bootstrap-gateway"
don_types = ["bootstrap", "gateway"]
override_mode = "each"
http_port_range_start = 10300
http_port_range_start = 10100
p2p_port_range_start = 12100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
image = "postgres:12.0"
port = 13200
port = 13100

[[nodesets.node_specs]]
roles = ["bootstrap", "gateway"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
# because bootstrap job for capability DON will be created on the bootstrap node from this DON
supported_evm_chains = [1337, 2337]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-trigger", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "read-contract-1337", "evm-1337"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -72,7 +72,7 @@
# to identify nodes in the gateway configuration (required by both web-api-target and vault capabilities)
supported_evm_chains = [1337, 2337]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["web-api-target", "vault", "write-evm-2337", "read-contract-2337", "evm-2337"]

[nodesets.capability_configs]
Expand All @@ -99,7 +99,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ port = 8666
override_mode = "all"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-trigger", "don-time", "write-evm-1337"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -55,7 +55,7 @@ port = 8666
override_mode = "all"
http_port_range_start = 10200

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["mock"]

[nodesets.db]
Expand All @@ -75,7 +75,7 @@ port = 8666
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }

[nodesets.db]
image = "postgres:12.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
override_mode = "each"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "read-contract-1337", "evm-1337", "write-evm-2337", "read-contract-2337", "evm-2337"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -92,7 +92,7 @@
override_mode = "all"
http_port_range_start = 10400

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
# add "vault", "http-action", "http-trigger", when gateway can support more than 1 DON per service
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "cron", "consensus", "don-time", "write-evm-1337", "read-contract-1337", "evm-1337", "write-evm-2337", "read-contract-2337", "evm-2337"]

Expand All @@ -118,7 +118,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
Loading