Skip to content

Commit b2bdbd5

Browse files
authored
[FSSDK-9705] feat: add OpenTelemetry tracing (#400)
* add a basic tracing pipeline * add tracing config * update tracing config * use context * update handler with middleware * support both http & grpc protocols * refactor config * add unit test * update unit test * refactor code * make tracing disabled by default * add config doc * fix typo * collect more attributes * add common http attributes * collect http status code attribute * collect sdk key attribute * support distributed tracing * add unit test for trace id generator * update unit test * fix typo * fix typo in config file * update config.yaml * make trace_id configurable for distributed tracing
1 parent 794e827 commit b2bdbd5

File tree

15 files changed

+687
-71
lines changed

15 files changed

+687
-71
lines changed

cmd/optimizely/main.go

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package main
1818
import (
1919
"bytes"
2020
"context"
21+
"errors"
22+
"fmt"
2123
"os"
2224
"os/signal"
2325
"runtime"
@@ -32,6 +34,7 @@ import (
3234

3335
"github.com/optimizely/agent/config"
3436
"github.com/optimizely/agent/pkg/metrics"
37+
"github.com/optimizely/agent/pkg/middleware"
3538
"github.com/optimizely/agent/pkg/optimizely"
3639
"github.com/optimizely/agent/pkg/routers"
3740
"github.com/optimizely/agent/pkg/server"
@@ -44,6 +47,14 @@ import (
4447
// Initiate the loading of the odpCache plugins
4548
_ "github.com/optimizely/agent/plugins/odpcache/all"
4649
"github.com/optimizely/go-sdk/pkg/logging"
50+
"go.opentelemetry.io/otel"
51+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
52+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
53+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
54+
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
55+
"go.opentelemetry.io/otel/sdk/resource"
56+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
57+
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
4758
)
4859

4960
// Version holds the admin version
@@ -119,6 +130,97 @@ func initLogging(conf config.LogConfig) {
119130
}
120131
}
121132

133+
func getStdOutTraceProvider(conf config.OTELTracingConfig) (*sdktrace.TracerProvider, error) {
134+
f, err := os.Create(conf.Services.StdOut.Filename)
135+
if err != nil {
136+
return nil, fmt.Errorf("failed to create the trace file, error: %s", err.Error())
137+
}
138+
139+
exp, err := stdouttrace.New(
140+
stdouttrace.WithPrettyPrint(),
141+
stdouttrace.WithWriter(f),
142+
)
143+
if err != nil {
144+
return nil, fmt.Errorf("failed to create the collector exporter, error: %s", err.Error())
145+
}
146+
147+
res, err := resource.New(
148+
context.Background(),
149+
resource.WithAttributes(
150+
semconv.ServiceNameKey.String(conf.ServiceName),
151+
semconv.DeploymentEnvironmentKey.String(conf.Env),
152+
),
153+
)
154+
if err != nil {
155+
return nil, fmt.Errorf("failed to create the otel resource, error: %s", err.Error())
156+
}
157+
158+
return sdktrace.NewTracerProvider(
159+
sdktrace.WithBatcher(exp),
160+
sdktrace.WithResource(res),
161+
sdktrace.WithIDGenerator(middleware.NewTraceIDGenerator(conf.TraceIDHeaderKey)),
162+
), nil
163+
}
164+
165+
func getOTELTraceClient(conf config.OTELTracingConfig) (otlptrace.Client, error) {
166+
switch conf.Services.Remote.Protocol {
167+
case config.TracingRemoteProtocolHTTP:
168+
return otlptracehttp.NewClient(
169+
otlptracehttp.WithInsecure(),
170+
otlptracehttp.WithEndpoint(conf.Services.Remote.Endpoint),
171+
), nil
172+
case config.TracingRemoteProtocolGRPC:
173+
return otlptracegrpc.NewClient(
174+
otlptracegrpc.WithInsecure(),
175+
otlptracegrpc.WithEndpoint(conf.Services.Remote.Endpoint),
176+
), nil
177+
default:
178+
return nil, errors.New("unknown remote tracing protocal")
179+
}
180+
}
181+
182+
func getRemoteTraceProvider(conf config.OTELTracingConfig) (*sdktrace.TracerProvider, error) {
183+
res, err := resource.New(
184+
context.Background(),
185+
resource.WithAttributes(
186+
semconv.ServiceNameKey.String(conf.ServiceName),
187+
semconv.DeploymentEnvironmentKey.String(conf.Env),
188+
),
189+
)
190+
if err != nil {
191+
return nil, fmt.Errorf("failed to create the otel resource, error: %s", err.Error())
192+
}
193+
194+
traceClient, err := getOTELTraceClient(conf)
195+
if err != nil {
196+
return nil, fmt.Errorf("failed to create the remote trace client, error: %s", err.Error())
197+
}
198+
199+
traceExporter, err := otlptrace.New(context.Background(), traceClient)
200+
if err != nil {
201+
return nil, fmt.Errorf("failed to create the remote trace exporter, error: %s", err.Error())
202+
}
203+
204+
bsp := sdktrace.NewBatchSpanProcessor(traceExporter)
205+
return sdktrace.NewTracerProvider(
206+
sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(conf.Services.Remote.SampleRate))),
207+
sdktrace.WithResource(res),
208+
sdktrace.WithSpanProcessor(bsp),
209+
sdktrace.WithIDGenerator(middleware.NewTraceIDGenerator(conf.TraceIDHeaderKey)),
210+
), nil
211+
}
212+
213+
func initTracing(conf config.OTELTracingConfig) (*sdktrace.TracerProvider, error) {
214+
switch conf.Default {
215+
case config.TracingServiceTypeRemote:
216+
return getRemoteTraceProvider(conf)
217+
case config.TracingServiceTypeStdOut:
218+
return getStdOutTraceProvider(conf)
219+
default:
220+
return nil, errors.New("unknown tracing service type")
221+
}
222+
}
223+
122224
func setRuntimeEnvironment(conf config.RuntimeConfig) {
123225
if conf.BlockProfileRate != 0 {
124226
log.Warn().Msgf("Setting non-zero blockProfileRate is NOT recommended for production")
@@ -140,6 +242,22 @@ func main() {
140242
conf := loadConfig(v)
141243
initLogging(conf.Log)
142244

245+
if conf.Tracing.Enabled {
246+
tp, err := initTracing(conf.Tracing.OpenTelemetry)
247+
if err != nil {
248+
log.Panic().Err(err).Msg("Unable to initialize tracing")
249+
}
250+
defer func() {
251+
if err := tp.Shutdown(context.Background()); err != nil {
252+
log.Error().Err(err).Msg("Failed to shutdown tracing")
253+
}
254+
}()
255+
otel.SetTracerProvider(tp)
256+
log.Info().Msg(fmt.Sprintf("Tracing enabled with service %q", conf.Tracing.OpenTelemetry.Default))
257+
} else {
258+
log.Info().Msg("Tracing disabled")
259+
}
260+
143261
conf.LogConfigWarnings()
144262

145263
setRuntimeEnvironment(conf.Runtime)
@@ -164,7 +282,7 @@ func main() {
164282
cancel()
165283
}()
166284

167-
apiRouter := routers.NewDefaultAPIRouter(optlyCache, conf.API, agentMetricsRegistry)
285+
apiRouter := routers.NewDefaultAPIRouter(optlyCache, *conf, agentMetricsRegistry)
168286
adminRouter := routers.NewAdminRouter(*conf)
169287

170288
log.Info().Str("version", conf.Version).Msg("Starting services.")

cmd/optimizely/main_test.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,3 +418,92 @@ func TestLoggingWithIncludeSdkKey(t *testing.T) {
418418
})
419419
assert.False(t, optimizely.ShouldIncludeSDKKey)
420420
}
421+
422+
func Test_initTracing(t *testing.T) {
423+
type args struct {
424+
conf config.OTELTracingConfig
425+
}
426+
tests := []struct {
427+
name string
428+
args args
429+
wantErr bool
430+
}{
431+
{
432+
name: "should return error when exporter type is not supported",
433+
args: args{
434+
conf: config.OTELTracingConfig{
435+
Default: "unsupported",
436+
},
437+
},
438+
wantErr: true,
439+
},
440+
{
441+
name: "should return no error stdout tracing exporter",
442+
args: args{
443+
conf: config.OTELTracingConfig{
444+
Default: "stdout",
445+
Services: config.TracingServiceConfig{
446+
StdOut: config.TracingStdOutConfig{
447+
Filename: "trace.out",
448+
},
449+
},
450+
},
451+
},
452+
wantErr: false,
453+
},
454+
{
455+
name: "should return no error for remote tracing exporter with http protocal",
456+
args: args{
457+
conf: config.OTELTracingConfig{
458+
Default: "remote",
459+
Services: config.TracingServiceConfig{
460+
Remote: config.TracingRemoteConfig{
461+
Endpoint: "localhost:1234",
462+
Protocol: "http",
463+
},
464+
},
465+
},
466+
},
467+
wantErr: false,
468+
},
469+
{
470+
name: "should return no error for remote tracing exporter with grpc protocal",
471+
args: args{
472+
conf: config.OTELTracingConfig{
473+
Default: "remote",
474+
Services: config.TracingServiceConfig{
475+
Remote: config.TracingRemoteConfig{
476+
Endpoint: "localhost:1234",
477+
Protocol: "grpc",
478+
},
479+
},
480+
},
481+
},
482+
wantErr: false,
483+
},
484+
{
485+
name: "should return no error for remote tracing exporter with invalid protocal",
486+
args: args{
487+
conf: config.OTELTracingConfig{
488+
Default: "remote",
489+
Services: config.TracingServiceConfig{
490+
Remote: config.TracingRemoteConfig{
491+
Endpoint: "localhost:1234",
492+
Protocol: "udp/invalid",
493+
},
494+
},
495+
},
496+
},
497+
wantErr: true,
498+
},
499+
}
500+
for _, tt := range tests {
501+
t.Run(tt.name, func(t *testing.T) {
502+
_, err := initTracing(tt.args.conf)
503+
if (err != nil) != tt.wantErr {
504+
t.Errorf("initTracing() error = %v, wantErr %v", err, tt.wantErr)
505+
return
506+
}
507+
})
508+
}
509+
}

config.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,45 @@ log:
2626
## to set whether or not the SDK key is included in the logging output.
2727
includeSdkKey: true
2828

29+
##
30+
## tracing: tracing configuration
31+
##
32+
tracing:
33+
## by default tracing is disabled
34+
## to enable tracing set enabled to true
35+
enabled: false
36+
# opentelemetry tracing configuration
37+
opentelemetry:
38+
## by default the stdout exporter is used
39+
## to enable remote exporter set default as "remote"
40+
default: "stdout"
41+
## tracing service name
42+
serviceName: "optimizely-agent"
43+
## tracing environment name
44+
## example: for production environment env can be set as "prod"
45+
env: "dev"
46+
## HTTP Header Key for TraceID in Distributed Tracing
47+
## The value set in the HTTP header must be a hex string compliant with the W3C trace-context specification.
48+
## See more at https://www.w3.org/TR/trace-context/#trace-id
49+
traceIDHeaderKey: "X-Optimizely-Trace-ID"
50+
## tracing service configuration
51+
services:
52+
## stdout exporter configuration
53+
stdout:
54+
## for the stdout exporter, tracing data is saved in the specified file
55+
filename: "trace.out"
56+
## remote exporter configuration
57+
remote:
58+
## remote collector endpoint
59+
endpoint: "localhost:4317"
60+
## supported protocols are "http" and "grpc"
61+
protocol: "grpc"
62+
## "sampleRate" refers to the rate at which traces are collected and recorded.
63+
## sampleRate >= 1 will always sample.
64+
## sampleRate < 0 are treated as zero i.e. never sample.
65+
sampleRate: 1.0
66+
67+
2968
##
3069
## http server configuration
3170
##

config/config.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ func NewDefaultConfig() *AgentConfig {
7171
IncludeSDKKey: true,
7272
Level: "info",
7373
},
74+
Tracing: TracingConfig{
75+
Enabled: false,
76+
},
7477
Client: ClientConfig{
7578
PollingInterval: 1 * time.Minute,
7679
BatchSize: 10,
@@ -139,6 +142,7 @@ type AgentConfig struct {
139142
Admin AdminConfig `json:"admin"`
140143
API APIConfig `json:"api"`
141144
Log LogConfig `json:"log"`
145+
Tracing TracingConfig `json:"tracing"`
142146
Client ClientConfig `json:"client"`
143147
Runtime RuntimeConfig `json:"runtime"`
144148
Server ServerConfig `json:"server"`
@@ -201,6 +205,48 @@ type LogConfig struct {
201205
Level string `json:"level"`
202206
}
203207

208+
// TracingConfig holds the tracing configuration of the agent.
type TracingConfig struct {
209+
// Enabled turns tracing on; NewDefaultConfig sets it to false.
Enabled bool `json:"enabled"`
210+
// OpenTelemetry holds the OpenTelemetry-specific tracing settings.
OpenTelemetry OTELTracingConfig `json:"opentelemetry"`
211+
}
212+
213+
// TracingServiceType names the tracing service (exporter) to use.
type TracingServiceType string
214+
215+
// Supported tracing service types.
const (
216+
// TracingServiceTypeStdOut selects the stdout exporter.
TracingServiceTypeStdOut TracingServiceType = "stdout"
217+
// TracingServiceTypeRemote selects the remote exporter.
TracingServiceTypeRemote TracingServiceType = "remote"
218+
)
219+
220+
// TracingRemoteProtocol names the protocol used by the remote exporter.
type TracingRemoteProtocol string
221+
222+
// Supported remote tracing protocols.
const (
223+
// TracingRemoteProtocolGRPC selects the gRPC protocol.
TracingRemoteProtocolGRPC TracingRemoteProtocol = "grpc"
224+
// TracingRemoteProtocolHTTP selects the HTTP protocol.
TracingRemoteProtocolHTTP TracingRemoteProtocol = "http"
225+
)
226+
227+
// OTELTracingConfig holds the OpenTelemetry tracing settings.
type OTELTracingConfig struct {
228+
// Default selects which tracing service is used ("stdout" or "remote").
Default TracingServiceType `json:"default"`
229+
// ServiceName is the tracing service name.
ServiceName string `json:"serviceName"`
230+
// Env is the tracing environment name, e.g. "dev" or "prod".
Env string `json:"env"`
231+
// TraceIDHeaderKey is the HTTP header key carrying the trace ID for
// distributed tracing.
TraceIDHeaderKey string `json:"traceIDHeaderKey"`
232+
// Services holds the per-service (stdout and remote) settings.
Services TracingServiceConfig `json:"services"`
233+
}
234+
235+
// TracingServiceConfig groups the settings of each supported tracing service.
type TracingServiceConfig struct {
236+
// StdOut configures the stdout exporter.
StdOut TracingStdOutConfig `json:"stdout"`
237+
// Remote configures the remote exporter.
Remote TracingRemoteConfig `json:"remote"`
238+
}
239+
240+
// TracingStdOutConfig configures the stdout exporter.
type TracingStdOutConfig struct {
241+
// Filename is the file in which tracing data is saved.
Filename string `json:"filename"`
242+
}
243+
244+
// TracingRemoteConfig configures the remote exporter.
type TracingRemoteConfig struct {
245+
// Endpoint is the remote collector endpoint, e.g. "localhost:4317".
Endpoint string `json:"endpoint"`
246+
// Protocol is the protocol used to reach the collector ("http" or "grpc").
Protocol TracingRemoteProtocol `json:"protocol"`
247+
// SampleRate is the rate at which traces are collected and recorded:
// values >= 1 always sample, values < 0 are treated as zero (never sample).
SampleRate float64 `json:"sampleRate"`
248+
}
249+
204250
// PluginConfigs defines the generic mapping of middleware plugins
205251
type PluginConfigs map[string]interface{}
206252

0 commit comments

Comments
 (0)