Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
11155f0
feat: trace watchable messages
shreealt Oct 31, 2025
f66879c
license header
shreealt Oct 31, 2025
2c0d0a1
lint
shreealt Oct 31, 2025
4777f57
default traces
shreealt Oct 31, 2025
0af47d1
disability
shreealt Oct 31, 2025
c620c6e
deepcopy errors
shreealt Oct 31, 2025
aa6b91b
lint
shreealt Oct 31, 2025
2ad7399
e2e
shreealt Nov 1, 2025
f8d9f42
Merge branch 'main' of github.com:envoyproxy/gateway into feat/tracing
shreealt Nov 1, 2025
ff03fd5
doc
shreealt Nov 1, 2025
bf58e55
minor fixes
shreealt Nov 2, 2025
c0846eb
rc
shreealt Nov 2, 2025
1c5e52e
generate
shreealt Nov 2, 2025
623406e
fix coverage tests
shreealt Nov 2, 2025
fd41925
fix: e2e config
shreealt Nov 2, 2025
8392b41
runner test fix
shreealt Nov 2, 2025
934dca2
Merge branch 'main' into feat/tracing
shreealt Nov 2, 2025
fcadab2
logger
shreealt Nov 4, 2025
afc0812
mod
shreealt Nov 4, 2025
0480ec5
mod
shreealt Nov 4, 2025
8b834a9
trace config cleanup
shreealt Nov 4, 2025
8ca0a2c
lint
shreealt Nov 4, 2025
1be27d3
testing cleanup
shreealt Nov 4, 2025
37107cd
lint
shreealt Nov 4, 2025
f0ab998
snapshot cache
shreealt Nov 4, 2025
8ee0225
rename
shreealt Nov 4, 2025
0c2b09e
Merge branch 'main' of github.com:envoyproxy/gateway into feat/tracing
shreealt Nov 5, 2025
10fe843
Merge branch 'main' of github.com:envoyproxy/gateway into feat/tracing
shreealt Nov 7, 2025
3728b82
fix
shreealt Nov 7, 2025
6c9d222
Merge branch 'main' into feat/tracing
shreealt Nov 9, 2025
3d3340b
Merge branch 'main' into feat/tracing
shreealt Nov 15, 2025
d4b7eb4
revert api
shreealt Nov 15, 2025
b59ff0d
Merge branch 'feat/tracing' of github.com-work:shreealt/gateway into …
shreealt Nov 15, 2025
c4b00c1
rm type
shreealt Nov 15, 2025
58ec2e2
Merge branch 'main' into feat/tracing
shreealt Nov 16, 2025
3edaaf4
Merge branch 'main' of github.com:envoyproxy/gateway into feat/tracing
shreealt Nov 17, 2025
520a840
Merge branch 'feat/tracing' of github.com-work:shreealt/gateway into …
shreealt Nov 17, 2025
93f2ace
more spans and cleanups
shreealt Nov 21, 2025
3bf3309
lint
shreealt Nov 21, 2025
1e1322b
equal and tests
shreealt Nov 23, 2025
cd7fdd6
equal and tests
shreealt Nov 23, 2025
9836161
no otel things in translator
shreealt Nov 23, 2025
d96ac3a
review comments
shreealt Nov 27, 2025
be8f839
nil check
shreealt Nov 27, 2025
0c545cc
flags
shreealt Dec 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions internal/cmd/egctl/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package egctl

import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -291,7 +292,8 @@ func translateGatewayAPIToIR(resources *resource.Resources) (*gatewayapi.Transla
}
}

result, _ := t.Translate(resources)
ctx := context.Background()
result, _ := t.Translate(resources, ctx)

return result, nil
}
Expand All @@ -311,7 +313,7 @@ func translateGatewayAPIToGatewayAPI(resources *resource.Resources) (resource.Re
BackendEnabled: true,
Logger: logging.DefaultLogger(io.Discard, egv1a1.LogLevelInfo),
}
gRes, _ := gTranslator.Translate(resources)
gRes, _ := gTranslator.Translate(resources, context.Background())
// Update the status of the GatewayClass based on EnvoyProxy validation
epInvalid := false
if resources.EnvoyProxyForGatewayClass != nil {
Expand Down Expand Up @@ -351,7 +353,7 @@ func TranslateGatewayAPIToXds(namespace, dnsDomain, resourceType string, resourc
BackendEnabled: true,
Logger: logging.DefaultLogger(io.Discard, egv1a1.LogLevelInfo),
}
gRes, _ := gTranslator.Translate(resources)
gRes, _ := gTranslator.Translate(resources, context.Background())

keys := []string{}
for key := range gRes.XdsIR {
Expand All @@ -374,7 +376,7 @@ func TranslateGatewayAPIToXds(namespace, dnsDomain, resourceType string, resourc
if resources.EnvoyProxyForGatewayClass != nil {
xTranslator.FilterOrder = resources.EnvoyProxyForGatewayClass.Spec.FilterOrder
}
xRes, err := xTranslator.Translate(val)
xRes, err := xTranslator.Translate(val, context.Background())
if err != nil {
return nil, fmt.Errorf("failed to translate xds ir for key %s value %+v, error:%w", key, val, err)
}
Expand Down
9 changes: 7 additions & 2 deletions internal/gatewayapi/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/telepresenceio/watchable"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand Down Expand Up @@ -144,7 +145,6 @@ func (r *Runner) subscribeAndTranslate(sub <-chan watchable.Snapshot[string, *re
// There is only 1 key which is the controller name
// so when a delete is triggered, delete all keys
if update.Delete || valWrapper == nil || valWrapper.Resources == nil {
span.AddEvent("delete_all_keys")
r.deleteAllKeys()
return
}
Expand All @@ -169,6 +169,7 @@ func (r *Runner) subscribeAndTranslate(sub <-chan watchable.Snapshot[string, *re
var backendTLSPolicyStatusCount, clientTrafficPolicyStatusCount, backendTrafficPolicyStatusCount int
var securityPolicyStatusCount, envoyExtensionPolicyStatusCount, backendStatusCount, extensionServerPolicyStatusCount int

span.AddEvent("gateway_resources_translation_cycle", trace.WithAttributes(attribute.Int("resources.count", len(*val))))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thoughts on naming this event just `translate`? The tracer already carries the gateway-api runner info.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I did not follow you. Did you intend to name this event just `translate`?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah

for _, resources := range *val {
// Translate and publish IRs.
t := &gatewayapi.Translator{
Expand Down Expand Up @@ -199,11 +200,13 @@ func (r *Runner) subscribeAndTranslate(sub <-chan watchable.Snapshot[string, *re
traceLogger.Info("extension resources", "GVKs count", len(extGKs))
}
// Translate to IR
result, err := t.Translate(resources)
_, translateToIRSpan := tracer.Start(parentCtx, "GatewayApiRunner.ResoureTranslationCycle.TranslateToIR")
result, err := t.Translate(resources, parentCtx)
if err != nil {
// Currently all errors that Translate returns should just be logged
traceLogger.Error(err, "errors detected during translation", "gateway-class", resources.GatewayClass.Name)
}
translateToIRSpan.End()

// Publish the IRs.
// Also validate the ir before sending it.
Expand Down Expand Up @@ -243,6 +246,7 @@ func (r *Runner) subscribeAndTranslate(sub <-chan watchable.Snapshot[string, *re
}

// Update Status
_, statusUpdateSpan := tracer.Start(parentCtx, "GatewayApiRunner.ResoureTranslationCycle.UpdateStatus")
if result.GatewayClass != nil {
key := utils.NamespacedName(result.GatewayClass)
r.ProviderResources.GatewayClassStatuses.Store(key, &result.GatewayClass.Status)
Expand Down Expand Up @@ -365,6 +369,7 @@ func (r *Runner) subscribeAndTranslate(sub <-chan watchable.Snapshot[string, *re
delete(keysToDelete.ExtensionServerPolicyStatus, key)
r.keyCache.ExtensionServerPolicyStatus[key] = true
}
statusUpdateSpan.End()
}

// Publish aggregated metrics
Expand Down
28 changes: 25 additions & 3 deletions internal/gatewayapi/translator.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
package gatewayapi

import (
"context"
"errors"
"fmt"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"golang.org/x/exp/maps"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand Down Expand Up @@ -49,10 +52,13 @@ const (
wellKnownPortShift = 10000
)

var _ TranslatorManager = (*Translator)(nil)
var (
_ TranslatorManager = (*Translator)(nil)
tracer = otel.Tracer("envoy-gateway/gateway-api/translator")
)

type TranslatorManager interface {
Translate(resources *resource.Resources) (*TranslateResult, error)
Translate(resources *resource.Resources, ctx context.Context) (*TranslateResult, error)
GetRelevantGateways(resources *resource.Resources) (acceptedGateways, failedGateways []*GatewayContext)

RoutesTranslator
Expand Down Expand Up @@ -217,7 +223,10 @@ func newTranslateResult(
return translateResult
}

func (t *Translator) Translate(resources *resource.Resources) (*TranslateResult, error) {
func (t *Translator) Translate(resources *resource.Resources, ctx context.Context) (*TranslateResult, error) {
_, span := tracer.Start(ctx, "Translator.Translate")
defer span.End()
span.SetAttributes(getAttributes(resources)...)
var errs error

// Get Gateways belonging to our GatewayClass.
Expand Down Expand Up @@ -529,3 +538,16 @@ func (t *Translator) IRKey(gatewayNN types.NamespacedName) string {
}
return irStringKey(gatewayNN.Namespace, gatewayNN.Name)
}

// getAttributes builds the span attributes that identify the GatewayClass
// carried by resources.
//
// It returns an empty, non-nil slice when resources or its GatewayClass is
// nil, so the result can always be expanded into a variadic attribute call.
// The controller-name attribute is only added when the GatewayClass spec sets
// a non-empty controller name.
func getAttributes(resources *resource.Resources) []attribute.KeyValue {
	// At most three attributes are ever emitted, so size the slice once.
	attrs := make([]attribute.KeyValue, 0, 3)

	// Guard resources itself as well as the GatewayClass: this helper is
	// called before any other validation of the input has happened.
	if resources == nil || resources.GatewayClass == nil {
		return attrs
	}

	gc := resources.GatewayClass
	// NOTE(review): GatewayClass is presumably cluster-scoped, in which case
	// the namespace attribute is always empty — confirm whether it is needed.
	attrs = append(attrs,
		attribute.String("gateway-class", gc.Name),
		attribute.String("gateway-class-namespace", gc.Namespace),
	)
	if gc.Spec.ControllerName != "" {
		attrs = append(attrs, attribute.String("gateway-class-controller-name", string(gc.Spec.ControllerName)))
	}
	return attrs
}
4 changes: 2 additions & 2 deletions internal/gatewayapi/translator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ func TestTranslate(t *testing.T) {
},
})

got, _ := translator.Translate(resources)
got, _ := translator.Translate(resources, context.Background())
require.NoError(t, field.SetValue(got, "LastTransitionTime", metav1.NewTime(time.Time{})))
outputFilePath := strings.ReplaceAll(inputFile, ".in.yaml", ".out.yaml")
out, err := yaml.Marshal(got)
Expand Down Expand Up @@ -696,7 +696,7 @@ func TestTranslateWithExtensionKinds(t *testing.T) {
},
})

got, _ := translator.Translate(resources)
got, _ := translator.Translate(resources, context.Background())
require.NoError(t, field.SetValue(got, "LastTransitionTime", metav1.NewTime(time.Time{})))
// Also fix lastTransitionTime in unstructured members
for i := range got.ExtensionServerPolicies {
Expand Down
22 changes: 21 additions & 1 deletion internal/globalratelimit/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
resourcev3 "github.com/envoyproxy/go-control-plane/pkg/resource/v3"
serverv3 "github.com/envoyproxy/go-control-plane/pkg/server/v3"
"github.com/telepresenceio/watchable"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"

Expand Down Expand Up @@ -47,6 +49,8 @@ const (
rateLimitTLSCACertFilepath = "/certs/ca.crt"
)

var tracer = otel.Tracer("envoy-gateway/global-rate-limit/runner")

type Config struct {
config.Server
XdsIR *message.XdsIR
Expand Down Expand Up @@ -138,7 +142,20 @@ func (r *Runner) translateFromSubscription(ctx context.Context, c <-chan watchab

message.HandleSubscription(message.Metadata{Runner: r.Name(), Message: message.XDSIRMessageName}, c,
func(update message.Update[string, *message.XdsIRWithContext], errChan chan error) {
Copy link
Contributor

@arkodg arkodg Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the gateway-api runner, can we add a parent span and two child spans, one for translate and one for updateSnapshot?

r.Logger.Info("received a notification")
parentCtx := context.Background()
if update.Value != nil && update.Value.Context != nil {
parentCtx = update.Value.Context
}
traceLogger := r.Logger.WithTrace(parentCtx)
traceLogger.Info("received a notification")

_, span := tracer.Start(parentCtx, "GlobalRateLimitRunner.translateFromSubscription")
defer span.End()

span.SetAttributes(
attribute.String("controller.key", update.Key),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

xds-ir.key

attribute.Bool("update.delete", update.Delete),
)

if update.Delete {
delete(rateLimitConfigsCache, update.Key)
Expand Down Expand Up @@ -183,6 +200,9 @@ func (r *Runner) translate(xdsIR *ir.Xds) (*types.ResourceVersionTable, error) {
}

func (r *Runner) updateSnapshot(ctx context.Context, resource types.XdsResources) {
_, span := tracer.Start(ctx, "GlobalRateLimitRunner.updateSnapshot")
defer span.End()

if r.cache == nil {
r.Logger.Error(nil, "failed to init the snapshot cache")
return
Expand Down
2 changes: 1 addition & 1 deletion internal/xds/cache/snapshotcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import (

var (
Hash = cachev3.IDHash{}
tracer = otel.Tracer("envoy-gateway/gateway-api")
tracer = otel.Tracer("envoy-gateway/xds/snapshotcache")
)

// SnapshotCacheWithCallbacks uses the go-control-plane SimpleCache to store snapshots of
Expand Down
4 changes: 2 additions & 2 deletions internal/xds/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ const (
defaultMaxConnectionAgeGrace = 2 * time.Minute
)

var tracer = otel.Tracer("envoy-gateway/gateway-api")
var tracer = otel.Tracer("envoy-gateway/xds")

var maxConnectionAgeValues = []time.Duration{
10 * time.Hour,
Expand Down Expand Up @@ -316,7 +316,7 @@ func (r *Runner) translateFromSubscription(sub <-chan watchable.Snapshot[string,
}
}

result, err := t.Translate(val.XdsIR)
result, err := t.Translate(val.XdsIR, parentCtx)
if err != nil {
traceLogger.Error(err, "failed to translate xds ir")
errChan <- err
Expand Down
9 changes: 8 additions & 1 deletion internal/xds/translator/translator.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package translator

import (
"context"
"errors"
"fmt"
"runtime"
Expand All @@ -22,6 +23,7 @@ import (
matcherv3 "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3"
resourcev3 "github.com/envoyproxy/go-control-plane/pkg/resource/v3"
"github.com/envoyproxy/go-control-plane/pkg/wellknown"
"go.opentelemetry.io/otel"
protobuf "google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
"google.golang.org/protobuf/types/known/wrapperspb"
Expand All @@ -44,6 +46,8 @@ const (
emptyClusterName = "EmptyCluster"
)

var tracer = otel.Tracer("envoy-gateway/xds/translator")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not add any OTel code to the library; please keep it in the runner, in case these libraries are made public and used purely for translation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


// The dummy cluster for TCP/UDP listeners that have no routes
var emptyRouteCluster = &clusterv3.Cluster{
Name: emptyClusterName,
Expand Down Expand Up @@ -94,7 +98,10 @@ type GlobalRateLimitSettings struct {
}

// Translate translates the XDS IR into xDS resources
func (t *Translator) Translate(xdsIR *ir.Xds) (*types.ResourceVersionTable, error) {
func (t *Translator) Translate(xdsIR *ir.Xds, ctx context.Context) (*types.ResourceVersionTable, error) {
_, span := tracer.Start(ctx, "Translator.Translate")
defer span.End()

if xdsIR == nil {
return nil, errors.New("ir is nil")
}
Expand Down
7 changes: 4 additions & 3 deletions internal/xds/translator/translator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package translator

import (
"context"
"embed"
"encoding/json"
"os"
Expand Down Expand Up @@ -180,7 +181,7 @@ func TestTranslateXds(t *testing.T) {
FilterOrder: x.FilterOrder,
RuntimeFlags: cfg.runtimeFlags,
}
tCtx, err := tr.Translate(x)
tCtx, err := tr.Translate(x, context.Background())
if !strings.HasSuffix(inputFileName, "partial-invalid") && len(cfg.errMsg) == 0 {
t.Log(inputFileName)
require.NoError(t, err)
Expand Down Expand Up @@ -384,7 +385,7 @@ func TestTranslateXdsWithExtensionErrorsWhenFailOpen(t *testing.T) {
defer closeFunc()
tr.ExtensionManager = &extMgr

tCtx, err := tr.Translate(x)
tCtx, err := tr.Translate(x, context.Background())
if len(cfg.errMsg) > 0 {
require.EqualError(t, err, cfg.errMsg)
} else {
Expand Down Expand Up @@ -525,7 +526,7 @@ func TestTranslateXdsWithExtensionErrorsWhenFailClosed(t *testing.T) {
defer closeFunc()
tr.ExtensionManager = &extMgr

_, err = tr.Translate(x)
_, err = tr.Translate(x, context.Background())
require.EqualError(t, err, cfg.errMsg)
})
}
Expand Down
7 changes: 0 additions & 7 deletions test/helm/gateway-addons-helm/e2e.in.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,5 @@ grafana:
enabled: false
opentelemetry-collector:
enabled: true
mode: deployment
service:
type: LoadBalancer
fluent-bit:
enabled: false
tempo:
enabled: true
service:
type: LoadBalancer
2 changes: 1 addition & 1 deletion test/helm/gateway-addons-helm/e2e.out.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10334,7 +10334,7 @@ metadata:
app.kubernetes.io/component: standalone-collector
component: standalone-collector
spec:
type: LoadBalancer
type: ClusterIP
ports:

- name: datadog
Expand Down
Loading