From 49092b3f68eb8719ecf45526bf1bccfe56eb0a31 Mon Sep 17 00:00:00 2001 From: Jin Hou Date: Wed, 22 Oct 2025 13:25:23 -0700 Subject: [PATCH 1/2] feat: Add initial set of system metircs --- internal/tel/tel.go | 98 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 internal/tel/tel.go diff --git a/internal/tel/tel.go b/internal/tel/tel.go new file mode 100644 index 00000000..8d1f18cf --- /dev/null +++ b/internal/tel/tel.go @@ -0,0 +1,98 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tel + +import "context" + +const ( + meterName = "alloydb.googleapis.com/client/connector" + monitoredResource = "cloudsql.googleapis.com/InstanceClient" + connectLatency = "connect_latencies" + closedConnectionCount = "closed_connection_count" + openConnections = "open_connections" + + // The identifier of the GCP project associated with this CSQL resource + ResourceContainer = "resource_container" + // The Cloud SQL instance identifier in the format of [project_name:instance_name] + ResourceID = "resource_id" + // A unique identifier generated for each Dialer instance + ClientUID = "client_uid" + // The application name provided by the user or defaulted by the connector + ApplicationName = "application_name" + // Cloud SQL Instance's location e.g. 
us-central1 + Region = "region" + // ClientRegion is the region from which the client is connecting, unknown if not on GCP + ClientRegion = "client_region" + // ComputePlatform is the platform on which the client is running, e.g. GCE, GKE, etc. + ComputePlatform = "compute_platform" + // Cloud SQL Connector type. "go" in this case. + ConnectorType = "connector_type" + // Cloud SQL Connector version + ConnectorVersion = "connector_version" + // Database engine type [MySQL, PostgreSQL, SQL Server]. + DatabaseEngineType = "database_engine_type" + // authType is one of iam or built-in + authType = "auth_type" + // IP address type of the connection, one of [public, psa, psc] + ipType = "ip_type" + // status indicates whether the dial attempt succeeded or not. + status = "status" +) + +// Config holds all the necessary information to configure a MetricRecorder. +type Config struct { + // Enabled specifies whether the metrics should be enabled. + Enabled bool + // Project id + ResourceContainer string + // The Cloud SQL instance identifier in the format of [project_name:instance_name] + ResourceID string + // A unique identifier generated for each Dialer instance + ClientUID string + // The application name provided by the user or defaulted by the connector + ApplicationName string + // Cloud SQL Instance's location e.g. us-central1 + Region string + // ClientRegion is the region from which the client is connecting, unknown if not on GCP + ClientRegion string + // ComputePlatform is the platform on which the client is running, e.g. GCE, GKE, etc. + ComputePlatform string + // Cloud SQL Connector type. "go" in this case. + ConnectorType string + // Cloud SQL Connector version + ConnectorVersion string + // Database engine type [MySQL, PostgreSQL, SQL Server]. + DatabaseEngineType string +} + +// Attributes holds all the various pieces of metadata to attach to a metric. +type Attributes struct { + // IAMAuthN specifies whether IAM authentication is enabled. 
+ IAMAuthN bool + // UserAgent is the full user-agent of the alloydbconn.Dialer. + UserAgent string + // CacheHit specifies whether connection info was present in the cache. + CacheHit bool + // DialStatus specifies the result of the dial attempt. + DialStatus string +} + +// MetricRecorder defines the interface for recording metrics related to the +// internal operations of alloydbconn.Dialer. +type MetricRecorder interface { + RecordOpenConnection(context.Context, Attributes) + RecordClosedConnectionCount(context.Context, Attributes) + RecordConnectLatencies(context.Context, Attributes) +} From ad4a6e6ee9a149478e24eacee3da9b2ae865f881 Mon Sep 17 00:00:00 2001 From: Jin Hou Date: Tue, 4 Nov 2025 15:05:57 -0800 Subject: [PATCH 2/2] Add new metrics --- dialer.go | 118 ++++++++++++++++++----- go.mod | 12 ++- go.sum | 40 ++++++-- internal/tel/tel.go | 228 ++++++++++++++++++++++++++++++++++++++++---- options.go | 8 ++ 5 files changed, 358 insertions(+), 48 deletions(-) diff --git a/dialer.go b/dialer.go index b064af28..076efafd 100644 --- a/dialer.go +++ b/dialer.go @@ -44,6 +44,8 @@ import ( "github.com/google/uuid" "golang.org/x/net/proxy" "google.golang.org/api/option" + + tel "cloud.google.com/go/cloudsqlconn/internal/tel" sqladmin "google.golang.org/api/sqladmin/v1beta4" ) @@ -71,6 +73,8 @@ var ( //go:embed version.txt versionString string userAgent = "cloud-sql-go-connector/" + strings.TrimSpace(versionString) + // dialerID is a unique ID for the dialer process. + dialerID = uuid.New().String() ) // keyGenerator encapsulates the details of RSA key generation to provide lazy @@ -174,7 +178,9 @@ type Dialer struct { // dialerID uniquely identifies a Dialer. Used for monitoring purposes, // *only* when a client has configured OpenCensus exporters. - dialerID string + dialerID string + metricsMu sync.Mutex + metricRecorders map[instance.ConnName]tel.MetricRecorder // dialFunc is the function used to connect to the address on the named // network. 
By default, it is golang.org/x/net/proxy#Dial. @@ -190,6 +196,18 @@ type Dialer struct { // metadataExchangeDisabled true when the dialer should never // send MDX mdx requests. metadataExchangeDisabled bool + + // applicationName is the name of the application using the dialer. + applicationName string + + // disableBuiltInMetrics turns the internal metric export into a no-op. + disableBuiltInMetrics bool + + // clientOpts are options for all Google Cloud API clients. + clientOpts []option.ClientOption + + // userAgent is the combined user agent string. + userAgent string } var ( @@ -208,11 +226,12 @@ func (nullLogger) Debugf(_ context.Context, _ string, _ ...interface{}) {} // RSA keypair is generated will be faster. func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) { cfg := &dialerConfig{ - refreshTimeout: cloudsql.RefreshTimeout, - dialFunc: proxy.Dial, - logger: nullLogger{}, - useragents: []string{userAgent}, - failoverPeriod: cloudsql.FailoverPeriod, + refreshTimeout: cloudsql.RefreshTimeout, + dialFunc: proxy.Dial, + logger: nullLogger{}, + useragents: []string{userAgent}, + failoverPeriod: cloudsql.FailoverPeriod, + applicationName: "unknown", } for _, opt := range opts { opt(cfg) @@ -318,17 +337,49 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) { sqladmin: client, logger: cfg.logger, defaultDialConfig: dc, - dialerID: uuid.New().String(), + dialerID: dialerID, iamTokenProvider: cfg.iamLoginTokenProvider, + metricRecorders: map[instance.ConnName]tel.MetricRecorder{}, dialFunc: cfg.dialFunc, resolver: r, failoverPeriod: cfg.failoverPeriod, metadataExchangeDisabled: cfg.metadataExchangeDisabled, + userAgent: strings.Join(cfg.useragents, " "), + applicationName: cfg.applicationName, } + // print dialer id to terminal for debugging purposes + fmt.Println("Cloud SQL Go Connector Dialer ID:", d.dialerID) + return d, nil } +// metricRecorder does a lazy initialization of the metric exporter. 
+func (d *Dialer) metricRecorder(ctx context.Context, inst instance.ConnName) tel.MetricRecorder { + d.metricsMu.Lock() + defer d.metricsMu.Unlock() + if mr, ok := d.metricRecorders[inst]; ok { + return mr + } + cfg := tel.Config{ + Enabled: !d.disableBuiltInMetrics, + Version: versionString, + ResourceContainer: inst.Project(), + ResourceID: inst.Name(), + ClientUID: d.dialerID, + ApplicationName: d.applicationName, + Region: inst.Region(), + ClientRegion: "Client-Region-Testing", // TODO: detect client region + ComputePlatform: "Compute-Platform-Testing", // TODO: detect compute platform + ConnectorType: tel.ConnectorTypeValue(d.userAgent), + ConnectorVersion: versionString, + DatabaseEngineType: "DB-Engine-Type-Testing", // TODO: detect database engine type + } + mr := tel.NewMetricRecorder(ctx, d.logger, cfg, d.clientOpts...) + d.metricRecorders[inst] = mr + return mr +} + // Dial returns a net.Conn connected to the specified Cloud SQL instance. The // icn argument may be the instance's connection name in the format // "project-name:region:instance-name" or a DNS name that resolves to an @@ -339,8 +390,29 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn return nil, ErrDialerClosed default: } + cfg := d.defaultDialConfig + for _, opt := range opts { + opt(&cfg) + } + + // Resolve the instance connection name to a ConnName struct. + // Note: icn may be a domain name that resolves to an instance connection name. 
+ cn, err := d.resolver.Resolve(ctx, icn) + if err != nil { + return nil, err + } + mr := d.metricRecorder(ctx, cn) + startTime := time.Now() var endDial trace.EndSpanFunc + attrs := tel.Attributes{ + IAMAuthN: cfg.useIAMAuthN, + RefreshType: tel.RefreshAheadType, + IPType: cfg.ipType, + } + if d.lazyRefresh { + attrs.RefreshType = tel.RefreshLazyType + } ctx, endDial = trace.StartSpan(ctx, "cloud.google.com/go/cloudsqlconn.Dial", trace.AddInstanceName(icn), trace.AddDialerID(d.dialerID), @@ -349,10 +421,6 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn trace.RecordDialError(context.Background(), icn, d.dialerID, err) endDial(err) }() - cn, err := d.resolver.Resolve(ctx, icn) - if err != nil { - return nil, err - } // Log if resolver changed the instance name input string. if cn.DomainName() != "" { @@ -363,14 +431,10 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn d.logger.Debugf(ctx, "resolved instance connection string %s to %s", icn, cn.String()) } - cfg := d.defaultDialConfig - for _, opt := range opts { - opt(&cfg) - } - var endInfo trace.EndSpanFunc ctx, endInfo = trace.StartSpan(ctx, "cloud.google.com/go/cloudsqlconn/internal.InstanceInfo") - c, err := d.connectionInfoCache(ctx, cn, &cfg.useIAMAuthN) + c, cacheHit, err := d.connectionInfoCache(ctx, cn, &cfg.useIAMAuthN) + attrs.CacheHit = cacheHit if err != nil { endInfo(err) return nil, err @@ -453,10 +517,16 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn n := c.openConnsCount.Add(1) trace.RecordOpenConnections(ctx, int64(n), d.dialerID, cn.String()) trace.RecordDialLatency(ctx, icn, d.dialerID, latency) + mr.RecordOpenConnection(ctx, attrs) + mr.RecordConnectLatencies(ctx, attrs, latency) closeFunc := func() { n := c.openConnsCount.Add(^uint64(0)) // c.openConnsCount = c.openConnsCount - 1 trace.RecordOpenConnections(context.Background(), int64(n), d.dialerID, cn.String()) + 
mr.RecordClosedConnection(context.Background(), attrs) + mr.RecordClosedConnectionCount(context.Background(), attrs) + // log the message to terminal for debugging purposes + fmt.Println("Cloud SQL Go Connector Dialer ID:", d.dialerID, "closed connection to instance:", cn.String()) } errFunc := func(err error) { // io.EOF occurs when the server closes the connection. This is safe to @@ -553,7 +623,7 @@ func (d *Dialer) EngineVersion(ctx context.Context, icn string) (string, error) if err != nil { return "", err } - c, err := d.connectionInfoCache(ctx, cn, &d.defaultDialConfig.useIAMAuthN) + c, _, err := d.connectionInfoCache(ctx, cn, &d.defaultDialConfig.useIAMAuthN) if err != nil { return "", err } @@ -577,7 +647,7 @@ func (d *Dialer) Warmup(ctx context.Context, icn string, opts ...DialOption) err for _, opt := range opts { opt(&cfg) } - c, err := d.connectionInfoCache(ctx, cn, &cfg.useIAMAuthN) + c, _, err := d.connectionInfoCache(ctx, cn, &cfg.useIAMAuthN) if err != nil { return err } @@ -724,7 +794,7 @@ func createKey(cn instance.ConnName) cacheKey { // modify the existing one, or leave it unchanged as needed. 
func (d *Dialer) connectionInfoCache( ctx context.Context, cn instance.ConnName, useIAMAuthN *bool, -) (*monitoredCache, error) { +) (*monitoredCache, bool, error) { k := createKey(cn) d.lock.RLock() @@ -733,7 +803,7 @@ func (d *Dialer) connectionInfoCache( if ok && !c.isClosed() { c.UpdateRefresh(useIAMAuthN) - return c, nil + return c, ok, nil } d.lock.Lock() @@ -745,7 +815,7 @@ func (d *Dialer) connectionInfoCache( // c exists and is not closed if ok && !c.isClosed() { c.UpdateRefresh(useIAMAuthN) - return c, nil + return c, ok, nil } // Create a new instance of monitoredCache @@ -756,7 +826,7 @@ func (d *Dialer) connectionInfoCache( d.logger.Debugf(ctx, "[%v] Connection info added to cache", cn.String()) rsaKey, err := d.keyGenerator.rsaKey() if err != nil { - return nil, err + return nil, ok, err } var cache connectionInfoCache if d.lazyRefresh { @@ -779,7 +849,7 @@ func (d *Dialer) connectionInfoCache( c = newMonitoredCache(cache, cn, d.failoverPeriod, d.resolver, d.logger) d.cache[k] = c - return c, nil + return c, ok, nil } // newMDXRequest builds a metadata exchange request based on the connection diff --git a/go.mod b/go.mod index d116b9d4..4a9af9d3 100644 --- a/go.mod +++ b/go.mod @@ -7,12 +7,17 @@ toolchain go1.25.3 require ( cloud.google.com/go/auth v0.17.0 cloud.google.com/go/auth/oauth2adapt v0.2.8 + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 github.com/go-sql-driver/mysql v1.9.3 github.com/google/uuid v1.6.0 github.com/jackc/pgx/v4 v4.18.3 github.com/jackc/pgx/v5 v5.7.6 github.com/microsoft/go-mssqldb v1.9.3 go.opencensus.io v0.24.0 + go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/otel/metric v1.38.0 + go.opentelemetry.io/otel/sdk v1.38.0 + go.opentelemetry.io/otel/sdk/metric v1.38.0 golang.org/x/net v0.46.0 golang.org/x/oauth2 v0.32.0 golang.org/x/time v0.14.0 @@ -24,7 +29,9 @@ require ( require ( cloud.google.com/go/compute/metadata v0.9.0 // indirect + cloud.google.com/go/monitoring v1.24.2 // 
indirect filippo.io/edwards25519 v1.1.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -43,12 +50,13 @@ require ( github.com/jackc/pgtype v1.14.4 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect - go.opentelemetry.io/otel v1.38.0 // indirect - go.opentelemetry.io/otel/metric v1.38.0 // indirect go.opentelemetry.io/otel/trace v1.38.0 // indirect golang.org/x/crypto v0.43.0 // indirect golang.org/x/sync v0.17.0 // indirect golang.org/x/sys v0.37.0 // indirect golang.org/x/text v0.30.0 // indirect + google.golang.org/genproto v0.0.0-20250922171735-9219d122eba9 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250922171735-9219d122eba9 // indirect ) diff --git a/go.sum b/go.sum index f803212d..b0f7e959 100644 --- a/go.sum +++ b/go.sum @@ -1,10 +1,19 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.118.3 h1:jsypSnrE/w4mJysioGdMBg4MiW/hHx/sArFpaBWHdME= cloud.google.com/go/auth v0.17.0 h1:74yCm7hCj2rUyyAocqnFzsAYXgJhrG26XCFimrc/Kz4= cloud.google.com/go/auth v0.17.0/go.mod h1:6wv/t5/6rOPAX4fJiRjKkJCvswLwdet7G8+UGXt7nCQ= cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= +cloud.google.com/go/logging v1.13.0 
h1:7j0HgAp0B94o1YRDqiqm26w4q1rDMH7XNRU34lJXHYc= +cloud.google.com/go/logging v1.13.0/go.mod h1:36CoKh6KA/M0PbhPKMq6/qety2DCAErbhXT62TuXALA= +cloud.google.com/go/longrunning v0.6.7 h1:IGtfDWHhQCgCjwQjV9iiLnUta9LBCo8R9QmAFsS/PrE= +cloud.google.com/go/longrunning v0.6.7/go.mod h1:EAFV3IZAKmM56TyiE6VAP3VoTzhZzySwI/YI1s/nRsY= +cloud.google.com/go/monitoring v1.24.2 h1:5OTsoJ1dXYIiMiuL+sYscLc9BumrL3CarVLL7dd7lHM= +cloud.google.com/go/monitoring v1.24.2/go.mod h1:x7yzPWcgDRnPEv3sI+jJGBkwl5qINf+6qY4eq0I9B4U= +cloud.google.com/go/trace v1.11.6 h1:2O2zjPzqPYAHrn3OKl029qlqG6W8ZdYaOWRyr8NgMT4= +cloud.google.com/go/trace v1.11.6/go.mod h1:GA855OeDEBiBMzcckLPE2kDunIpC72N+Pq8WFieFjnI= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 h1:Gt0j3wceWMwPmiazCa8MzMA0MfhmPIz0Qp0FJ6qcM0U= @@ -20,23 +29,37 @@ github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.1/go.mod h github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 h1:oygO0locgZJe7PpYPXT5A29ZkwJaPqcva7BVeemZOZs= github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 h1:lhhYARPUu3LmHysQ/igznQphfzynnqI3D75oUyw1HXk= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0/go.mod h1:l9rva3ApbBpEJxSNYnwT9N4CDLrWgtq3u8736C5hyJw= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.54.0 h1:xfK3bbi6F2RDtaZFtUdKO3osOBIhNb+xTs8lFW6yx9o= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.54.0/go.mod h1:vB2GH9GAYYJTO3mEn8oYwzEdhlayZIdQz6zdzgUIRvA= 
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 h1:s0WlVbf9qpvkh1c/uDAPElam0WrL7fHRIidgZJ7UqZI= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0/go.mod h1:Mf6O40IAyB9zR/1J8nGDDPirZQQPbYJni8Yisy7NTMc= github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.13.4 h1:zEqyPVyku6IvWCFwux4x9RxkLOMUL+1vC9xUFv5l2/M= +github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= +github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= +github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= @@ -171,8 +194,11 @@ github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmd github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= @@ -205,6 +231,8 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= @@ -345,10 +373,10 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= 
-google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4= -google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s= -google.golang.org/genproto/googleapis/api v0.0.0-20250804133106-a7a43d27e69b h1:ULiyYQ0FdsJhwwZUwbaXpZF5yUE3h+RA+gxvBu37ucc= -google.golang.org/genproto/googleapis/api v0.0.0-20250804133106-a7a43d27e69b/go.mod h1:oDOGiMSXHL4sDTJvFvIB9nRQCGdLP1o/iVaqQK8zB+M= +google.golang.org/genproto v0.0.0-20250922171735-9219d122eba9 h1:LvZVVaPE0JSqL+ZWb6ErZfnEOKIqqFWUJE2D0fObSmc= +google.golang.org/genproto v0.0.0-20250922171735-9219d122eba9/go.mod h1:QFOrLhdAe2PsTp3vQY4quuLKTi9j3XG3r6JPPaw7MSc= +google.golang.org/genproto/googleapis/api v0.0.0-20250922171735-9219d122eba9 h1:jm6v6kMRpTYKxBRrDkYAitNJegUeO1Mf3Kt80obv0gg= +google.golang.org/genproto/googleapis/api v0.0.0-20250922171735-9219d122eba9/go.mod h1:LmwNphe5Afor5V3R5BppOULHOnt2mCIf+NxMd4XiygE= google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 h1:M1rk8KBnUsBDg1oPGHNCxG4vc1f49epmTO7xscSajMk= google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= diff --git a/internal/tel/tel.go b/internal/tel/tel.go index 8d1f18cf..b1466ea7 100644 --- a/internal/tel/tel.go +++ b/internal/tel/tel.go @@ -14,47 +14,71 @@ package tel -import "context" +import ( + "context" + "strings" + "time" + + "cloud.google.com/go/cloudsqlconn/debug" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" + "google.golang.org/api/option" + + cmexporter "github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" +) const ( - meterName = 
"alloydb.googleapis.com/client/connector" + meterName = "cloudsql.googleapis.com/client/connector" monitoredResource = "cloudsql.googleapis.com/InstanceClient" - connectLatency = "connect_latencies" + connectLatency = "connect_latencies" // dial latency closedConnectionCount = "closed_connection_count" openConnections = "open_connections" - // The identifier of the GCP project associated with this CSQL resource + // ResourceContainer is the identifier of the GCP project associated with this CSQL resource. ResourceContainer = "resource_container" - // The Cloud SQL instance identifier in the format of [project_name:instance_name] + // ResourceID is the Cloud SQL instance identifier in the format of [project_name:instance_name]. ResourceID = "resource_id" - // A unique identifier generated for each Dialer instance + // ClientUID is a unique identifier generated for each Dialer instance. ClientUID = "client_uid" - // The application name provided by the user or defaulted by the connector + // ApplicationName is the application name provided by the user or defaulted by the connector. ApplicationName = "application_name" - // Cloud SQL Instance's location e.g. us-central1 + // Region is the Cloud SQL Instance's location e.g. us-central1. Region = "region" // ClientRegion is the region from which the client is connecting, unknown if not on GCP ClientRegion = "client_region" // ComputePlatform is the platform on which the client is running, e.g. GCE, GKE, etc. ComputePlatform = "compute_platform" - // Cloud SQL Connector type. "go" in this case. + // ConnectorType is the Cloud SQL Connector type. "go" in this case. ConnectorType = "connector_type" - // Cloud SQL Connector version + // ConnectorVersion is the Cloud SQL Connector version. ConnectorVersion = "connector_version" - // Database engine type [MySQL, PostgreSQL, SQL Server]. + // DatabaseEngineType is the database engine type [MySQL, PostgreSQL, SQL Server]. 
DatabaseEngineType = "database_engine_type" // authType is one of iam or built-in - authType = "auth_type" + authType = "instance_auth_type" // IP address type of the connection, one of [public, psa, psc] - ipType = "ip_type" + ipType = "instance_ip_type" // status indicates whether the dial attempt succeeded or not. status = "status" + // ConnectSuccess indicates the dial attempt succeeded. + ConnectSuccess = "success" + // ConnectError indicates the dial attempt errored out. + ConnectError = "error" + // RefreshAheadType indicates the dialer is using a refresh ahead cache. + RefreshAheadType = "refresh_ahead" + // RefreshLazyType indicates the dialer is using a lazy cache. + RefreshLazyType = "lazy" ) // Config holds all the necessary information to configure a MetricRecorder. type Config struct { // Enabled specifies whether the metrics should be enabled. Enabled bool + // Version is the version of the cloudsqlconn.Dialer. + Version string // Project id ResourceContainer string // The Cloud SQL instance identifier in the format of [project_name:instance_name] @@ -77,22 +101,194 @@ type Config struct { DatabaseEngineType string } +// ConnectorTypeValue returns the connector type based on the user agent. +func ConnectorTypeValue(userAgent string) string { + if strings.Contains(userAgent, "cloud-sql-proxy") { + return "cloud-sql-proxy" + } + return "go" +} + +// AuthTypeValue returns the auth type string based on whether IAM Authn is enabled. +func AuthTypeValue(iamAuthn bool) string { + if iamAuthn { + return "iam" + } + return "built_in" +} + // Attributes holds all the various pieces of metadata to attach to a metric. type Attributes struct { // IAMAuthN specifies whether IAM authentication is enabled. IAMAuthN bool - // UserAgent is the full user-agent of the alloydbconn.Dialer. - UserAgent string // CacheHit specifies whether connection info was present in the cache. CacheHit bool // DialStatus specifies the result of the dial attempt. 
DialStatus string + // IPType specifies IP address type of the connection, one of [public, psa, psc]. + IPType string + // RefreshType specifies the type of cache in use (e.g., refresh ahead or + // lazy). + RefreshType string + // IPAddressType specifies IP address type of the connection, one of [public, psa, psc]. + IPAddressType string } // MetricRecorder defines the interface for recording metrics related to the // internal operations of alloydbconn.Dialer. type MetricRecorder interface { RecordOpenConnection(context.Context, Attributes) + RecordClosedConnection(context.Context, Attributes) RecordClosedConnectionCount(context.Context, Attributes) - RecordConnectLatencies(context.Context, Attributes) + RecordConnectLatencies(context.Context, Attributes, int64) +} + +// DefaultExportInterval is the interval that the metric exporter runs. It +// should always be 60s. This value is exposed as a var to facilitate testing. +var DefaultExportInterval = 60 * time.Second + +// NewMetricRecorder creates a MetricRecorder. When the configuration is not +// enabled, a null recorder is returned instead. +func NewMetricRecorder(ctx context.Context, l debug.ContextLogger, cfg Config, opts ...option.ClientOption) MetricRecorder { + if !cfg.Enabled { + l.Debugf(ctx, "disabling built-in metrics") + return NullMetricRecorder{} + } + + eopts := []cmexporter.Option{ + cmexporter.WithCreateServiceTimeSeries(), + cmexporter.WithProjectID(cfg.ResourceContainer), + cmexporter.WithMonitoringClientOptions(opts...), + cmexporter.WithMetricDescriptorTypeFormatter(func(m metricdata.Metrics) string { + return "cloudsql.googleapis.com/client/connector/" + m.Name + }), + cmexporter.WithMonitoredResourceDescription(monitoredResource, []string{ + ResourceContainer, ResourceID, ClientUID, ApplicationName, Region, ClientRegion, + ComputePlatform, ConnectorType, ConnectorVersion, DatabaseEngineType, + }), + } + exp, err := cmexporter.New(eopts...) 
+ if err != nil { + l.Debugf(ctx, "built-in metrics exporter failed to initialize: %v", err) + return NullMetricRecorder{} + } + + res := resource.NewWithAttributes(monitoredResource, + // The gcp.resource_type is a special attribute that the exporter + // transforms into the MonitoredResource field. + attribute.String("gcp.resource_type", monitoredResource), + attribute.String(ResourceContainer, cfg.ResourceContainer), + attribute.String(ResourceID, cfg.ResourceID), + attribute.String(ClientUID, cfg.ClientUID), + attribute.String(ApplicationName, cfg.ApplicationName), + attribute.String(Region, cfg.Region), + attribute.String(ClientRegion, cfg.ClientRegion), + attribute.String(ComputePlatform, cfg.ComputePlatform), + attribute.String(ConnectorType, cfg.ConnectorType), + attribute.String(ConnectorVersion, cfg.ConnectorVersion), + attribute.String(DatabaseEngineType, cfg.DatabaseEngineType), + ) + + p := sdkmetric.NewMeterProvider( + sdkmetric.WithReader(sdkmetric.NewPeriodicReader( + exp, + // The periodic reader runs every 60 seconds by default, but set + // the value anyway to be defensive. 
+ sdkmetric.WithInterval(DefaultExportInterval), + )), + sdkmetric.WithResource(res), + ) + m := p.Meter(meterName, metric.WithInstrumentationVersion(cfg.Version)) + + mConnectLatency, err := m.Float64Histogram(connectLatency) + if err != nil { + _ = exp.Shutdown(ctx) + l.Debugf(ctx, "built-in metrics exporter failed to initialize dial latency metric: %v", err) + return NullMetricRecorder{} + } + mOpenConns, err := m.Int64UpDownCounter(openConnections) + if err != nil { + _ = exp.Shutdown(ctx) + l.Debugf(ctx, "built-in metrics exporter failed to initialize open connections metric: %v", err) + return NullMetricRecorder{} + } + mClosedConnectionCount, err := m.Int64Counter(closedConnectionCount) + if err != nil { + _ = exp.Shutdown(ctx) + l.Debugf(ctx, "built-in metrics exporter failed to initialize refresh count metric: %v", err) + return NullMetricRecorder{} + } + return &metricRecorder{ + exporter: exp, + provider: p, + dialerID: cfg.ClientUID, + mClosedConnectionCount: mClosedConnectionCount, + mConnectLatency: mConnectLatency, + mOpenConns: mOpenConns, + } +} + +// metricRecorder holds the various counters that track internal operations. +type metricRecorder struct { + exporter sdkmetric.Exporter + provider *sdkmetric.MeterProvider + dialerID string + mClosedConnectionCount metric.Int64Counter + mConnectLatency metric.Float64Histogram + mOpenConns metric.Int64UpDownCounter +} + +// RecordClosedConnectionCount records totals number of closed connections. +func (m *metricRecorder) RecordClosedConnectionCount(ctx context.Context, a Attributes) { + m.mClosedConnectionCount.Add(ctx, 1, + metric.WithAttributeSet(attribute.NewSet( + attribute.String(authType, AuthTypeValue(a.IAMAuthN)), + attribute.String(ipType, a.IPType), + attribute.String(status, a.DialStatus)), + )) +} + +// RecordOpenConnection records current number of open connections. 
+func (m *metricRecorder) RecordOpenConnection(ctx context.Context, a Attributes) { + m.mOpenConns.Add(ctx, 1, + metric.WithAttributeSet(attribute.NewSet( + attribute.String(authType, AuthTypeValue(a.IAMAuthN)), + attribute.String(ipType, a.IPType), + ))) +} + +// RecordClosedConnection decrements the current number of open connections. +func (m *metricRecorder) RecordClosedConnection(ctx context.Context, a Attributes) { + m.mOpenConns.Add(ctx, -1, + metric.WithAttributeSet(attribute.NewSet( + attribute.String(authType, AuthTypeValue(a.IAMAuthN)), + attribute.String(ipType, a.IPType), + ))) +} + +// RecordConnectLatencies records the dial latency, given in milliseconds. +func (m *metricRecorder) RecordConnectLatencies(ctx context.Context, a Attributes, latencyMS int64) { + m.mConnectLatency.Record(ctx, float64(latencyMS), + metric.WithAttributeSet(attribute.NewSet( + attribute.String(authType, AuthTypeValue(a.IAMAuthN)), + attribute.String(ipType, a.IPType), + )), + ) +} + +// NullMetricRecorder implements the MetricRecorder interface with no-ops. It +// is useful for disabling the built-in metrics. +type NullMetricRecorder struct{} + +// RecordOpenConnection is a no-op. +func (n NullMetricRecorder) RecordOpenConnection(context.Context, Attributes) {} + +// RecordClosedConnection is a no-op. +func (n NullMetricRecorder) RecordClosedConnection(context.Context, Attributes) {} + +// RecordClosedConnectionCount is a no-op. +func (n NullMetricRecorder) RecordClosedConnectionCount(context.Context, Attributes) {} + +// RecordConnectLatencies is a no-op.
+func (n NullMetricRecorder) RecordConnectLatencies(context.Context, Attributes, int64) { } diff --git a/options.go b/options.go index f1276d64..db2e2e66 100644 --- a/options.go +++ b/options.go @@ -51,6 +51,7 @@ type dialerConfig struct { authCredentials *auth.Credentials iamLoginTokenProvider auth.TokenProvider useragents []string + applicationName string setAdminAPIEndpoint bool setCredentials bool setHTTPClient bool @@ -121,6 +122,13 @@ func WithUserAgent(ua string) Option { } } +// WithApplicationName returns an Option that sets the application name on the dialer configuration. +func WithApplicationName(name string) Option { + return func(d *dialerConfig) { + d.applicationName = name + } +} + // WithDefaultDialOptions returns an Option that specifies the default // DialOptions used. func WithDefaultDialOptions(opts ...DialOption) Option {