Skip to content

Commit dd42365

Browse files
authored
exposing trident metrics through https
1 parent 8886713 commit dd42365

File tree

12 files changed

+371
-18
lines changed

12 files changed

+371
-18
lines changed

cli/cmd/images.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2023 NetApp, Inc. All Rights Reserved.
1+
// Copyright 2025 NetApp, Inc. All Rights Reserved.
22

33
package cmd
44

@@ -149,6 +149,7 @@ func getInstallYaml(semVersion *versionutils.Version) (string, error) {
149149
Version: semVersion,
150150
HTTPRequestTimeout: tridentconfig.HTTPTimeoutString,
151151
ServiceAccountName: getControllerRBACResourceName(),
152+
HTTPSMetrics: false,
152153
}
153154
// Get Deployment and Daemonset YAML and collect the names of the container images Trident needs to run.
154155
yaml := k8sclient.GetCSIDeploymentYAML(deploymentArgs)

cli/cmd/install.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ var (
123123
httpRequestTimeout time.Duration
124124
acpImage string // TODO: Remove after 26.04.
125125
enableACP bool // TODO: Remove after 26.04.
126+
httpsMetrics bool
126127
cloudProvider string
127128
cloudIdentity string
128129
iscsiSelfHealingInterval time.Duration
@@ -240,7 +241,7 @@ func init() {
240241
installCmd.Flags().DurationVar(&k8sTimeout, "k8s-timeout", 180*time.Second,
241242
"The timeout for all Kubernetes operations.")
242243
installCmd.Flags().DurationVar(&httpRequestTimeout, "http-request-timeout", tridentconfig.HTTPTimeout,
243-
"Override the HTTP request timeout for Trident controllers REST API")
244+
"Override the HTTP request timeout for Trident controller's REST API")
244245
installCmd.Flags().DurationVar(&iscsiSelfHealingInterval, "iscsi-self-healing-interval", tridentconfig.IscsiSelfHealingInterval,
245246
"Override the default iSCSI self-healing interval.")
246247
installCmd.Flags().DurationVar(&iscsiSelfHealingWaitTime, "iscsi-self-healing-wait-time", tridentconfig.ISCSISelfHealingWaitTime,
@@ -250,6 +251,8 @@ func init() {
250251
installCmd.Flags().StringVar(&acpImage, "acp-image", "",
251252
"Override the default trident-acp container image (obsolete).")
252253

254+
installCmd.Flags().BoolVar(&httpsMetrics, "https-metrics", false, "Enable HTTPS metrics endpoint for Trident controller.")
255+
253256
installCmd.Flags().StringVar(&cloudProvider, "cloud-provider", "", "Name of the cloud provider")
254257
installCmd.Flags().StringVar(&cloudIdentity, "cloud-identity", "", "Cloud identity to be set on service account")
255258

@@ -695,6 +698,7 @@ func prepareYAMLFiles() error {
695698
IdentityLabel: identityLabel,
696699
K8sAPIQPS: k8sAPIQPS,
697700
EnableConcurrency: enableConcurrency,
701+
HTTPSMetrics: httpsMetrics,
698702
}
699703
deploymentYAML := k8sclient.GetCSIDeploymentYAML(deploymentArgs)
700704
if err = writeFile(deploymentPath, deploymentYAML); err != nil {
@@ -1067,6 +1071,7 @@ func installTrident() (returnError error) {
10671071
IdentityLabel: identityLabel,
10681072
K8sAPIQPS: k8sAPIQPS,
10691073
EnableConcurrency: enableConcurrency,
1074+
HTTPSMetrics: httpsMetrics,
10701075
CSIFeatureGates: csiFeatureGateYAMLSnippets,
10711076
}
10721077
returnError = client.CreateObjectByYAML(

cli/k8s_client/types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ type DeploymentYAMLArguments struct {
173173
IdentityLabel bool `json:"identityLabel"`
174174
K8sAPIQPS int `json:"k8sAPIQPS"`
175175
EnableConcurrency bool `json:"enableConcurrency"`
176+
HTTPSMetrics bool `json:"httpsMetrics"`
176177
CSIFeatureGates map[string]string `json:"csiFeatureGates"`
177178
}
178179

cli/k8s_client/yaml_factory.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,10 @@ spec:
349349
protocol: TCP
350350
port: 9220
351351
targetPort: 8001
352+
- name: https-metrics
353+
protocol: TCP
354+
port: 8444
355+
targetPort: 8444
352356
`
353357

354358
func GetResourceQuotaYAML(resourceQuotaName, namespace string, labels, controllingCRDetails map[string]string) string {
@@ -506,7 +510,7 @@ func getCSIDeploymentAutosupportVolumeYAML(args *DeploymentYAMLArguments) string
506510
}
507511

508512
func GetCSIDeploymentYAML(args *DeploymentYAMLArguments) string {
509-
var debugLine, sideCarLogLevel, ipLocalhost, enableACP, K8sAPISidecarThrottle, K8sAPITridentThrottle string
513+
var debugLine, sideCarLogLevel, ipLocalhost, enableACP, httpsMetrics, metrics, K8sAPISidecarThrottle, K8sAPITridentThrottle string
510514
Log().WithFields(LogFields{
511515
"Args": args,
512516
}).Trace(">>>> GetCSIDeploymentYAML")
@@ -567,6 +571,12 @@ func GetCSIDeploymentYAML(args *DeploymentYAMLArguments) string {
567571
enableACP = "- \"-enable_acp\""
568572
}
569573

574+
if args.HTTPSMetrics {
575+
httpsMetrics = "- \"--https_metrics\""
576+
} else {
577+
metrics = "- \"--metrics\""
578+
}
579+
570580
if strings.EqualFold(args.CloudProvider, CloudProviderAzure) {
571581
deploymentYAML = strings.ReplaceAll(deploymentYAML, "{AZURE_CREDENTIAL_FILE_ENV}", "- name: AZURE_CREDENTIAL_FILE\n value: /etc/kubernetes/azure.json")
572582
deploymentYAML = strings.ReplaceAll(deploymentYAML, "{AZURE_CREDENTIAL_FILE_VOLUME}",
@@ -633,6 +643,8 @@ func GetCSIDeploymentYAML(args *DeploymentYAMLArguments) string {
633643
deploymentYAML = strings.ReplaceAll(deploymentYAML, "{K8S_API_CLIENT_TRIDENT_THROTTLE}", K8sAPITridentThrottle)
634644
deploymentYAML = strings.ReplaceAll(deploymentYAML, "{K8S_API_CLIENT_SIDECAR_THROTTLE}", K8sAPISidecarThrottle)
635645
deploymentYAML = strings.ReplaceAll(deploymentYAML, "{ENABLE_CONCURRENCY}", strconv.FormatBool(args.EnableConcurrency))
646+
deploymentYAML = strings.ReplaceAll(deploymentYAML, "{METRICS}", metrics)
647+
deploymentYAML = strings.ReplaceAll(deploymentYAML, "{HTTPS_METRICS}", httpsMetrics)
636648

637649
// Log before secrets are inserted into YAML.
638650
Log().WithField("yaml", deploymentYAML).Trace("CSI Deployment YAML.")
@@ -696,7 +708,8 @@ spec:
696708
- "--http_request_timeout={HTTP_REQUEST_TIMEOUT}"
697709
- "--enable_force_detach={ENABLE_FORCE_DETACH}"
698710
- "--enable_concurrency={ENABLE_CONCURRENCY}"
699-
- "--metrics"
711+
{METRICS}
712+
{HTTPS_METRICS}
700713
{ENABLE_ACP}
701714
{DEBUG}
702715
{K8S_API_CLIENT_TRIDENT_THROTTLE}

cli/k8s_client/yaml_factory_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func TestYAMLFactory(t *testing.T) {
9292
Version: version,
9393
HTTPRequestTimeout: config.HTTPTimeoutString,
9494
EnableACP: true,
95+
HTTPSMetrics: true,
9596
IdentityLabel: true,
9697
K8sAPIQPS: 100,
9798
}
@@ -191,6 +192,7 @@ func TestGetCSIDeploymentYAML_WithExcludeAutosupport(t *testing.T) {
191192
SilenceAutosupport: false,
192193
ExcludeAutosupport: true,
193194
EnableACP: true,
195+
HTTPSMetrics: true,
194196
K8sAPIQPS: 100,
195197
}
196198
installASUPDeploymentArgs := &DeploymentYAMLArguments{
@@ -215,6 +217,7 @@ func TestGetCSIDeploymentYAML_WithExcludeAutosupport(t *testing.T) {
215217
SilenceAutosupport: false,
216218
ExcludeAutosupport: false,
217219
EnableACP: true,
220+
HTTPSMetrics: true,
218221
K8sAPIQPS: 100,
219222
}
220223

@@ -259,6 +262,7 @@ func TestValidateGetCSIDeploymentYAMLSuccess(t *testing.T) {
259262
SilenceAutosupport: false,
260263
ExcludeAutosupport: false,
261264
EnableACP: true,
265+
HTTPSMetrics: true,
262266
K8sAPIQPS: 100,
263267
}
264268

@@ -593,6 +597,7 @@ func TestGetCSIDeploymentYAML_AutosupportYAML_ExcludeAutosupport(t *testing.T) {
593597
SilenceAutosupport: false,
594598
ExcludeAutosupport: true,
595599
EnableACP: true,
600+
HTTPSMetrics: true,
596601
K8sAPIQPS: 100,
597602
}
598603

@@ -640,6 +645,7 @@ func TestGetCSIDeploymentYAML_AutosupportYAML_EnableButSilenceAutosupport(t *tes
640645
SilenceAutosupport: silenceASUP,
641646
ExcludeAutosupport: false,
642647
EnableACP: true,
648+
HTTPSMetrics: true,
643649
K8sAPIQPS: 100,
644650
}
645651

frontend/metrics/plugin.go

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1-
// Copyright 2019 NetApp, Inc. All Rights Reserved.
1+
// Copyright 2025 NetApp, Inc. All Rights Reserved.
22

33
package metrics
44

55
import (
66
"context"
7+
"crypto/tls"
8+
"crypto/x509"
79
"fmt"
810
"net/http"
11+
"os"
12+
"time"
913

1014
"github.com/prometheus/client_golang/prometheus/promhttp"
1115

@@ -68,3 +72,103 @@ func (s *Server) GetName() string {
6872
// Version returns config.OrchestratorAPIVersion as the version string of the metrics frontend.
func (s *Server) Version() string {
6973
return config.OrchestratorAPIVersion
7074
}
75+
76+
// HTTPSServer is the metrics frontend that serves over TLS.
type HTTPSServer struct {
	// server is the underlying HTTP server, carrying the listen address,
	// handler, timeouts and TLS configuration.
	server *http.Server

	// File paths recorded at construction time. serverCertFile and
	// serverKeyFile are handed to ListenAndServeTLS when the server is
	// activated; caCertFile is the bundle used for client verification.
	caCertFile, serverCertFile, serverKeyFile string
}
83+
84+
// NewHTTPSMetricsServer creates a new HTTPS metrics server with TLS configuration
85+
func NewHTTPSMetricsServer(address, port, caCertFile, serverCertFile, serverKeyFile string, enableMutualTLS bool, writeTimeout time.Duration) (*HTTPSServer, error) {
86+
ctx := GenerateRequestContext(nil, "", ContextSourceInternal, WorkflowPluginCreate, LogLayerMetricsFrontend)
87+
88+
httpsServer := &HTTPSServer{
89+
server: &http.Server{
90+
Addr: fmt.Sprintf("%s:%s", address, port),
91+
Handler: &metricsAuthHandler{handler: promhttp.Handler()},
92+
TLSConfig: &tls.Config{
93+
ClientAuth: tls.RequireAndVerifyClientCert,
94+
MinVersion: config.MinServerTLSVersion,
95+
},
96+
ReadTimeout: config.HTTPTimeout,
97+
WriteTimeout: writeTimeout,
98+
},
99+
caCertFile: caCertFile,
100+
serverCertFile: serverCertFile,
101+
serverKeyFile: serverKeyFile,
102+
}
103+
104+
// Configure for non-mutual TLS if needed
105+
if !enableMutualTLS {
106+
httpsServer.server.Handler = promhttp.Handler()
107+
httpsServer.server.TLSConfig.ClientAuth = tls.NoClientCert
108+
}
109+
110+
// Load CA certificate if provided
111+
if caCertFile != "" {
112+
caCert, err := os.ReadFile(caCertFile)
113+
if err != nil {
114+
return nil, fmt.Errorf("could not read CA certificate file: %v", err)
115+
}
116+
caCertPool := x509.NewCertPool()
117+
caCertPool.AppendCertsFromPEM(caCert)
118+
httpsServer.server.TLSConfig.ClientCAs = caCertPool
119+
}
120+
121+
Logc(ctx).WithField("address", httpsServer.server.Addr).Info("Initializing HTTPS metrics frontend.")
122+
123+
return httpsServer, nil
124+
}
125+
126+
func (s *HTTPSServer) Activate() error {
127+
go func() {
128+
ctx := GenerateRequestContext(nil, "", ContextSourceInternal, WorkflowPluginActivate, LogLayerMetricsFrontend)
129+
130+
Logc(ctx).WithField("address", s.server.Addr).Info("Activating HTTPS metrics frontend.")
131+
132+
err := s.server.ListenAndServeTLS(s.serverCertFile, s.serverKeyFile)
133+
if err == http.ErrServerClosed {
134+
Logc(ctx).WithField("address", s.server.Addr).Info("HTTPS metrics frontend server has closed.")
135+
} else if err != nil {
136+
Logc(ctx).Fatal(err)
137+
}
138+
}()
139+
return nil
140+
}
141+
142+
func (s *HTTPSServer) Deactivate() error {
143+
ctx := GenerateRequestContext(nil, "", ContextSourceInternal, WorkflowPluginDeactivate, LogLayerMetricsFrontend)
144+
145+
Logc(ctx).WithField("address", s.server.Addr).Info("Deactivating HTTPS metrics frontend.")
146+
ctx, cancel := context.WithTimeout(ctx, config.HTTPTimeout)
147+
defer cancel()
148+
return s.server.Shutdown(ctx)
149+
}
150+
151+
// GetName returns the fixed display name of this frontend, "HTTPS metrics".
func (s *HTTPSServer) GetName() string {
152+
return "HTTPS metrics"
153+
}
154+
155+
// Version returns config.OrchestratorAPIVersion as the version string of the HTTPS metrics frontend.
func (s *HTTPSServer) Version() string {
156+
return config.OrchestratorAPIVersion
157+
}
158+
159+
// metricsAuthHandler wraps an http.Handler and gates access to it: its ServeHTTP
// method forwards a request only when the first TLS peer certificate's subject
// common name equals config.ClientCertName.
160+
type metricsAuthHandler struct {
161+
// handler is the wrapped handler that receives authenticated requests.
handler http.Handler
162+
}
163+
164+
func (h *metricsAuthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
165+
// Service requests from Trident nodes with a valid client certificate
166+
if len(r.TLS.PeerCertificates) > 0 && r.TLS.PeerCertificates[0].Subject.CommonName == config.ClientCertName {
167+
ctx := GenerateRequestContext(nil, "", ContextSourceInternal, WorkflowPluginActivate, LogLayerMetricsFrontend)
168+
Logc(ctx).WithField("peerCert", config.ClientCertName).Debug("Authenticated by HTTPS metrics frontend.")
169+
h.handler.ServeHTTP(w, r)
170+
} else {
171+
w.Header().Set("WWW-Authenticate", fmt.Sprintf("Basic realm=\"%s\"", config.OrchestratorName))
172+
w.WriteHeader(http.StatusUnauthorized)
173+
}
174+
}

0 commit comments

Comments
 (0)