Skip to content

Commit 19ef7f8

Browse files
Instrument Otel metrics
1 parent f157793 commit 19ef7f8

File tree

7 files changed

+748
-28
lines changed

7 files changed

+748
-28
lines changed

cmd/terraform-mcp-server/init.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,11 @@ var (
9494
}
9595

9696
enabledToolsets := getToolsetsFromCmd(cmd.Root(), logger)
97+
stdlog.Printf("Starting StreamableHTTP server with host: %s, port: %s, endpoint: %s, heartbeatInterval: %v, enabledToolsets: %v", host, port, endpointPath, heartbeatInterval, enabledToolsets)
98+
metricsConfig, shutdownMetrics := setupMetrics(logger)
99+
defer shutdownMetrics()
97100

98-
if err := runHTTPServer(logger, host, port, endpointPath, heartbeatInterval, enabledToolsets); err != nil {
101+
if err := runHTTPServer(logger, host, port, endpointPath, heartbeatInterval, enabledToolsets, metricsConfig); err != nil {
99102
stdlog.Fatal("failed to run streamableHTTP server:", err)
100103
}
101104
},

cmd/terraform-mcp-server/main.go

Lines changed: 116 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,19 @@ import (
1111
"os"
1212
"os/signal"
1313
"strings"
14+
"sync"
1415
"syscall"
1516
"time"
1617

1718
"github.com/hashicorp/terraform-mcp-server/pkg/client"
1819
"github.com/hashicorp/terraform-mcp-server/pkg/toolsets"
1920
"github.com/hashicorp/terraform-mcp-server/version"
21+
"go.opentelemetry.io/otel"
22+
"go.opentelemetry.io/otel/attribute"
23+
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
24+
"go.opentelemetry.io/otel/metric"
25+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
26+
"go.opentelemetry.io/otel/sdk/resource"
2027

2128
"github.com/mark3labs/mcp-go/mcp"
2229
"github.com/mark3labs/mcp-go/server"
@@ -27,11 +34,11 @@ import (
2734
//go:embed instructions.md
2835
var instructions string
2936

30-
func runHTTPServer(logger *log.Logger, host string, port string, endpointPath string, heartbeatInterval time.Duration, enabledToolsets []string) error {
37+
func runHTTPServer(logger *log.Logger, host string, port string, endpointPath string, heartbeatInterval time.Duration, enabledToolsets []string, metricsConfig client.MetricsConfig) error {
3138
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
3239
defer stop()
3340

34-
hcServer := NewServer(version.Version, logger, enabledToolsets)
41+
hcServer := NewServer(version.Version, logger, enabledToolsets, metricsConfig)
3542
registerToolsAndResources(hcServer, logger, enabledToolsets)
3643

3744
return streamableHTTPServerInit(ctx, hcServer, logger, host, port, endpointPath, heartbeatInterval)
@@ -41,13 +48,13 @@ func runStdioServer(logger *log.Logger, enabledToolsets []string) error {
4148
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
4249
defer stop()
4350

44-
hcServer := NewServer(version.Version, logger, enabledToolsets)
51+
hcServer := NewServer(version.Version, logger, enabledToolsets, client.MetricsConfig{})
4552
registerToolsAndResources(hcServer, logger, enabledToolsets)
4653

4754
return serverInit(ctx, hcServer, logger)
4855
}
4956

50-
func NewServer(version string, logger *log.Logger, enabledToolsets []string, opts ...server.ServerOption) *server.MCPServer {
57+
func NewServer(version string, logger *log.Logger, enabledToolsets []string, metricsConfig client.MetricsConfig, opts ...server.ServerOption) *server.MCPServer {
5158
// Create rate limiting middleware with environment-based configuration
5259
rateLimitConfig := client.LoadRateLimitConfigFromEnv()
5360
rateLimitMiddleware := client.NewRateLimitMiddleware(rateLimitConfig, logger)
@@ -87,6 +94,26 @@ func NewServer(version string, logger *log.Logger, enabledToolsets []string, opt
8794
client.NewSessionHandler(ctx, session, logger)
8895
}
8996
})
97+
if metricsConfig.Enabled {
98+
var toolStartTimes sync.Map
99+
hooks.AddBeforeCallTool(func(ctx context.Context, id any, message *mcp.CallToolRequest) {
100+
toolStartTimes.Store(fmt.Sprintf("%v", id), time.Now())
101+
})
102+
hooks.AddAfterCallTool(func(ctx context.Context, id any, message *mcp.CallToolRequest, result any) {
103+
startTime := time.Now()
104+
if storedStart, ok := toolStartTimes.LoadAndDelete(fmt.Sprintf("%v", id)); ok {
105+
if ts, ok := storedStart.(time.Time); ok {
106+
startTime = ts
107+
}
108+
}
109+
// Check if the result has any errors
110+
var toolErr error
111+
if res, ok := result.(*mcp.CallToolResult); ok && res.IsError {
112+
toolErr = fmt.Errorf("Tool reported error: %+v", res.Result)
113+
}
114+
client.RecordToolCall(ctx, startTime, toolErr, id, message, metricsConfig, logger)
115+
})
116+
}
90117

91118
// Add hooks to options
92119
opts = append(opts, server.WithHooks(hooks))
@@ -190,23 +217,25 @@ func runDefaultCommand(cmd *cobra.Command, _ []string) {
190217
}
191218

192219
func main() {
220+
logFile, _ := rootCmd.PersistentFlags().GetString("log-file")
221+
logLevel := getLogLevel(rootCmd)
222+
logFormat := getLogFormat(rootCmd)
223+
logger, err := initLogger(logFile, logLevel, logFormat)
224+
if err != nil {
225+
stdlog.Fatal("Failed to initialize logger:", err)
226+
}
193227
if shouldUseStreamableHTTPMode() {
228+
logger.Info("Starting in Streamable HTTP mode based on environment configuration")
229+
230+
metricsConfig, shutdownMetrics := setupMetrics(logger)
231+
defer shutdownMetrics()
232+
194233
port := getHTTPPort()
195234
host := getHTTPHost()
196235
endpointPath := getEndpointPath(nil)
197-
198-
logFile, _ := rootCmd.PersistentFlags().GetString("log-file")
199-
logLevel := getLogLevel(rootCmd)
200-
logFormat := getLogFormat(rootCmd)
201-
logger, err := initLogger(logFile, logLevel, logFormat)
202-
if err != nil {
203-
stdlog.Fatal("Failed to initialize logger:", err)
204-
}
205-
206236
enabledToolsets := getToolsetsFromCmd(rootCmd, logger)
207-
208237
heartbeatInterval := getHeartbeatInterval()
209-
if err := runHTTPServer(logger, host, port, endpointPath, heartbeatInterval, enabledToolsets); err != nil {
238+
if err := runHTTPServer(logger, host, port, endpointPath, heartbeatInterval, enabledToolsets, metricsConfig); err != nil {
210239
stdlog.Fatal("failed to run StreamableHTTP server:", err)
211240
}
212241
return
@@ -284,3 +313,75 @@ func getHeartbeatInterval() time.Duration {
284313
}
285314
return 0
286315
}
316+
317+
func setupMetrics(logger *log.Logger) (client.MetricsConfig, func()) {
318+
metricsConfig := client.LoadMetricsConfigFromEnv()
319+
logger.Infof("Metrics enabled: %t endpoint: %s exportInterval: %s", metricsConfig.Enabled, metricsConfig.Endpoint, metricsConfig.ExportInterval)
320+
if !metricsConfig.Enabled {
321+
return metricsConfig, func() {}
322+
}
323+
324+
// Context for metrics is for tracking the lifecycle of the metrics setup and shutdown, not tied to individual tool calls.
325+
ctxMetrics := context.Background()
326+
otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) {
327+
log.Errorf("OTel Internal Error: %v", err)
328+
}))
329+
330+
shutdown, err := initMetrics(ctxMetrics, &metricsConfig, logger)
331+
if err != nil {
332+
logger.Errorf("Failed to initialize metrics: %v", err)
333+
return metricsConfig, func() {}
334+
}
335+
336+
return metricsConfig, shutdown
337+
}
338+
339+
func initMetrics(ctx context.Context, config *client.MetricsConfig, logger *log.Logger) (func(), error) {
340+
logger.Infof("Initializing exporter and meter provider for OTel metrics...")
341+
// Create the Exporter (Sends data to the Collector)
342+
// exporter, err := stdoutmetric.New() // For stdio debugging
343+
exporter, err := otlpmetrichttp.New(ctx, otlpmetrichttp.WithEndpoint(config.Endpoint), otlpmetrichttp.WithInsecure())
344+
if err != nil {
345+
return nil, fmt.Errorf("failed to create metrics exporter: %w", err)
346+
}
347+
// Create the MeterProvider with a PeriodicReader
348+
// The reader flushes metrics to the exporter periodically
349+
resourceAttrs := resource.NewSchemaless(
350+
attribute.String("service.name", config.ServiceName),
351+
attribute.String("service.version", config.ServiceVersion),
352+
)
353+
config.MeterProvider = sdkmetric.NewMeterProvider(
354+
sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter, sdkmetric.WithInterval(config.ExportInterval))),
355+
sdkmetric.WithResource(resourceAttrs),
356+
)
357+
358+
// Set it as the global provider
359+
otel.SetMeterProvider(config.MeterProvider)
360+
361+
meter := config.MeterProvider.Meter(config.ServiceName)
362+
363+
config.ToolCounter, err = meter.Int64Counter("mcp_tool_calls_total")
364+
if err != nil {
365+
return nil, fmt.Errorf("failed to create tool counter: %w", err)
366+
}
367+
368+
config.ErrorCounter, err = meter.Int64Counter("mcp_tool_errors_total",
369+
metric.WithDescription("Total number of failed tool calls"))
370+
if err != nil {
371+
return nil, fmt.Errorf("failed to create error counter: %w", err)
372+
}
373+
374+
config.ToolCallLatencyBucket, err = meter.Float64Histogram("mcp_tool_duration_seconds",
375+
metric.WithDescription("Duration of tool calls in seconds"),
376+
metric.WithUnit("s"))
377+
if err != nil {
378+
return nil, fmt.Errorf("failed to create latency histogram: %w", err)
379+
}
380+
381+
return func() {
382+
logger.Infof("Shutting down metrics exporter..")
383+
if err := config.MeterProvider.Shutdown(ctx); err != nil {
384+
logger.Errorf("Error shutting down meter provider: %v", err)
385+
}
386+
}, nil
387+
}

0 commit comments

Comments
 (0)