Skip to content

Commit dcdbc43

Browse files
MCP server: Add usage metrics (#711)
- Instrument every MCP tool with the new `shared.TrackToolExecution` helper so each handler automatically emits `tools.<tool_name>.count_*` and `duration_ms` metrics, marking success via `tracker.MarkSuccess()` just before returning.
- Tag metrics with human-readable organization names by introducing a cached org-name resolver (`pkg/tools/internal/shared/org_resolver.go`) wired through `tools.ConfigureMetrics(provider)` (called once during server bootstrap) and backed by the organization gRPC client + stub support.
- Document the workflow in `AGENTS.md` so future tools follow the same pattern, and add targeted unit tests for both the metrics helper and resolver caching logic.
1 parent 1d5abaa commit dcdbc43

File tree

15 files changed

+1133
-22
lines changed

15 files changed

+1133
-22
lines changed

mcp_server/AGENTS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,9 @@ Follow Conventional Commits (e.g., `feat(auth):`, `fix(front):`, `docs:`) and ke
1717

1818
## Security & Configuration Tips
1919
Surface dependency issues early with `make check.ex.deps`, `make check.go.deps`, and `make check.docker`. Store secrets in local `.env` files; never commit credentials. Runtime configuration reads internal gRPC endpoints from `INTERNAL_API_URL_PLUMBER`, `INTERNAL_API_URL_JOB`, `INTERNAL_API_URL_LOGHUB`, and `INTERNAL_API_URL_LOGHUB2`, falling back to legacy `MCP_*` variables. Export `DOCKER_BUILDKIT=1` to mirror CI Docker builds.
20+
21+
## MCP Tool Metrics Quickstart
22+
- Shared instrumentation lives in `pkg/tools/internal/shared/metrics.go`. Inside every MCP tool handler, create a tracker with `tracker := shared.TrackToolExecution(ctx, "<tool_name>", orgID)`, `defer tracker.Cleanup()`, and call `tracker.MarkSuccess()` right before you return a successful result.
23+
- Organization tags resolve via `pkg/tools/internal/shared/org_resolver.go`. The resolver is configured once through `tools.ConfigureMetrics(provider)` during server bootstrap, so new tools only need to supply the org ID (or `""` when not applicable).
24+
- For org-agnostic tools (e.g., `organizations_list`), pass an empty org ID so we still emit `count_*` and `duration_ms` metrics without tags.
25+
- Following this pattern ensures every tool automatically publishes `tools.<tool_name>.count_total|count_passed|count_failed` and `tools.<tool_name>.duration_ms` metrics, with human-readable org tags whenever available, keeping dashboards consistent without extra boilerplate.

mcp_server/cmd/mcp_server/main.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,31 @@ import (
1919

2020
"github.com/semaphoreio/semaphore/mcp_server/pkg/internalapi"
2121
"github.com/semaphoreio/semaphore/mcp_server/pkg/logging"
22+
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools"
2223
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools/jobs"
2324
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools/organizations"
2425
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools/pipelines"
2526
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools/projects"
2627
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools/workflows"
28+
"github.com/semaphoreio/semaphore/mcp_server/pkg/watchman"
2729
support "github.com/semaphoreio/semaphore/mcp_server/test/support"
2830
)
2931

3032
var (
31-
versionFlag = flag.Bool("version", false, "print the server version and exit")
32-
nameFlag = flag.String("name", "semaphore-mcp-server", "implementation name advertised to MCP clients")
33-
httpAddr = flag.String("http", ":3001", "address to serve the streamable MCP transport")
34-
version = "0.1.0"
33+
versionFlag = flag.Bool("version", false, "print the server version and exit")
34+
nameFlag = flag.String("name", "semaphore-mcp-server", "implementation name advertised to MCP clients")
35+
httpAddr = flag.String("http", ":3001", "address to serve the streamable MCP transport")
36+
version = "0.1.0"
37+
metricsNamespace = os.Getenv("METRICS_NAMESPACE")
38+
)
39+
40+
const (
41+
metricService = "mcp-server"
3542
)
3643

3744
func main() {
45+
watchman.Configure(fmt.Sprintf("%s.%s", metricService, metricsNamespace))
46+
3847
flag.Parse()
3948

4049
if *versionFlag {
@@ -91,6 +100,10 @@ func main() {
91100
}()
92101
}
93102

103+
// Configure organization name resolver for metrics tagging.
104+
// This must be called once before registering tools that emit metrics.
105+
tools.ConfigureMetrics(provider)
106+
94107
organizations.Register(srv, provider)
95108
projects.Register(srv, provider)
96109
workflows.Register(srv, provider)

mcp_server/pkg/tools/config.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package tools
2+
3+
import (
4+
"github.com/semaphoreio/semaphore/mcp_server/pkg/internalapi"
5+
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools/internal/shared"
6+
)
7+
8+
// ConfigureMetrics initializes global metrics configuration for all tools.
9+
// This should be called once during server initialization before registering any tools.
10+
//
11+
// It configures the organization name resolver used for metrics tagging,
12+
// allowing metrics to be tagged with human-readable organization names
13+
// instead of UUIDs.
14+
func ConfigureMetrics(provider internalapi.Provider) {
15+
shared.ConfigureDefaultOrgResolver(provider)
16+
}
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
package shared
2+
3+
import (
4+
"context"
5+
"strings"
6+
"time"
7+
8+
watchman "github.com/renderedtext/go-watchman"
9+
10+
"github.com/semaphoreio/semaphore/mcp_server/pkg/logging"
11+
)
12+
13+
var (
14+
watchmanBenchmarkWithTags = watchman.BenchmarkWithTags
15+
watchmanIncrementWithTags = watchman.IncrementWithTags
16+
)
17+
18+
// ToolMetrics submits Watchman metrics on behalf of a single tool invocation.
type ToolMetrics struct {
	// base is the metric name prefix, "tools.<tool_name>".
	base string
	// tags holds the tags attached to every metric submitted by this emitter;
	// it is empty when no organization tag could be derived.
	tags []string
}
23+
24+
// NewToolMetrics prepares a metrics emitter scoped to a tool and optional organization ID.
25+
func NewToolMetrics(ctx context.Context, toolName, orgID string) *ToolMetrics {
26+
resolver := getOrgNameResolver()
27+
return newToolMetricsWithResolver(ctx, toolName, orgID, resolver)
28+
}
29+
30+
func newToolMetricsWithResolver(ctx context.Context, toolName, orgID string, resolver OrgNameResolver) *ToolMetrics {
31+
name := strings.TrimSpace(toolName)
32+
if name == "" {
33+
return nil
34+
}
35+
36+
base := "tools." + name
37+
tags := make([]string, 0, 1)
38+
39+
if tag := resolveOrgTag(ctx, orgID, resolver); tag != "" {
40+
tags = append(tags, tag)
41+
}
42+
43+
return &ToolMetrics{
44+
base: base,
45+
tags: tags,
46+
}
47+
}
48+
49+
// IncrementTotal bumps the total execution counter.
50+
func (tm *ToolMetrics) IncrementTotal() {
51+
tm.increment("count_total")
52+
}
53+
54+
// IncrementSuccess bumps the successful execution counter.
55+
func (tm *ToolMetrics) IncrementSuccess() {
56+
tm.increment("count_passed")
57+
}
58+
59+
// IncrementFailure bumps the failed execution counter.
60+
func (tm *ToolMetrics) IncrementFailure() {
61+
tm.increment("count_failed")
62+
}
63+
64+
// TrackDuration submits the elapsed duration since start.
65+
func (tm *ToolMetrics) TrackDuration(start time.Time) {
66+
if tm == nil {
67+
return
68+
}
69+
70+
name := tm.metricName("duration_ms")
71+
if err := watchmanBenchmarkWithTags(start, name, tm.tags); err != nil {
72+
logMetricError(name, err)
73+
}
74+
}
75+
76+
func (tm *ToolMetrics) increment(suffix string) {
77+
if tm == nil {
78+
return
79+
}
80+
name := tm.metricName(suffix)
81+
if err := watchmanIncrementWithTags(name, tm.tags); err != nil {
82+
logMetricError(name, err)
83+
}
84+
}
85+
86+
func (tm *ToolMetrics) metricName(suffix string) string {
87+
if tm == nil {
88+
return suffix
89+
}
90+
if suffix == "" {
91+
return tm.base
92+
}
93+
return tm.base + "." + suffix
94+
}
95+
96+
func resolveOrgTag(ctx context.Context, orgID string, resolver OrgNameResolver) string {
97+
orgID = strings.TrimSpace(orgID)
98+
if orgID == "" {
99+
return ""
100+
}
101+
102+
value := orgID
103+
if resolver != nil {
104+
if name, err := resolver.Resolve(ctx, orgID); err == nil {
105+
name = strings.TrimSpace(name)
106+
if name != "" {
107+
value = name
108+
}
109+
} else {
110+
logging.ForComponent("metrics").
111+
WithError(err).
112+
WithField("orgId", orgID).
113+
Debug("failed to resolve organization name for metrics")
114+
}
115+
}
116+
117+
return sanitizeMetricTag("org_" + value)
118+
}
119+
120+
// sanitizeMetricTag normalizes value so it can be used as a metric tag.
//
// The value is lower-cased and stripped of surrounding whitespace; a value that
// is empty after that yields "". Every remaining whitespace character (space,
// tab, newline, carriage return, vertical tab, form feed) and every character
// that acts as a delimiter in dotted metric paths or StatsD-style metric lines
// ('.', ',', ':', '|', '#', '@') is replaced with '_', so an organization name
// containing one of them cannot split or corrupt the emitted metric. All other
// characters, including '-' and '_', are kept as they are.
func sanitizeMetricTag(value string) string {
	value = strings.TrimSpace(strings.ToLower(value))
	if value == "" {
		return ""
	}

	return strings.Map(func(r rune) rune {
		switch r {
		case ' ', '\t', '\n', '\r', '\v', '\f', // interior whitespace
			'.', ',', ':', '|', '#', '@': // metric path / line delimiters
			return '_'
		}
		return r
	}, value)
}
128+
129+
func logMetricError(metric string, err error) {
130+
if err == nil {
131+
return
132+
}
133+
logging.ForComponent("metrics").
134+
WithError(err).
135+
WithField("metric", metric).
136+
Debug("failed to submit Watchman metric")
137+
}
138+
139+
// ToolExecutionTracker helps track tool execution metrics with a consistent pattern.
140+
// It provides methods to mark success and automatically handles cleanup via defer.
141+
type ToolExecutionTracker struct {
142+
metrics *ToolMetrics
143+
start time.Time
144+
success *bool
145+
}
146+
147+
// TrackToolExecution creates a new tracker for monitoring tool execution metrics.
148+
// It automatically increments the total counter and sets up cleanup logic.
149+
//
150+
// Usage:
151+
//
152+
// tracker := shared.TrackToolExecution(ctx, toolName, orgID)
153+
// defer tracker.Cleanup()
154+
// // ... tool logic ...
155+
// tracker.MarkSuccess() // Call before successful return
156+
func TrackToolExecution(ctx context.Context, toolName, orgID string) *ToolExecutionTracker {
157+
metrics := NewToolMetrics(ctx, toolName, orgID)
158+
if metrics != nil {
159+
metrics.IncrementTotal()
160+
}
161+
162+
success := false
163+
return &ToolExecutionTracker{
164+
metrics: metrics,
165+
start: time.Now(),
166+
success: &success,
167+
}
168+
}
169+
170+
// MarkSuccess marks the tool execution as successful.
171+
// This should be called just before returning a successful result.
172+
func (t *ToolExecutionTracker) MarkSuccess() {
173+
if t != nil && t.success != nil {
174+
*t.success = true
175+
}
176+
}
177+
178+
// Cleanup emits duration and success/failure metrics.
179+
// This should be called via defer immediately after creating the tracker.
180+
func (t *ToolExecutionTracker) Cleanup() {
181+
if t == nil || t.metrics == nil {
182+
return
183+
}
184+
t.metrics.TrackDuration(t.start)
185+
if t.success != nil && *t.success {
186+
t.metrics.IncrementSuccess()
187+
} else {
188+
t.metrics.IncrementFailure()
189+
}
190+
}

0 commit comments

Comments
 (0)