Skip to content

Commit 3a0e589

Browse files
authored
Fix daemon shutdown deadlock and improve MCP transport observability (#206)
* Bump mcp-go to v0.41.1 + go mod tidy * Clean up temp error handling for 'method not found' on MCP servers * Supply logging adaptor to MCP library to handle logging errors from servers * Prevent hanging during pinging all servers when trying to quit (CTRL+C)
1 parent 9f414d9 commit 3a0e589

File tree

8 files changed

+441
-209
lines changed

8 files changed

+441
-209
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ require (
88
github.com/go-chi/chi/v5 v5.2.3
99
github.com/go-chi/cors v1.2.2
1010
github.com/hashicorp/go-hclog v1.6.3
11-
github.com/mark3labs/mcp-go v0.39.1
11+
github.com/mark3labs/mcp-go v0.41.1
1212
github.com/spf13/cobra v1.10.1
1313
github.com/spf13/pflag v1.0.10
1414
github.com/stretchr/testify v1.11.1

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
4040
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
4141
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
4242
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
43-
github.com/mark3labs/mcp-go v0.39.1 h1:2oPxk7aDbQhouakkYyKl2T4hKFU1c6FDaubWyGyVE1k=
44-
github.com/mark3labs/mcp-go v0.39.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
43+
github.com/mark3labs/mcp-go v0.41.1 h1:w78eWfiQam2i8ICL7AL0WFiq7KHNJQ6UB53ZVtH4KGA=
44+
github.com/mark3labs/mcp-go v0.41.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
4545
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
4646
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
4747
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=

internal/api/mcp.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,6 @@ package api
22

33
import "github.com/mark3labs/mcp-go/mcp"
44

5-
// methodNotFoundMessage is the error message returned by MCP servers when a method is not implemented.
6-
// TODO: This string matching is fragile and should be replaced with proper JSON-RPC error code checking.
7-
// Once mcp-go preserves JSON-RPC error codes, use errors.Is(err, mcp.ErrMethodNotFound) instead.
8-
// See: https://github.com/mark3labs/mcp-go/issues/593
9-
const methodNotFoundMessage = "Method not found"
10-
115
// DomainMeta wraps mcp.Meta for API conversion.
126
type DomainMeta mcp.Meta
137

internal/api/prompts.go

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ package api
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
6-
"strings"
77
"time"
88

99
"github.com/danielgtaylor/huma/v2"
1010
"github.com/mark3labs/mcp-go/mcp"
1111

1212
"github.com/mozilla-ai/mcpd/v2/internal/contracts"
13-
"github.com/mozilla-ai/mcpd/v2/internal/errors"
13+
errorsint "github.com/mozilla-ai/mcpd/v2/internal/errors"
1414
)
1515

1616
// DomainPrompt wraps mcp.Prompt for API conversion.
@@ -147,7 +147,7 @@ func handleServerPrompts(
147147
) (*PromptsListResponse, error) {
148148
mcpClient, clientOk := accessor.Client(name)
149149
if !clientOk {
150-
return nil, fmt.Errorf("%w: %s", errors.ErrServerNotFound, name)
150+
return nil, fmt.Errorf("%w: %s", errorsint.ErrServerNotFound, name)
151151
}
152152

153153
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
@@ -162,16 +162,13 @@ func handleServerPrompts(
162162

163163
result, err := mcpClient.ListPrompts(ctx, req)
164164
if err != nil {
165-
// TODO: This string matching is fragile and should be replaced with proper JSON-RPC error code checking.
166-
// Once mcp-go preserves JSON-RPC error codes, use errors.Is(err, mcp.ErrMethodNotFound) instead.
167-
// See: https://github.com/mark3labs/mcp-go/issues/593
168-
if strings.Contains(err.Error(), methodNotFoundMessage) {
169-
return nil, fmt.Errorf("%w: %s", errors.ErrPromptsNotImplemented, name)
165+
if errors.Is(err, mcp.ErrMethodNotFound) {
166+
return nil, fmt.Errorf("%w: %s", errorsint.ErrPromptsNotImplemented, name)
170167
}
171-
return nil, fmt.Errorf("%w: %s: %w", errors.ErrPromptListFailed, name, err)
168+
return nil, fmt.Errorf("%w: %s: %w", errorsint.ErrPromptListFailed, name, err)
172169
}
173170
if result == nil {
174-
return nil, fmt.Errorf("%w: %s: no result", errors.ErrPromptListFailed, name)
171+
return nil, fmt.Errorf("%w: %s: no result", errorsint.ErrPromptListFailed, name)
175172
}
176173

177174
prompts := make([]Prompt, 0, len(result.Prompts))
@@ -201,7 +198,7 @@ func handleServerPromptGenerate(
201198
) (*GeneratePromptResponse, error) {
202199
mcpClient, clientOk := accessor.Client(serverName)
203200
if !clientOk {
204-
return nil, fmt.Errorf("%w: %s", errors.ErrServerNotFound, serverName)
201+
return nil, fmt.Errorf("%w: %s", errorsint.ErrServerNotFound, serverName)
205202
}
206203

207204
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
@@ -214,16 +211,13 @@ func handleServerPromptGenerate(
214211
},
215212
})
216213
if err != nil {
217-
// TODO: This string matching is fragile and should be replaced with proper JSON-RPC error code checking.
218-
// Once mcp-go preserves JSON-RPC error codes, use errors.Is(err, mcp.ErrMethodNotFound) instead.
219-
// See: https://github.com/mark3labs/mcp-go/issues/593
220-
if strings.Contains(err.Error(), methodNotFoundMessage) {
221-
return nil, fmt.Errorf("%w: %s", errors.ErrPromptsNotImplemented, serverName)
214+
if errors.Is(err, mcp.ErrMethodNotFound) {
215+
return nil, fmt.Errorf("%w: %s", errorsint.ErrPromptsNotImplemented, serverName)
222216
}
223-
return nil, fmt.Errorf("%w: %s: %s: %w", errors.ErrPromptGenerationFailed, serverName, promptName, err)
217+
return nil, fmt.Errorf("%w: %s: %s: %w", errorsint.ErrPromptGenerationFailed, serverName, promptName, err)
224218
}
225219
if result == nil {
226-
return nil, fmt.Errorf("%w: %s: %s: no result", errors.ErrPromptGenerationFailed, serverName, promptName)
220+
return nil, fmt.Errorf("%w: %s: %s: no result", errorsint.ErrPromptGenerationFailed, serverName, promptName)
227221
}
228222

229223
messages := make([]PromptMessage, 0, len(result.Messages))

internal/api/prompts_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ func TestAPI_HandleServerPrompts_MethodNotFound(t *testing.T) {
171171
t.Parallel()
172172

173173
mockClient := &mockMCPClient{
174-
listPromptsError: errors.New("Method not found"),
174+
listPromptsError: mcp.ErrMethodNotFound,
175175
}
176176

177177
accessor := newMockMCPClientAccessor()
@@ -340,7 +340,7 @@ func TestAPI_HandleServerPromptGenerate_MethodNotFound(t *testing.T) {
340340
t.Parallel()
341341

342342
mockClient := &mockMCPClient{
343-
getPromptError: errors.New("Method not found"),
343+
getPromptError: mcp.ErrMethodNotFound,
344344
}
345345

346346
accessor := newMockMCPClientAccessor()

internal/api/resources.go

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ package api
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
6-
"strings"
77
"time"
88

99
"github.com/danielgtaylor/huma/v2"
1010
"github.com/mark3labs/mcp-go/mcp"
1111

1212
"github.com/mozilla-ai/mcpd/v2/internal/contracts"
13-
"github.com/mozilla-ai/mcpd/v2/internal/errors"
13+
errorsint "github.com/mozilla-ai/mcpd/v2/internal/errors"
1414
)
1515

1616
// DomainResource wraps mcp.Resource for API conversion.
@@ -171,7 +171,7 @@ func handleServerResources(
171171
) (*ResourcesResponse, error) {
172172
mcpClient, clientOk := accessor.Client(name)
173173
if !clientOk {
174-
return nil, fmt.Errorf("%w: %s", errors.ErrServerNotFound, name)
174+
return nil, fmt.Errorf("%w: %s", errorsint.ErrServerNotFound, name)
175175
}
176176

177177
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
@@ -186,16 +186,13 @@ func handleServerResources(
186186

187187
result, err := mcpClient.ListResources(ctx, req)
188188
if err != nil {
189-
// TODO: This string matching is fragile and should be replaced with proper JSON-RPC error code checking.
190-
// Once mcp-go preserves JSON-RPC error codes, use errors.Is(err, mcp.ErrMethodNotFound) instead.
191-
// See: https://github.com/mark3labs/mcp-go/issues/593
192-
if strings.Contains(err.Error(), methodNotFoundMessage) {
193-
return nil, fmt.Errorf("%w: %s", errors.ErrResourcesNotImplemented, name)
189+
if errors.Is(err, mcp.ErrMethodNotFound) {
190+
return nil, fmt.Errorf("%w: %s", errorsint.ErrResourcesNotImplemented, name)
194191
}
195-
return nil, fmt.Errorf("%w: %s: %w", errors.ErrResourceListFailed, name, err)
192+
return nil, fmt.Errorf("%w: %s: %w", errorsint.ErrResourceListFailed, name, err)
196193
}
197194
if result == nil {
198-
return nil, fmt.Errorf("%w: %s: no result", errors.ErrResourceListFailed, name)
195+
return nil, fmt.Errorf("%w: %s: no result", errorsint.ErrResourceListFailed, name)
199196
}
200197

201198
resources := make([]Resource, 0, len(result.Resources))
@@ -224,7 +221,7 @@ func handleServerResourceTemplates(
224221
) (*ResourceTemplatesResponse, error) {
225222
mcpClient, clientOk := accessor.Client(name)
226223
if !clientOk {
227-
return nil, fmt.Errorf("%w: %s", errors.ErrServerNotFound, name)
224+
return nil, fmt.Errorf("%w: %s", errorsint.ErrServerNotFound, name)
228225
}
229226

230227
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
@@ -239,16 +236,13 @@ func handleServerResourceTemplates(
239236

240237
result, err := mcpClient.ListResourceTemplates(ctx, req)
241238
if err != nil {
242-
// TODO: This string matching is fragile and should be replaced with proper JSON-RPC error code checking.
243-
// Once mcp-go preserves JSON-RPC error codes, use errors.Is(err, mcp.ErrMethodNotFound) instead.
244-
// See: https://github.com/mark3labs/mcp-go/issues/593
245-
if strings.Contains(err.Error(), methodNotFoundMessage) {
246-
return nil, fmt.Errorf("%w: %s", errors.ErrResourcesNotImplemented, name)
239+
if errors.Is(err, mcp.ErrMethodNotFound) {
240+
return nil, fmt.Errorf("%w: %s", errorsint.ErrResourcesNotImplemented, name)
247241
}
248-
return nil, fmt.Errorf("%w: %s: %w", errors.ErrResourceTemplateListFailed, name, err)
242+
return nil, fmt.Errorf("%w: %s: %w", errorsint.ErrResourceTemplateListFailed, name, err)
249243
}
250244
if result == nil {
251-
return nil, fmt.Errorf("%w: %s: no result", errors.ErrResourceTemplateListFailed, name)
245+
return nil, fmt.Errorf("%w: %s: no result", errorsint.ErrResourceTemplateListFailed, name)
252246
}
253247

254248
templates := make([]ResourceTemplate, 0, len(result.ResourceTemplates))
@@ -277,7 +271,7 @@ func handleServerResourceContent(
277271
) (*ResourceContentResponse, error) {
278272
mcpClient, clientOk := accessor.Client(name)
279273
if !clientOk {
280-
return nil, fmt.Errorf("%w: %s", errors.ErrServerNotFound, name)
274+
return nil, fmt.Errorf("%w: %s", errorsint.ErrServerNotFound, name)
281275
}
282276

283277
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
@@ -289,16 +283,13 @@ func handleServerResourceContent(
289283
},
290284
})
291285
if err != nil {
292-
// TODO: This string matching is fragile and should be replaced with proper JSON-RPC error code checking.
293-
// Once mcp-go preserves JSON-RPC error codes, use errors.Is(err, mcp.ErrMethodNotFound) instead.
294-
// See: https://github.com/mark3labs/mcp-go/issues/593
295-
if strings.Contains(err.Error(), methodNotFoundMessage) {
296-
return nil, fmt.Errorf("%w: %s", errors.ErrResourcesNotImplemented, name)
286+
if errors.Is(err, mcp.ErrMethodNotFound) {
287+
return nil, fmt.Errorf("%w: %s", errorsint.ErrResourcesNotImplemented, name)
297288
}
298-
return nil, fmt.Errorf("%w: %s: %s: %w", errors.ErrResourceReadFailed, name, uri, err)
289+
return nil, fmt.Errorf("%w: %s: %s: %w", errorsint.ErrResourceReadFailed, name, uri, err)
299290
}
300291
if result == nil {
301-
return nil, fmt.Errorf("%w: %s: %s: no result", errors.ErrResourceReadFailed, name, uri)
292+
return nil, fmt.Errorf("%w: %s: %s: no result", errorsint.ErrResourceReadFailed, name, uri)
302293
}
303294

304295
contents := make([]ResourceContent, 0, len(result.Contents))

internal/daemon/daemon.go

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ import (
1414

1515
"github.com/hashicorp/go-hclog"
1616
"github.com/mark3labs/mcp-go/client"
17+
"github.com/mark3labs/mcp-go/client/transport"
1718
"github.com/mark3labs/mcp-go/mcp"
19+
"github.com/mark3labs/mcp-go/util"
1820
"golang.org/x/sync/errgroup"
1921

2022
"github.com/mozilla-ai/mcpd/v2/internal/cmd"
@@ -24,6 +26,8 @@ import (
2426
"github.com/mozilla-ai/mcpd/v2/internal/runtime"
2527
)
2628

29+
var _ util.Logger = (*mcpLoggerAdapter)(nil)
30+
2731
// Daemon manages MCP server lifecycles, client connections, and health monitoring.
2832
// It should only be created using NewDaemon to ensure proper initialization.
2933
type Daemon struct {
@@ -47,6 +51,11 @@ type Daemon struct {
4751
clientHealthCheckInterval time.Duration
4852
}
4953

54+
// mcpLoggerAdapter adapts hclog.Logger to mcp-go's util.Logger interface.
55+
type mcpLoggerAdapter struct {
56+
logger hclog.Logger
57+
}
58+
5059
// NewDaemon creates a new Daemon instance with proper initialization.
5160
// Use this function instead of directly creating a Daemon struct.
5261
func NewDaemon(deps Dependencies, opt ...Option) (*Daemon, error) {
@@ -108,6 +117,12 @@ func NewDaemon(deps Dependencies, opt ...Option) (*Daemon, error) {
108117
}, nil
109118
}
110119

120+
func newMCPLoggerAdapter(logger hclog.Logger) *mcpLoggerAdapter {
121+
return &mcpLoggerAdapter{
122+
logger: logger,
123+
}
124+
}
125+
111126
// StartAndManage is a long-running method that starts configured MCP servers, and the API.
112127
// It launches regular health checks on the MCP servers, with statuses visible via API routes.
113128
func (d *Daemon) StartAndManage(ctx context.Context) error {
@@ -222,7 +237,13 @@ func (d *Daemon) startMCPServer(ctx context.Context, server runtime.Server) erro
222237

223238
logger.Debug("attempting to start server", "binary", runtimeBinary)
224239

225-
stdioClient, err := client.NewStdioMCPClient(runtimeBinary, environ, args...)
240+
mcpLogger := newMCPLoggerAdapter(logger.Named("transport"))
241+
stdioClient, err := client.NewStdioMCPClientWithOptions(
242+
runtimeBinary,
243+
environ,
244+
args,
245+
transport.WithCommandLogger(mcpLogger),
246+
)
226247
if err != nil {
227248
return fmt.Errorf("error starting MCP server: '%s': %w", server.Name(), err)
228249
}
@@ -396,12 +417,37 @@ func (d *Daemon) pingAllServers(ctx context.Context, maxTimeout time.Duration) e
396417
})
397418
}
398419

399-
_ = g.Wait()
400-
if len(errs) > 0 {
401-
return errors.Join(errs...)
420+
// Wait for all pings to complete, but allow interruption if parent context is cancelled.
421+
// This prevents the daemon from hanging during shutdown if a ping is stuck in uninterruptible I/O.
422+
done := make(chan struct{})
423+
go func() {
424+
_ = g.Wait()
425+
close(done)
426+
}()
427+
428+
select {
429+
case <-done:
430+
// All pings completed normally.
431+
if len(errs) > 0 {
432+
return errors.Join(errs...)
433+
}
434+
return nil
435+
case <-ctx.Done():
436+
// Parent context cancelled (shutdown), return immediately without waiting for pings to complete.
437+
// Any stuck pings will eventually time out or be cleaned up when the process exits.
438+
d.logger.Warn("Ping operation interrupted due to context cancellation, some pings may not have completed")
439+
return ctx.Err()
402440
}
441+
}
403442

404-
return nil
443+
// Infof implements mcp-go's Logger interface.
444+
func (a *mcpLoggerAdapter) Infof(format string, v ...any) {
445+
a.logger.Info(fmt.Sprintf(format, v...))
446+
}
447+
448+
// Errorf implements mcp-go's Logger interface.
449+
func (a *mcpLoggerAdapter) Errorf(format string, v ...any) {
450+
a.logger.Error(fmt.Sprintf(format, v...))
405451
}
406452

407453
// IsValidAddr returns an error if the address is not a valid "host:port" string.

0 commit comments

Comments
 (0)