Skip to content

Commit 7ef6075

Browse files
authored
Add JSON schema validation for registry.json (#1438)
Signed-off-by: Dan Barr <[email protected]> Co-authored-by: Dan Barr <[email protected]>
1 parent 6f265c1 commit 7ef6075

File tree

8 files changed

+556
-9
lines changed

8 files changed

+556
-9
lines changed

docs/registry/schema.md

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,50 @@ This can also be used to validate a custom registry file to be used with the
1515

1616
## Schema location
1717

18-
- **File**: [`docs/registry/schema.json`](schema.json)
18+
- **File**: [`pkg/registry/data/schema.json`](../../pkg/registry/data/schema.json)
1919
- **Schema ID**:
20-
`https://raw.githubusercontent.com/stacklok/toolhive/main/docs/registry/schema.json`
20+
`https://raw.githubusercontent.com/stacklok/toolhive/main/pkg/registry/data/schema.json`
2121

2222
## Usage
2323

24-
### Local validation
24+
### Automated validation (Go tests)
25+
26+
The registry is automatically validated against the schema during development
27+
and CI/CD through Go tests. This ensures that any changes to the registry data
28+
are checked against the schema whenever the tests run.
29+
30+
The validation is implemented in
31+
[`pkg/registry/schema_validation.go`](../../pkg/registry/schema_validation.go)
32+
and tested in
33+
[`pkg/registry/schema_validation_test.go`](../../pkg/registry/schema_validation_test.go).
34+
35+
**Key tests:**
36+
37+
- `TestEmbeddedRegistrySchemaValidation` - Validates the embedded
38+
`registry.json` against the schema
39+
- `TestRegistrySchemaValidation` - Comprehensive test suite with valid and
40+
invalid registry examples
41+
42+
**Running the validation:**
43+
44+
```bash
45+
# Run all schema validation tests
46+
go test -v ./pkg/registry -run ".*Schema.*"
47+
48+
# Run just the embedded registry validation
49+
go test -v ./pkg/registry -run TestEmbeddedRegistrySchemaValidation
50+
51+
# Run all registry tests (includes schema validation)
52+
go test -v ./pkg/registry
53+
```
54+
55+
This validation runs whenever the Go test suite is executed, which includes:
56+
57+
- Local development (`go test`)
58+
- CI/CD pipeline (GitHub Actions)
59+
- Pre-commit hooks (if configured)
60+
61+
### Manual validation
2562

2663
#### Using check-jsonschema
2764

@@ -41,7 +78,7 @@ Validate the registry with full format validation:
4178

4279
```bash
4380
# Run from the root of the repository
44-
check-jsonschema --schemafile docs/registry/schema.json pkg/registry/data/registry.json
81+
check-jsonschema --schemafile pkg/registry/data/schema.json pkg/registry/data/registry.json
4582
```
4683

4784
#### Using ajv-cli
@@ -56,7 +93,7 @@ Validate the registry with format validation:
5693

5794
```bash
5895
# Run from the root of the repository
59-
ajv validate -c ajv-formats -s docs/registry/schema.json -d pkg/registry/data/registry.json
96+
ajv validate -c ajv-formats -s pkg/registry/data/schema.json -d pkg/registry/data/registry.json
6097
```
6198

6299
#### Using VS Code
@@ -66,7 +103,7 @@ to the top of any registry JSON file:
66103

67104
```json
68105
{
69-
"$schema": "https://raw.githubusercontent.com/stacklok/toolhive/main/docs/registry/schema.json",
106+
"$schema": "https://raw.githubusercontent.com/stacklok/toolhive/main/pkg/registry/data/schema.json",
70107
...
71108
}
72109
```

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ require (
2525
github.com/ory/fosite v0.49.0
2626
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
2727
github.com/prometheus/client_golang v1.23.0
28+
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
2829
github.com/sigstore/protobuf-specs v0.5.0
2930
github.com/sigstore/sigstore-go v1.1.1
3031
github.com/spf13/viper v1.20.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1461,6 +1461,8 @@ github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkB
14611461
github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc=
14621462
github.com/sagikazarmark/locafero v0.7.0 h1:5MqpDsTGNDhY8sGp0Aowyf0qKsPrhewaLSsFaodPcyo=
14631463
github.com/sagikazarmark/locafero v0.7.0/go.mod h1:2za3Cg5rMaTMoG/2Ulr9AwtFaIppKXTRYnozin4aB5k=
1464+
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
1465+
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY=
14641466
github.com/sassoftware/relic v7.2.1+incompatible h1:Pwyh1F3I0r4clFJXkSI8bOyJINGqpgjJU3DYAZeI05A=
14651467
github.com/sassoftware/relic v7.2.1+incompatible/go.mod h1:CWfAxv73/iLZ17rbyhIEq3K9hs5w6FpNMdUT//qR+zk=
14661468
github.com/sassoftware/relic/v7 v7.6.2 h1:rS44Lbv9G9eXsukknS4mSjIAuuX+lMq/FnStgmZlUv4=

pkg/registry/data/registry.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"$schema": "https://raw.githubusercontent.com/stacklok/toolhive/main/docs/registry/schema.json",
2+
"$schema": "https://raw.githubusercontent.com/stacklok/toolhive/main/pkg/registry/data/schema.json",
33
"last_updated": "2025-08-14T06:59:15Z",
44
"servers": {
55
"arxiv-mcp-server": {

docs/registry/schema.json renamed to pkg/registry/data/schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"$schema": "http://json-schema.org/draft-07/schema#",
3-
"$id": "https://raw.githubusercontent.com/stacklok/toolhive/main/docs/registry/schema.json",
3+
"$id": "https://raw.githubusercontent.com/stacklok/toolhive/main/pkg/registry/data/schema.json",
44
"title": "ToolHive MCP Server Registry Schema",
55
"description": "JSON Schema for the ToolHive MCP server registry. This schema validates the structure and content of registry.json entries for MCP servers. See docs/registry/management.md and docs/registry/heuristics.md for inclusion criteria and management processes.",
66
"type": "object",

pkg/registry/schema_validation.go

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package registry
2+
3+
import (
4+
"embed"
5+
"encoding/json"
6+
"fmt"
7+
"strings"
8+
9+
"github.com/santhosh-tekuri/jsonschema/v5"
10+
)
11+
12+
// embeddedSchemaFS is the embedded filesystem that carries data/schema.json,
// the registry JSON schema read by ValidateRegistrySchema.
//
//go:embed data/schema.json
13+
var embeddedSchemaFS embed.FS
14+
15+
// ValidateRegistrySchema validates registry JSON data against the registry schema
16+
func ValidateRegistrySchema(registryData []byte) error {
17+
// Load the schema from the embedded filesystem
18+
schemaData, err := embeddedSchemaFS.ReadFile("data/schema.json")
19+
if err != nil {
20+
return fmt.Errorf("failed to read embedded registry schema: %w", err)
21+
}
22+
23+
// Compile the schema
24+
compiler := jsonschema.NewCompiler()
25+
schemaID := "file://local/registry-schema.json"
26+
if err := compiler.AddResource(schemaID, strings.NewReader(string(schemaData))); err != nil {
27+
return fmt.Errorf("failed to add schema resource: %w", err)
28+
}
29+
schema, err := compiler.Compile(schemaID)
30+
if err != nil {
31+
return fmt.Errorf("failed to compile registry schema: %w", err)
32+
}
33+
34+
// Parse the registry data
35+
var registryDoc interface{}
36+
if err := json.Unmarshal(registryData, &registryDoc); err != nil {
37+
return fmt.Errorf("failed to parse registry data: %w", err)
38+
}
39+
40+
// Validate the registry data against the schema
41+
if err := schema.Validate(registryDoc); err != nil {
42+
// Extract all validation errors for better user experience
43+
if validationErr, ok := err.(*jsonschema.ValidationError); ok {
44+
return formatValidationErrors(validationErr)
45+
}
46+
return fmt.Errorf("registry schema validation failed: %w", err)
47+
}
48+
49+
return nil
50+
}
51+
52+
// formatValidationErrors formats all validation errors into a comprehensive error message
53+
func formatValidationErrors(validationErr *jsonschema.ValidationError) error {
54+
var errorMessages []string
55+
56+
// Collect all validation errors recursively
57+
collectErrors(validationErr, &errorMessages)
58+
59+
if len(errorMessages) == 0 {
60+
return fmt.Errorf("registry schema validation failed: %s", validationErr.Error())
61+
}
62+
63+
if len(errorMessages) == 1 {
64+
return fmt.Errorf("registry schema validation failed: %s", errorMessages[0])
65+
}
66+
67+
// Format multiple errors
68+
result := fmt.Sprintf("registry schema validation failed with %d errors:\n", len(errorMessages))
69+
for i, msg := range errorMessages {
70+
result += fmt.Sprintf(" %d. %s\n", i+1, msg)
71+
}
72+
73+
return fmt.Errorf("%s", strings.TrimSuffix(result, "\n"))
74+
}
75+
76+
// collectErrors recursively collects all validation error messages
77+
func collectErrors(err *jsonschema.ValidationError, messages *[]string) {
78+
if err == nil {
79+
return
80+
}
81+
82+
// If this error has causes, recurse into them instead of adding this error
83+
// This avoids duplicate parent/child error messages
84+
if len(err.Causes) > 0 {
85+
for _, cause := range err.Causes {
86+
collectErrors(cause, messages)
87+
}
88+
return
89+
}
90+
91+
// This is a leaf error - add it if it has a meaningful message
92+
if err.Message != "" {
93+
// Create a descriptive error message with path context
94+
var pathStr string
95+
if err.InstanceLocation != "" {
96+
pathStr = fmt.Sprintf(" at '%s'", err.InstanceLocation)
97+
}
98+
99+
errorMsg := fmt.Sprintf("%s%s", err.Message, pathStr)
100+
*messages = append(*messages, errorMsg)
101+
}
102+
}
103+
104+
// ValidateEmbeddedRegistry validates the embedded registry.json against the schema
105+
func ValidateEmbeddedRegistry() error {
106+
// Load the embedded registry data
107+
registryData, err := embeddedRegistryFS.ReadFile("data/registry.json")
108+
if err != nil {
109+
return fmt.Errorf("failed to load embedded registry: %w", err)
110+
}
111+
112+
return ValidateRegistrySchema(registryData)
113+
}

0 commit comments

Comments
 (0)