Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions cmd/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2181,3 +2181,115 @@ func TestDefaultToolsFileBehavior(t *testing.T) {
})
}
}

func TestParameterReferenceValidation(t *testing.T) {
ctx, err := testutils.ContextWithNewLogger()
if err != nil {
t.Fatalf("unexpected error: %s", err)
}

// Base template
baseYaml := `
sources:
dummy-source:
kind: http
baseUrl: http://example.com
tools:
test-tool:
kind: postgres-sql
source: dummy-source
description: test tool
statement: SELECT 1;
parameters:
%s`

tcs := []struct {
desc string
params string
wantErr bool
errSubstr string
}{
{
desc: "valid backward reference",
params: `
- name: source_param
type: string
description: source
- name: copy_param
type: string
description: copy
valueFromParam: source_param`,
wantErr: false,
},
{
desc: "valid forward reference (out of order)",
params: `
- name: copy_param
type: string
description: copy
valueFromParam: source_param
- name: source_param
type: string
description: source`,
wantErr: false,
},
{
desc: "invalid missing reference",
params: `
- name: copy_param
type: string
description: copy
valueFromParam: non_existent_param`,
wantErr: true,
errSubstr: "references '\"non_existent_param\"' in the 'valueFromParam' field",
},
{
desc: "invalid self reference",
params: `
- name: myself
type: string
description: self
valueFromParam: myself`,
wantErr: true,
errSubstr: "parameter \"myself\" cannot copy value from itself",
},
{
desc: "multiple valid references",
params: `
- name: a
type: string
description: a
- name: b
type: string
description: b
valueFromParam: a
- name: c
type: string
description: c
valueFromParam: a`,
wantErr: false,
},
}

for _, tc := range tcs {
t.Run(tc.desc, func(t *testing.T) {
// Indent parameters to match YAML structure
yamlContent := fmt.Sprintf(baseYaml, tc.params)

_, err := parseToolsFile(ctx, []byte(yamlContent))

if tc.wantErr {
if err == nil {
t.Fatal("expected error, got nil")
}
if !strings.Contains(err.Error(), tc.errSubstr) {
t.Errorf("error %q does not contain expected substring %q", err.Error(), tc.errSubstr)
}
} else {
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
})
}
}
45 changes: 33 additions & 12 deletions docs/en/resources/embeddingModels/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ title: "EmbeddingModels"
type: docs
weight: 2
description: >
EmbeddingModels represent services that transform text into vector embeddings for semantic search.
EmbeddingModels represent services that transform text into vector embeddings
for semantic search.
---

EmbeddingModels represent services that generate vector representations of text
data. In the MCP Toolbox, these models enable **Semantic Queries**,
allowing [Tools](../tools/) to automatically convert human-readable text into
numerical vectors before using them in a query.
data. In the MCP Toolbox, these models enable **Semantic Queries**, allowing
[Tools](../tools/) to automatically convert human-readable text into numerical
vectors before using them in a query.

This is primarily used in two scenarios:

Expand All @@ -19,14 +20,33 @@ This is primarily used in two scenarios:
- **Semantic Search**: Converting a natural language query into a vector to
perform similarity searches.

## Hidden Parameter Duplication (valueFromParam)

When building tools for vector ingestion, you often need the same input string
twice:

1. To store the original text in a TEXT column.
1. To generate the vector embedding for a VECTOR column.

Requesting an Agent (LLM) to output the exact same string twice is inefficient
and error-prone. The `valueFromParam` field solves this by allowing a parameter
to inherit its value from another parameter in the same tool.

### Key Behaviors

1. Hidden from Manifest: Parameters with valueFromParam set are excluded from
the tool definition sent to the Agent. The Agent does not know this parameter
exists.
1. Auto-Filled: When the tool is executed, the Toolbox automatically copies the
value from the referenced parameter before processing embeddings.

## Example

The following configuration defines an embedding model and applies it to
specific tool parameters.

{{< notice tip >}}
Use environment variable replacement with the format ${ENV_NAME}
instead of hardcoding your API keys into the configuration file.
{{< notice tip >}} Use environment variable replacement with the format
${ENV_NAME} instead of hardcoding your API keys into the configuration file.
{{< /notice >}}

### Step 1 - Define an Embedding Model
Expand All @@ -40,14 +60,12 @@ embeddingModels:
model: gemini-embedding-001
apiKey: ${GOOGLE_API_KEY}
dimension: 768

```

### Step 2 - Embed Tool Parameters

Use the defined embedding model, embed your query parameters using the
`embeddedBy` field. Only string-typed
parameters can be embedded:
`embeddedBy` field. Only string-typed parameters can be embedded:

```yaml
tools:
Expand All @@ -61,10 +79,13 @@ tools:
parameters:
- name: content
type: string
description: The raw text content to be stored in the database.
- name: vector_string
type: string
description: The text to be vectorized and stored.
embeddedBy: gemini-model # refers to the name of a defined embedding model
# This parameter is hidden from the LLM.
# It automatically copies the value from 'content' and embeds it.
valueFromParam: content
embeddedBy: gemini-model

# Semantic search tool
search_embedding:
Expand Down
37 changes: 37 additions & 0 deletions internal/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,43 @@ func (c *ToolConfigs) UnmarshalYAML(ctx context.Context, unmarshal func(interfac
return fmt.Errorf("invalid 'kind' field for tool %q (must be a string)", name)
}

// validify parameter references
if rawParams, ok := v["parameters"]; ok {
if paramsList, ok := rawParams.([]any); ok {
// Turn params into a map
validParamNames := make(map[string]bool)
for _, rawP := range paramsList {
if pMap, ok := rawP.(map[string]any); ok {
if pName, ok := pMap["name"].(string); ok && pName != "" {
validParamNames[pName] = true
}
}
}

// Validate references
for i, rawP := range paramsList {
pMap, ok := rawP.(map[string]any)
if !ok {
continue
}

pName, _ := pMap["name"].(string)
refName, _ := pMap["valueFromParam"].(string)

if refName != "" {
// Check if the referenced parameter exists
if !validParamNames[refName] {
return fmt.Errorf("tool %q config error: parameter %q (index %d) references '%q' in the 'valueFromParam' field, which is not a defined parameter", name, pName, i, refName)
}

// Check for self-reference
if refName == pName {
return fmt.Errorf("tool %q config error: parameter %q cannot copy value from itself", name, pName)
}
}
}
}
}
yamlDecoder, err := util.NewStrictDecoder(v)
if err != nil {
return fmt.Errorf("error creating YAML decoder for tool %q: %w", name, err)
Expand Down
24 changes: 23 additions & 1 deletion internal/util/parameters/parameters.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,12 @@ func ParseParams(ps Parameters, data map[string]any, claimsMap map[string]map[st
var err error
paramAuthServices := p.GetAuthServices()
name := p.GetName()
if len(paramAuthServices) == 0 {

sourceParamName := p.GetValueFromParam()
if sourceParamName != "" {
v = data[sourceParamName]

} else if len(paramAuthServices) == 0 {
// parse non auth-required parameter
var ok bool
v, ok = data[name]
Expand Down Expand Up @@ -318,6 +323,7 @@ type Parameter interface {
GetRequired() bool
GetAuthServices() []ParamAuthService
GetEmbeddedBy() string
GetValueFromParam() string
Parse(any) (any, error)
Manifest() ParameterManifest
McpManifest() (ParameterMcpManifest, []string)
Expand Down Expand Up @@ -465,6 +471,9 @@ func ParseParameter(ctx context.Context, p map[string]any, paramType string) (Pa
func (ps Parameters) Manifest() []ParameterManifest {
rtn := make([]ParameterManifest, 0, len(ps))
for _, p := range ps {
if p.GetValueFromParam() != "" {
continue
}
rtn = append(rtn, p.Manifest())
}
return rtn
Expand All @@ -476,6 +485,11 @@ func (ps Parameters) McpManifest() (McpToolsSchema, map[string][]string) {
authParam := make(map[string][]string)

for _, p := range ps {
// If the parameter is sourced from another param, skip it in the MCP manifest
if p.GetValueFromParam() != "" {
continue
}

name := p.GetName()
paramManifest, authParamList := p.McpManifest()
defaultV := p.GetDefault()
Expand Down Expand Up @@ -509,6 +523,7 @@ type ParameterManifest struct {
Default any `json:"default,omitempty"`
AdditionalProperties any `json:"additionalProperties,omitempty"`
EmbeddedBy string `json:"embeddedBy,omitempty"`
ValueFromParam string `json:"valueFromParam,omitempty"`
}

// ParameterMcpManifest represents properties when served as part of a ToolMcpManifest.
Expand All @@ -531,6 +546,7 @@ type CommonParameter struct {
AuthServices []ParamAuthService `yaml:"authServices"`
AuthSources []ParamAuthService `yaml:"authSources"` // Deprecated: Kept for compatibility.
EmbeddedBy string `yaml:"embeddedBy"`
ValueFromParam string `yaml:"valueFromParam"`
}

// GetName returns the name specified for the Parameter.
Expand Down Expand Up @@ -588,10 +604,16 @@ func (p *CommonParameter) IsExcludedValues(v any) bool {
return false
}

// GetEmbeddedBy returns the embedding model name for the Parameter.
func (p *CommonParameter) GetEmbeddedBy() string {
return p.EmbeddedBy
}

// GetValueFromParam returns the param value to copy from.
func (p *CommonParameter) GetValueFromParam() string {
return p.ValueFromParam
}

// MatchStringOrRegex checks if the input matches the target
func MatchStringOrRegex(input, target any) bool {
targetS, ok := target.(string)
Expand Down
14 changes: 7 additions & 7 deletions tests/embedding.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,11 @@ func AddSemanticSearchConfig(t *testing.T, config map[string]any, toolKind, inse
"description": "The text content associated with the vector.",
},
map[string]any{
"name": "text_to_embed",
"type": "string",
"description": "The text content used to generate the vector.",
"embeddedBy": "gemini_model",
"name": "text_to_embed",
"type": "string",
"description": "The text content used to generate the vector.",
"embeddedBy": "gemini_model",
"valueFromParam": "content",
},
},
}
Expand Down Expand Up @@ -108,7 +109,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se
name: "HTTP invoke insert_docs",
api: "http://127.0.0.1:5000/api/tool/insert_docs/invoke",
isMcp: false,
requestBody: `{"content": "The quick brown fox jumps over the lazy dog", "text_to_embed": "The quick brown fox jumps over the lazy dog"}`,
requestBody: `{"content": "The quick brown fox jumps over the lazy dog"}`,
want: insertWant,
},
{
Expand All @@ -131,8 +132,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se
Params: map[string]any{
"name": "insert_docs",
"arguments": map[string]any{
"content": "The quick brown fox jumps over the lazy dog",
"text_to_embed": "The quick brown fox jumps over the lazy dog",
"content": "The quick brown fox jumps over the lazy dog",
},
},
},
Expand Down
Loading