diff --git a/cmd/root_test.go b/cmd/root_test.go index 0757ccadb2d8..e825a36366e0 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -2181,3 +2181,115 @@ func TestDefaultToolsFileBehavior(t *testing.T) { }) } } + +func TestParameterReferenceValidation(t *testing.T) { + ctx, err := testutils.ContextWithNewLogger() + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + // Base template + baseYaml := ` +sources: + dummy-source: + kind: http + baseUrl: http://example.com +tools: + test-tool: + kind: postgres-sql + source: dummy-source + description: test tool + statement: SELECT 1; + parameters: +%s` + + tcs := []struct { + desc string + params string + wantErr bool + errSubstr string + }{ + { + desc: "valid backward reference", + params: ` + - name: source_param + type: string + description: source + - name: copy_param + type: string + description: copy + valueFromParam: source_param`, + wantErr: false, + }, + { + desc: "valid forward reference (out of order)", + params: ` + - name: copy_param + type: string + description: copy + valueFromParam: source_param + - name: source_param + type: string + description: source`, + wantErr: false, + }, + { + desc: "invalid missing reference", + params: ` + - name: copy_param + type: string + description: copy + valueFromParam: non_existent_param`, + wantErr: true, + errSubstr: "references '\"non_existent_param\"' in the 'valueFromParam' field", + }, + { + desc: "invalid self reference", + params: ` + - name: myself + type: string + description: self + valueFromParam: myself`, + wantErr: true, + errSubstr: "parameter \"myself\" cannot copy value from itself", + }, + { + desc: "multiple valid references", + params: ` + - name: a + type: string + description: a + - name: b + type: string + description: b + valueFromParam: a + - name: c + type: string + description: c + valueFromParam: a`, + wantErr: false, + }, + } + + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + // Indent parameters to match 
YAML structure + yamlContent := fmt.Sprintf(baseYaml, tc.params) + + _, err := parseToolsFile(ctx, []byte(yamlContent)) + + if tc.wantErr { + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tc.errSubstr) { + t.Errorf("error %q does not contain expected substring %q", err.Error(), tc.errSubstr) + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + }) + } +} diff --git a/docs/en/resources/embeddingModels/_index.md b/docs/en/resources/embeddingModels/_index.md index 84ddfb9222c2..d9da2b71c3c1 100644 --- a/docs/en/resources/embeddingModels/_index.md +++ b/docs/en/resources/embeddingModels/_index.md @@ -3,13 +3,14 @@ title: "EmbeddingModels" type: docs weight: 2 description: > - EmbeddingModels represent services that transform text into vector embeddings for semantic search. + EmbeddingModels represent services that transform text into vector embeddings + for semantic search. --- EmbeddingModels represent services that generate vector representations of text -data. In the MCP Toolbox, these models enable **Semantic Queries**, -allowing [Tools](../tools/) to automatically convert human-readable text into -numerical vectors before using them in a query. +data. In the MCP Toolbox, these models enable **Semantic Queries**, allowing +[Tools](../tools/) to automatically convert human-readable text into numerical +vectors before using them in a query. This is primarily used in two scenarios: @@ -19,14 +20,33 @@ This is primarily used in two scenarios: - **Semantic Search**: Converting a natural language query into a vector to perform similarity searches. +## Hidden Parameter Duplication (valueFromParam) + +When building tools for vector ingestion, you often need the same input string +twice: + +1. To store the original text in a TEXT column. +1. To generate the vector embedding for a VECTOR column. + +Requesting an Agent (LLM) to output the exact same string twice is inefficient +and error-prone. 
The `valueFromParam` field solves this by allowing a parameter +to inherit its value from another parameter in the same tool. + +### Key Behaviors + +1. Hidden from Manifest: Parameters with `valueFromParam` set are excluded from + the tool definition sent to the Agent. The Agent does not know this parameter + exists. +1. Auto-Filled: When the tool is executed, the Toolbox automatically copies the + value from the referenced parameter before processing embeddings. + ## Example The following configuration defines an embedding model and applies it to specific tool parameters. -{{< notice tip >}} -Use environment variable replacement with the format ${ENV_NAME} -instead of hardcoding your API keys into the configuration file. +{{< notice tip >}} Use environment variable replacement with the format +${ENV_NAME} instead of hardcoding your API keys into the configuration file. {{< /notice >}} ### Step 1 - Define an Embedding Model @@ -40,14 +60,12 @@ embeddingModels: model: gemini-embedding-001 apiKey: ${GOOGLE_API_KEY} dimension: 768 - ``` ### Step 2 - Embed Tool Parameters Use the defined embedding model, embed your query parameters using the -`embeddedBy` field. Only string-typed -parameters can be embedded: +`embeddedBy` field. Only string-typed parameters can be embedded: ```yaml tools: @@ -61,10 +79,13 @@ tools: parameters: - name: content type: string + description: The raw text content to be stored in the database. - name: vector_string type: string - description: The text to be vectorized and stored. - embeddedBy: gemini-model # refers to the name of a defined embedding model + # This parameter is hidden from the LLM. + # It automatically copies the value from 'content' and embeds it. 
+ valueFromParam: content + embeddedBy: gemini-model # Semantic search tool search_embedding: diff --git a/internal/server/config.go b/internal/server/config.go index b7042dd4b1fc..4df8cea7c856 100644 --- a/internal/server/config.go +++ b/internal/server/config.go @@ -296,6 +296,43 @@ func (c *ToolConfigs) UnmarshalYAML(ctx context.Context, unmarshal func(interfac return fmt.Errorf("invalid 'kind' field for tool %q (must be a string)", name) } + // Validate valueFromParam references + if rawParams, ok := v["parameters"]; ok { + if paramsList, ok := rawParams.([]any); ok { + // Collect the set of declared parameter names + validParamNames := make(map[string]bool) + for _, rawP := range paramsList { + if pMap, ok := rawP.(map[string]any); ok { + if pName, ok := pMap["name"].(string); ok && pName != "" { + validParamNames[pName] = true + } + } + } + + // Validate references + for i, rawP := range paramsList { + pMap, ok := rawP.(map[string]any) + if !ok { + continue + } + + pName, _ := pMap["name"].(string) + refName, _ := pMap["valueFromParam"].(string) + + if refName != "" { + // Check if the referenced parameter exists + if !validParamNames[refName] { + return fmt.Errorf("tool %q config error: parameter %q (index %d) references '%q' in the 'valueFromParam' field, which is not a defined parameter", name, pName, i, refName) + } + + // Check for self-reference + if refName == pName { + return fmt.Errorf("tool %q config error: parameter %q cannot copy value from itself", name, pName) + } + } + } + } + } yamlDecoder, err := util.NewStrictDecoder(v) if err != nil { return fmt.Errorf("error creating YAML decoder for tool %q: %w", name, err) diff --git a/internal/util/parameters/parameters.go b/internal/util/parameters/parameters.go index b11a4c1b05aa..f75da04a5d5c 100644 --- a/internal/util/parameters/parameters.go +++ b/internal/util/parameters/parameters.go @@ -134,7 +134,12 @@ func ParseParams(ps Parameters, data map[string]any, claimsMap map[string]map[st var err error paramAuthServices := 
p.GetAuthServices() name := p.GetName() - if len(paramAuthServices) == 0 { + + sourceParamName := p.GetValueFromParam() + if sourceParamName != "" { + v = data[sourceParamName] + + } else if len(paramAuthServices) == 0 { // parse non auth-required parameter var ok bool v, ok = data[name] @@ -318,6 +323,7 @@ type Parameter interface { GetRequired() bool GetAuthServices() []ParamAuthService GetEmbeddedBy() string + GetValueFromParam() string Parse(any) (any, error) Manifest() ParameterManifest McpManifest() (ParameterMcpManifest, []string) @@ -465,6 +471,9 @@ func ParseParameter(ctx context.Context, p map[string]any, paramType string) (Pa func (ps Parameters) Manifest() []ParameterManifest { rtn := make([]ParameterManifest, 0, len(ps)) for _, p := range ps { + if p.GetValueFromParam() != "" { + continue + } rtn = append(rtn, p.Manifest()) } return rtn @@ -476,6 +485,11 @@ func (ps Parameters) McpManifest() (McpToolsSchema, map[string][]string) { authParam := make(map[string][]string) for _, p := range ps { + // If the parameter is sourced from another param, skip it in the MCP manifest + if p.GetValueFromParam() != "" { + continue + } + name := p.GetName() paramManifest, authParamList := p.McpManifest() defaultV := p.GetDefault() @@ -509,6 +523,7 @@ type ParameterManifest struct { Default any `json:"default,omitempty"` AdditionalProperties any `json:"additionalProperties,omitempty"` EmbeddedBy string `json:"embeddedBy,omitempty"` + ValueFromParam string `json:"valueFromParam,omitempty"` } // ParameterMcpManifest represents properties when served as part of a ToolMcpManifest. @@ -531,6 +546,7 @@ type CommonParameter struct { AuthServices []ParamAuthService `yaml:"authServices"` AuthSources []ParamAuthService `yaml:"authSources"` // Deprecated: Kept for compatibility. EmbeddedBy string `yaml:"embeddedBy"` + ValueFromParam string `yaml:"valueFromParam"` } // GetName returns the name specified for the Parameter. 
@@ -588,10 +604,16 @@ func (p *CommonParameter) IsExcludedValues(v any) bool { return false } +// GetEmbeddedBy returns the embedding model name for the Parameter. func (p *CommonParameter) GetEmbeddedBy() string { return p.EmbeddedBy } +// GetValueFromParam returns the name of the parameter this Parameter copies its value from. +func (p *CommonParameter) GetValueFromParam() string { + return p.ValueFromParam +} + // MatchStringOrRegex checks if the input matches the target func MatchStringOrRegex(input, target any) bool { targetS, ok := target.(string) diff --git a/tests/embedding.go b/tests/embedding.go index a370ae84d278..e4e04f3da8cc 100644 --- a/tests/embedding.go +++ b/tests/embedding.go @@ -64,10 +64,11 @@ func AddSemanticSearchConfig(t *testing.T, config map[string]any, toolKind, inse "description": "The text content associated with the vector.", }, map[string]any{ - "name": "text_to_embed", - "type": "string", - "description": "The text content used to generate the vector.", - "embeddedBy": "gemini_model", + "name": "text_to_embed", + "type": "string", + "description": "The text content used to generate the vector.", + "embeddedBy": "gemini_model", + "valueFromParam": "content", }, }, } @@ -108,7 +109,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se name: "HTTP invoke insert_docs", api: "http://127.0.0.1:5000/api/tool/insert_docs/invoke", isMcp: false, - requestBody: `{"content": "The quick brown fox jumps over the lazy dog", "text_to_embed": "The quick brown fox jumps over the lazy dog"}`, + requestBody: `{"content": "The quick brown fox jumps over the lazy dog"}`, want: insertWant, }, { @@ -131,8 +132,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se Params: map[string]any{ "name": "insert_docs", "arguments": map[string]any{ - "content": "The quick brown fox jumps over the lazy dog", - "text_to_embed": "The quick brown fox jumps over the lazy dog", + "content": "The quick brown fox jumps over the lazy dog", }, }, },