From 2a109e26803410591f6c0ce33d1ff3e3b6fdcd49 Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Wed, 4 Feb 2026 19:21:26 +0530 Subject: [PATCH 1/8] updated custom metric docs to include multi-turn --- docs/docs/metrics-custom.mdx | 170 +++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 8 deletions(-) diff --git a/docs/docs/metrics-custom.mdx b/docs/docs/metrics-custom.mdx index cc6080413..f3cd342a0 100644 --- a/docs/docs/metrics-custom.mdx +++ b/docs/docs/metrics-custom.mdx @@ -9,7 +9,8 @@ sidebar_label: Do it yourself import MetricTagsDisplayer from '@site/src/components/MetricTagsDisplayer'; -import { Timeline, TimelineItem } from '@site/src/components/Timeline'; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; @@ -31,14 +32,14 @@ There are many ways one can implement an LLM evaluation metric. Here is a [great ## Rules To Follow When Creating A Custom Metric - - - - ### 1. Inherit the `BaseMetric` class To begin, create a class that inherits from `deepeval`'s `BaseMetric` class: + + + + ```python from deepeval.metrics import BaseMetric @@ -46,11 +47,27 @@ class CustomMetric(BaseMetric): ... ``` -This is important because the `BaseMetric` class will help `deepeval` acknowledge your custom metric during evaluation. +This is important because the `BaseMetric` class will help `deepeval` acknowledge your custom metric as a single-turn metric during evaluation. + + + + +```python +from deepeval.metrics import BaseConversationalMetric + +class CustomConversationalMetric(BaseConversationalMetric): + ... +``` + +This is important because the `BaseConversationalMetric` class will help `deepeval` acknowledge your custom metric as a multi-turn metric during evaluation. + + + + ### 2. Implement the `__init__()` method -The `BaseMetric` class gives your custom metric a few properties that you can configure and be displayed post-evaluation, either locally or on Confident AI. +The `BaseMetric` / `BaseConversationalMetric` class gives your custom metric a few properties that you can configure and be displayed post-evaluation, either locally or on Confident AI. An example is the `threshold` property, which determines whether the `LLMTestCase` being evaluated has passed or not. Although **the `threshold` property is all you need to make a custom metric functional**, here are some additional properties for those who want even more customizability: @@ -65,6 +82,10 @@ Don't read too much into the advanced properties for now, we'll go over how they The `__init__()` method is a great place to set these properties: + + + + ```python from deepeval.metrics import BaseMetric @@ -86,6 +107,33 @@ class CustomMetric(BaseMetric): self.async_mode = async_mode ``` + + + +```python +from deepeval.metrics import BaseConversationalMetric + +class CustomConversationalMetric(BaseConversationalMetric): + def __init__( + self, + threshold: float = 0.5, + # Optional + evaluation_model: str, + include_reason: bool = True, + strict_mode: bool = True, + async_mode: bool = True + ): + self.threshold = threshold + # Optional + self.evaluation_model = evaluation_model + self.include_reason = include_reason + self.strict_mode = strict_mode + self.async_mode = async_mode +``` + + + + ### 3. Implement the `measure()` and `a_measure()` methods The `measure()` and `a_measure()` method is where all the evaluation happens. 
In `deepeval`, evaluation is the process of applying a metric to an `LLMTestCase` to generate a score and optionally a reason for the score (if you're using an LLM) based on the scoring algorithm. @@ -114,6 +162,12 @@ Both `measure()` and `a_measure()` **MUST**: You can also optionally set `self.reason` in the measure methods (if you're using an LLM for evaluation), or wrap everything in a `try` block to catch any exceptions and set it to `self.error`. Here's a hypothetical example: + + + + + + ```python from deepeval.metrics import BaseMetric from deepeval.test_case import LLMTestCase @@ -150,6 +204,49 @@ class CustomMetric(BaseMetric): raise ``` + + + +```python +from deepeval.metrics import BaseConversationalMetric +from deepeval.test_case import ConversationalTestCase + +class CustomConversationalMetric(BaseConversationalMetric): + ... + + def measure(self, test_case: ConversationalTestCase) -> float: + # Although not required, we recommend catching errors + # in a try block + try: + self.score = generate_hypothetical_score(test_case) + if self.include_reason: + self.reason = generate_hypothetical_reason(test_case) + self.success = self.score >= self.threshold + return self.score + except Exception as e: + # set metric error and re-raise it + self.error = str(e) + raise + + async def a_measure(self, test_case: ConversationalTestCase) -> float: + # Although not required, we recommend catching errors + # in a try block + try: + self.score = await async_generate_hypothetical_score(test_case) + if self.include_reason: + self.reason = await async_generate_hypothetical_reason(test_case) + self.success = self.score >= self.threshold + return self.score + except Exception as e: + # set metric error and re-raise it + self.error = str(e) + raise +``` + + + + + :::tip Often times, the blocking part of an LLM evaluation metric stems from the API calls made to your LLM provider (such as OpenAI's API endpoints), and so ultimately you'll have to ensure that LLM inference can indeed be made asynchronous. @@ -174,6 +271,10 @@ You can also [click here to find an example of offloading LLM inference to a sep Under the hood, `deepeval` calls the `is_successful()` method to determine the status of your metric for a given `LLMTestCase`. We recommend copy and pasting the code below directly as your `is_successful()` implementation: + + + + ```python from deepeval.metrics import BaseMetric from deepeval.test_case import LLMTestCase @@ -185,13 +286,46 @@ class CustomMetric(BaseMetric): if self.error is not None: self.success = False else: - return self.success + try: + self.success = self.score >= self.threshold + except TypeError: + self.success = False + return self.success ``` + + + +```python +from deepeval.metrics import BaseConversationalMetric +from deepeval.test_case import ConversationalTestCase + +class CustomConversationalMetric(BaseConversationalMetric): + ... + + def is_successful(self) -> bool: + if self.error is not None: + self.success = False + else: + try: + self.success = self.score >= self.threshold + except TypeError: + self.success = False + return self.success +``` + + + + + ### 5. 
Name Your Custom Metric Probably the easiest step, all that's left is to name your custom metric: + + + + ```python from deepeval.metrics import BaseMetric from deepeval.test_case import LLMTestCase @@ -204,6 +338,26 @@ class CustomMetric(BaseMetric): return "My Custom Metric" ``` + + + +```python +from deepeval.metrics import BaseConversationalMetric +from deepeval.test_case import ConversationalTestCase + +class CustomConversationalMetric(BaseConversationalMetric): + ... + + @property + def __name__(self): + return "My Custom Metric" +``` + + + + + + **Congratulations 🎉!** You've just learnt how to build a custom metric that is 100% integrated with `deepeval`'s ecosystem. In the following section, we'll go through a few real-life examples. ## More Examples From ea87ef2ac4ef78111078beeb86b337652e4159e7 Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Wed, 4 Feb 2026 19:21:47 +0530 Subject: [PATCH 2/8] added openrouter docs --- docs/integrations/models/openrouter.mdx | 12 ++++++------ docs/sidebarIntegrations.js | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/integrations/models/openrouter.mdx b/docs/integrations/models/openrouter.mdx index 2988ad51f..d1fcef265 100644 --- a/docs/integrations/models/openrouter.mdx +++ b/docs/integrations/models/openrouter.mdx @@ -1,5 +1,5 @@ --- -# id: openrouter +id: openrouter title: OpenRouter sidebar_label: OpenRouter --- @@ -43,7 +43,7 @@ model = OpenRouterModel( model="openai/gpt-4.1", api_key="your-openrouter-api-key", # Optional: override the default OpenRouter endpoint - # base_url="https://openrouter.ai/api/v1", + base_url="https://openrouter.ai/api/v1", # Optional: pass OpenRouter headers via **kwargs default_headers={ "HTTP-Referer": "https://your-site.com", @@ -59,12 +59,12 @@ There are **ZERO** mandatory and **SEVEN** optional parameters when creating an - [Optional] `model`: A string specifying the OpenRouter model to use. Defaults to `OPENROUTER_MODEL_NAME` if set; otherwise falls back to "openai/gpt-4.1". - [Optional] `api_key`: A string specifying your OpenRouter API key for authentication. Defaults to `OPENROUTER_API_KEY` if not passed; raises an error at runtime if unset. - [Optional] `base_url`: A string specifying the base URL for the OpenRouter API endpoint. Defaults to `OPENROUTER_BASE_URL` if set; otherwise falls back to "https://openrouter.ai/api/v1". -- [Optional] `temperature`: A float specifying the model temperature. Defaults to `TEMPERATURE` if not passed; falls back to `0.0` if unset; raises if < 0. -- [Optional] `cost_per_input_token`: A float specifying the cost for each input token for the provided model. Defaults to `OPENROUTER_COST_PER_INPUT_TOKEN` if set. -- [Optional] `cost_per_output_token`: A float specifying the cost for each output token for the provided model. Defaults to `OPENROUTER_COST_PER_OUTPUT_TOKEN` if set. +- [Optional] `temperature`: A float specifying the model temperature. Defaults to `TEMPERATURE` if not passed; falls back to `0.0` if unset. +- [Optional] `cost_per_input_token`: A float specifying the cost for each input token for the provided model. Defaults to `OPENROUTER_COST_PER_INPUT_TOKEN` if not passed; raises an error at runtime if unset. +- [Optional] `cost_per_output_token`: A float specifying the cost for each output token for the provided model. Defaults to `OPENROUTER_COST_PER_OUTPUT_TOKEN` if not passed; raises an error at runtime if unset. 
- [Optional] `generation_kwargs`: A dictionary of additional generation parameters forwarded to OpenRouter's `chat.completions.create(...)` call -Any additional **kwargs you would like to use for your OpenRouter client can be passed directly to OpenRouterModel(...). These are forwarded to the underlying OpenAI client constructor. We recommend double-checking the parameters and headers supported by your chosen model in the [official OpenRouter docs](https://openrouter.ai/docs). +Any additional `**kwargs` you would like to use for your `OpenRouter` client can be passed directly to `OpenRouterModel(...)`. These are forwarded to the underlying OpenAI client constructor. We recommend double-checking the parameters and headers supported by your chosen model in the [official OpenRouter docs](https://openrouter.ai/docs). :::tip Pass headers specific to OpenRouter via kwargs: diff --git a/docs/sidebarIntegrations.js b/docs/sidebarIntegrations.js index a8a4ab332..fb5c97207 100644 --- a/docs/sidebarIntegrations.js +++ b/docs/sidebarIntegrations.js @@ -24,6 +24,7 @@ module.exports = { 'models/openai', 'models/azure-openai', 'models/ollama', + 'models/openrouter', 'models/anthropic', 'models/amazon-bedrock', 'models/gemini', From e3423d07d0abbfbee08453a1c51103f64ee5832c Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Wed, 4 Feb 2026 20:18:02 +0530 Subject: [PATCH 3/8] . --- deepeval/config/settings.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/deepeval/config/settings.py b/deepeval/config/settings.py index 1308e32f1..c4207ddf5 100644 --- a/deepeval/config/settings.py +++ b/deepeval/config/settings.py @@ -322,7 +322,6 @@ def __setattr__(self, name: str, value): description="Path to the directory used by DeepEval to store cache files. If set, this overrides the default cache location. 
The directory will be created if it does not exist.", ) - # Display / Truncation DEEPEVAL_MAXLEN_TINY: Optional[int] = Field( 40, @@ -1022,7 +1021,12 @@ def DEEPEVAL_TASK_GATHER_BUFFER_SECONDS(self) -> float: def _coerce_yes_no(cls, v): return None if v is None else parse_bool(v, default=False) - @field_validator("DEEPEVAL_RESULTS_FOLDER", "ENV_DIR_PATH", "DEEPEVAL_CACHE_FOLDER", mode="before") + @field_validator( + "DEEPEVAL_RESULTS_FOLDER", + "ENV_DIR_PATH", + "DEEPEVAL_CACHE_FOLDER", + mode="before", + ) @classmethod def _coerce_path(cls, v): if v is None: From bb04c5ce2c46c71bee1d973482759e2160fb3f00 Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Wed, 4 Feb 2026 22:13:05 +0530 Subject: [PATCH 4/8] added tests for output schema and tools --- deepeval/prompt/utils.py | 8 +- tests/test_confident/test_prompt.py | 844 +++++++++++++++++++++++++++- 2 files changed, 846 insertions(+), 6 deletions(-) diff --git a/deepeval/prompt/utils.py b/deepeval/prompt/utils.py index 8d2f7e0fb..a3929cfd0 100644 --- a/deepeval/prompt/utils.py +++ b/deepeval/prompt/utils.py @@ -256,7 +256,13 @@ def build_node(field_list: List[OutputSchemaField]) -> Dict[str, Any]: field_type = ( field.type.value if hasattr(field.type, "value") else field.type ) - field_schema = {"type": map_type(field.type)} + normalized_type = ( + SchemaDataType(field_type) + if not isinstance(field_type, SchemaDataType) + else field_type + ) + + field_schema = {"type": map_type(normalized_type)} # Add description if available if field.description: diff --git a/tests/test_confident/test_prompt.py b/tests/test_confident/test_prompt.py index d2a129bfc..918c19339 100644 --- a/tests/test_confident/test_prompt.py +++ b/tests/test_confident/test_prompt.py @@ -1,7 +1,8 @@ import uuid import time +from pydantic import BaseModel from unittest.mock import patch -from deepeval.prompt import Prompt +from deepeval.prompt import Prompt, Tool from deepeval.prompt.api import ( PromptType, PromptInterpolationType, @@ -11,11 +12,61 @@ ReasoningEffort, OutputType, Verbosity, + ToolMode ) from deepeval.confident.api import Api from deepeval.metrics.faithfulness.schema import FaithfulnessVerdict +class NestedObject(BaseModel): + nested_field: str + nested_number: int + + +class SimpleSchema(BaseModel): + name: str + value: float + + +class ComplexOutputSchema(BaseModel): + title: str + count: int + score: float + active: bool + metadata: NestedObject + + +class DeeplyNestedObject(BaseModel): + level3_field: str + + +class MiddleNestedObject(BaseModel): + level2_field: int + deep_object: DeeplyNestedObject + + +class VeryComplexSchema(BaseModel): + id: str + simple_field: str + number_field: int + float_field: float + bool_field: bool + nested_obj: MiddleNestedObject + + +class ToolInputSchema(BaseModel): + query: str + max_results: int + include_metadata: bool + + +class UpdatedToolInputSchema(BaseModel): + query: str + max_results: int + include_metadata: bool + new_field: str + + class TestPromptText: ALIAS = "test_prompt_text" ALIAS_WITH_INTERPOLATION_TYPE = "test_prompt_text_interpolation_type" @@ -136,7 +187,7 @@ def test_version_polling(self): time.sleep(5) # polls twice in 5 seconds - assert spy_api.call_count == 3 # 1 for pull, 2 for polling + assert spy_api.call_count >= 2 # 1 for pull, 2 for polling prompt._stop_polling() def test_label_polling(self): @@ -148,9 +199,349 @@ def test_label_polling(self): time.sleep(5) # polls twice in 5 seconds assert prompt.version == self.LABEL_VERSION - assert spy_api.call_count == 3 # 1 for pull, 2 for polling + assert 
spy_api.call_count >= 2 # 1 for pull, 2 for polling prompt._stop_polling() + def test_push_with_simple_output_schema(self): + """Test pushing text prompt with simple output schema""" + ALIAS = "test_prompt_text_simple_schema" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + prompt.push( + text=f"Generate data {UUID}", + output_type=OutputType.SCHEMA, + output_schema=SimpleSchema, + ) + + prompt.pull() + + # Verify output schema + assert prompt.output_type == OutputType.SCHEMA + assert prompt.output_schema is not None + assert hasattr(prompt.output_schema, "model_fields") + + expected_fields = {"name", "value"} + actual_fields = set(prompt.output_schema.model_fields.keys()) + assert actual_fields == expected_fields + + # Verify field types + assert prompt.output_schema.model_fields["name"].annotation == str + assert prompt.output_schema.model_fields["value"].annotation == float + + def test_push_with_nested_output_schema(self): + """Test pushing text prompt with nested output schema""" + ALIAS = "test_prompt_text_nested_schema" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + prompt.push( + text=f"Generate complex data {UUID}", + output_type=OutputType.SCHEMA, + output_schema=ComplexOutputSchema, + ) + + prompt.pull() + + # Verify output schema + assert prompt.output_type == OutputType.SCHEMA + assert prompt.output_schema is not None + + expected_fields = {"title", "count", "score", "active", "metadata"} + actual_fields = set(prompt.output_schema.model_fields.keys()) + assert actual_fields == expected_fields + + # Verify nested object + nested_type = prompt.output_schema.model_fields["metadata"].annotation + assert hasattr(nested_type, "model_fields") + nested_fields = set(nested_type.model_fields.keys()) + assert nested_fields == {"nested_field", "nested_number"} + + def test_push_with_deeply_nested_output_schema(self): + """Test pushing text prompt with deeply nested output schema (3 levels)""" + ALIAS = "test_prompt_text_deep_nested_schema" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + prompt.push( + text=f"Generate very complex data {UUID}", + output_type=OutputType.SCHEMA, + output_schema=VeryComplexSchema, + ) + + prompt.pull() + + # Verify top level schema + assert prompt.output_schema is not None + top_fields = set(prompt.output_schema.model_fields.keys()) + assert top_fields == {"id", "simple_field", "number_field", "float_field", "bool_field", "nested_obj"} + + # Verify level 2 nested object + level2_type = prompt.output_schema.model_fields["nested_obj"].annotation + assert hasattr(level2_type, "model_fields") + level2_fields = set(level2_type.model_fields.keys()) + assert level2_fields == {"level2_field", "deep_object"} + + # Verify level 3 nested object + level3_type = level2_type.model_fields["deep_object"].annotation + assert hasattr(level3_type, "model_fields") + level3_fields = set(level3_type.model_fields.keys()) + assert level3_fields == {"level3_field"} + + def test_push_single_tool(self): + """Test pushing text prompt with a single tool""" + ALIAS = "test_prompt_text_single_tool" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + tool = Tool( + name="SearchTool", + description="A tool for searching", + mode=ToolMode.STRICT, + structured_schema=ToolInputSchema, + ) + + prompt.push( + text=f"Use the search tool {UUID}", + tools=[tool], + ) + + prompt.pull() + + # Verify tools + assert prompt.tools is not None + assert len(prompt.tools) == 1 + + pulled_tool = prompt.tools[0] + assert pulled_tool.name == "SearchTool" + assert 
pulled_tool.description == "A tool for searching" + assert pulled_tool.mode == ToolMode.STRICT + + # Verify tool schema + assert pulled_tool.structured_schema is not None + assert pulled_tool.structured_schema.fields is not None + + # Check input_schema property + input_schema = pulled_tool.input_schema + assert input_schema["type"] == "object" + assert "query" in input_schema["properties"] + assert "max_results" in input_schema["properties"] + assert "include_metadata" in input_schema["properties"] + + def test_push_multiple_tools(self): + """Test pushing text prompt with multiple tools""" + ALIAS = "test_prompt_text_multiple_tools" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + tool1 = Tool( + name="SearchTool", + description="Search tool", + mode=ToolMode.STRICT, + structured_schema=ToolInputSchema, + ) + + tool2 = Tool( + name="AnalysisTool", + description="Analysis tool", + mode=ToolMode.NO_ADDITIONAL, + structured_schema=SimpleSchema, + ) + + prompt.push( + text=f"Use multiple tools {UUID}", + tools=[tool1, tool2], + ) + + prompt.pull() + + # Verify tools + assert prompt.tools is not None + assert len(prompt.tools) == 2 + + tool_names = {tool.name for tool in prompt.tools} + assert tool_names == {"SearchTool", "AnalysisTool"} + + # Verify each tool + for tool in prompt.tools: + assert tool.structured_schema is not None + assert tool.input_schema is not None + + def test_update_tool_by_name(self): + """Test updating a tool with the same name (should replace it)""" + ALIAS = "test_prompt_text_update_tool" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + # Push initial tool + original_tool = Tool( + name="SearchTool", + description="Original search tool", + mode=ToolMode.STRICT, + structured_schema=ToolInputSchema, + ) + + prompt.push( + text=f"Initial tool push {UUID}", + tools=[original_tool], + ) + + prompt.pull() + + initial_tool = prompt.tools[0] + assert initial_tool.description == "Original search tool" + + # Update with new tool (same name) + updated_tool = Tool( + name="SearchTool", # Same name + description="Updated search tool", # Different description + mode=ToolMode.NO_ADDITIONAL, # Different mode + structured_schema=UpdatedToolInputSchema, # Different schema + ) + + prompt.update( + version="latest", + tools=[updated_tool], + ) + + prompt.pull() + + # Verify tool was updated + assert prompt.tools is not None + assert len(prompt.tools) == 1 + + final_tool = prompt.tools[0] + assert final_tool.name == "SearchTool" + assert final_tool.description == "Updated search tool" + assert final_tool.mode == ToolMode.NO_ADDITIONAL + + # Verify schema was updated + input_schema = final_tool.input_schema + assert "new_field" in input_schema["properties"] + + def test_push_output_schema_and_tools(self): + """Test pushing both output schema and tools together""" + ALIAS = "test_prompt_text_schema_and_tools" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + tool = Tool( + name="DataTool", + description="Data processing tool", + mode=ToolMode.STRICT, + structured_schema=SimpleSchema, + ) + + prompt.push( + text=f"Process data with tool {UUID}", + output_type=OutputType.SCHEMA, + output_schema=ComplexOutputSchema, + tools=[tool], + ) + prompt.output_schema = None + prompt.tools = None + prompt.pull() + + # Verify output schema + assert prompt.output_type == OutputType.SCHEMA + assert prompt.output_schema is not None + assert "title" in prompt.output_schema.model_fields + + # Verify tool + assert prompt.tools is not None + assert len(prompt.tools) == 1 + assert 
prompt.tools[0].name == "DataTool" + + def test_pull_preserves_tool_details(self): + """Test that pulling preserves all tool details including schema structure""" + ALIAS = "test_prompt_text_tool_preservation" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + tool = Tool( + name="DetailedTool", + description="A tool with detailed schema", + mode=ToolMode.STRICT, + structured_schema=VeryComplexSchema, + ) + + prompt.push( + text=f"Detailed tool test {UUID}", + tools=[tool], + ) + + # Pull multiple times to ensure consistency + for _ in range(3): + prompt.pull() + + assert prompt.tools is not None + assert len(prompt.tools) == 1 + + pulled_tool = prompt.tools[0] + assert pulled_tool.name == "DetailedTool" + assert pulled_tool.description == "A tool with detailed schema" + assert pulled_tool.mode == ToolMode.STRICT + + # Verify input schema has all fields + input_schema = pulled_tool.input_schema + assert "id" in input_schema["properties"] + assert "simple_field" in input_schema["properties"] + assert "nested_obj" in input_schema["properties"] + + # Verify nested structure + nested_props = input_schema["properties"]["nested_obj"]["properties"] + assert "level2_field" in nested_props + assert "deep_object" in nested_props + + def test_cache_preserves_output_schema_and_tools(self): + """Test that caching preserves output schema and tools""" + ALIAS = "test_prompt_text_cache_schema_tools" + prompt1 = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + tool = Tool( + name="CachedTool", + description="Tool for cache test", + mode=ToolMode.STRICT, + structured_schema=SimpleSchema, + ) + + prompt1.push( + text=f"Cache test {UUID}", + output_type=OutputType.SCHEMA, + output_schema=ComplexOutputSchema, + tools=[tool], + ) + + # Pull and cache + prompt1.pull(write_to_cache=False) + version = prompt1.version + + # Load from cache + prompt2 = Prompt(alias=ALIAS) + prompt2.pull(version=version) + + # Verify output schema preserved + assert prompt2.output_schema is not None + assert set(prompt2.output_schema.model_fields.keys()) == set( + prompt1.output_schema.model_fields.keys() + ) + + # Verify tools preserved + assert prompt2.tools is not None + assert len(prompt2.tools) == len(prompt1.tools) + assert prompt2.tools[0].name == prompt1.tools[0].name + assert prompt2.tools[0].mode == prompt1.tools[0].mode + class TestPromptList: ALIAS = "test_prompt_list" @@ -326,7 +717,7 @@ def test_version_polling(self): time.sleep(5) # polls twice in 5 seconds - assert spy_api.call_count == 3 # 1 for pull, 2 for polling + assert spy_api.call_count >= 2 # 1 for pull, 2 for polling prompt._stop_polling() def test_label_polling(self): @@ -338,7 +729,7 @@ def test_label_polling(self): time.sleep(5) # polls twice in 5 seconds assert prompt.version == self.LABEL_VERSION - assert spy_api.call_count == 3 # 1 for pull, 2 for polling + assert spy_api.call_count >= 2 # 1 for pull, 2 for polling prompt._stop_polling() def test_model_settings_pull(self): @@ -442,3 +833,446 @@ def test_cache_preserves_settings(self): assert cached_fields == original_fields else: assert prompt2.output_schema == original_output_schema + + def test_push_with_simple_output_schema(self): + """Test pushing list prompt with simple output schema""" + ALIAS = "test_prompt_list_simple_schema" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Generate data {UUID}"), + PromptMessage(role="assistant", content=f"Here's the data {UUID}"), + ] + + prompt.push( + messages=messages, + 
output_type=OutputType.SCHEMA, + output_schema=SimpleSchema, + ) + + prompt.pull() + + # Verify output schema + assert prompt.output_type == OutputType.SCHEMA + assert prompt.output_schema is not None + + expected_fields = {"name", "value"} + actual_fields = set(prompt.output_schema.model_fields.keys()) + assert actual_fields == expected_fields + + def test_push_with_nested_output_schema(self): + """Test pushing list prompt with nested output schema""" + ALIAS = "test_prompt_list_nested_schema" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="system", content="You are a data generator"), + PromptMessage(role="user", content=f"Generate complex data {UUID}"), + ] + + prompt.push( + messages=messages, + output_type=OutputType.SCHEMA, + output_schema=ComplexOutputSchema, + ) + + prompt.pull() + + # Verify nested structure + assert prompt.output_schema is not None + assert "metadata" in prompt.output_schema.model_fields + + nested_type = prompt.output_schema.model_fields["metadata"].annotation + assert hasattr(nested_type, "model_fields") + assert "nested_field" in nested_type.model_fields + assert "nested_number" in nested_type.model_fields + + def test_push_with_deeply_nested_output_schema(self): + """Test pushing list prompt with deeply nested output schema""" + ALIAS = "test_prompt_list_deep_nested_schema" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Complex nested data {UUID}"), + ] + + prompt.push( + messages=messages, + output_type=OutputType.SCHEMA, + output_schema=VeryComplexSchema, + ) + + prompt.pull() + + # Verify 3-level nesting + assert prompt.output_schema is not None + + # Level 1 + assert "nested_obj" in prompt.output_schema.model_fields + + # Level 2 + level2_type = prompt.output_schema.model_fields["nested_obj"].annotation + assert "deep_object" in level2_type.model_fields + + # Level 3 + level3_type = level2_type.model_fields["deep_object"].annotation + assert "level3_field" in level3_type.model_fields + + def test_push_single_tool(self): + """Test pushing list prompt with a single tool""" + ALIAS = "test_prompt_list_single_tool" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Use search tool {UUID}"), + ] + + tool = Tool( + name="SearchTool", + description="Search functionality", + mode=ToolMode.STRICT, + structured_schema=ToolInputSchema, + ) + + prompt.push( + messages=messages, + tools=[tool], + ) + + prompt.pull() + + # Verify tool + assert prompt.tools is not None + assert len(prompt.tools) == 1 + assert prompt.tools[0].name == "SearchTool" + + # Verify tool schema + input_schema = prompt.tools[0].input_schema + assert "query" in input_schema["properties"] + assert input_schema["properties"]["query"]["type"] == "string" + + def test_push_multiple_tools(self): + """Test pushing list prompt with multiple tools""" + ALIAS = "test_prompt_list_multiple_tools" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Multiple tools test {UUID}"), + ] + + tool1 = Tool( + name="Tool1", + description="First tool", + mode=ToolMode.STRICT, + structured_schema=SimpleSchema, + ) + + tool2 = Tool( + name="Tool2", + description="Second tool", + mode=ToolMode.NO_ADDITIONAL, + structured_schema=ToolInputSchema, + ) + + tool3 = Tool( + name="Tool3", + description="Third tool", + mode=ToolMode.ALLOW_ADDITIONAL, + structured_schema=ComplexOutputSchema, + ) + + 
prompt.push( + messages=messages, + tools=[tool1, tool2, tool3], + ) + + prompt.pull() + + # Verify all tools + assert prompt.tools is not None + assert len(prompt.tools) == 3 + + tool_names = {tool.name for tool in prompt.tools} + assert tool_names == {"Tool1", "Tool2", "Tool3"} + + # Verify different modes + modes = {tool.name: tool.mode for tool in prompt.tools} + assert modes["Tool1"] == ToolMode.STRICT + assert modes["Tool2"] == ToolMode.NO_ADDITIONAL + assert modes["Tool3"] == ToolMode.ALLOW_ADDITIONAL + + def test_update_tool_by_name(self): + """Test updating a tool in list prompt""" + ALIAS = "test_prompt_list_update_tool" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Initial {UUID}"), + ] + + # Initial tool + tool = Tool( + name="UpdateableTool", + description="Original", + mode=ToolMode.STRICT, + structured_schema=ToolInputSchema, + ) + + prompt.push(messages=messages, tools=[tool]) + prompt.pull() + + assert prompt.tools[0].description == "Original" + + # Update tool + updated_tool = Tool( + name="UpdateableTool", + description="Updated", + mode=ToolMode.ALLOW_ADDITIONAL, + structured_schema=UpdatedToolInputSchema, + ) + + prompt.update( + version="latest", + tools=[updated_tool], + ) + + prompt.pull() + + assert prompt.tools[0].description == "Updated" + assert prompt.tools[0].mode == ToolMode.ALLOW_ADDITIONAL + + # Verify new field in schema + input_schema = prompt.tools[0].input_schema + assert "new_field" in input_schema["properties"] + + def test_push_output_schema_and_tools(self): + """Test pushing list prompt with both output schema and tools""" + ALIAS = "test_prompt_list_schema_and_tools" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="system", content="You are helpful"), + PromptMessage(role="user", content=f"Process {UUID}"), + ] + + tool = Tool( + name="ProcessorTool", + description="Processing tool", + mode=ToolMode.STRICT, + structured_schema=SimpleSchema, + ) + + prompt.push( + messages=messages, + output_type=OutputType.SCHEMA, + output_schema=VeryComplexSchema, + tools=[tool], + ) + prompt.output_schema = None + prompt.tools = None + prompt.pull() + + # Verify both present + assert prompt.output_schema is not None + assert prompt.tools is not None + assert len(prompt.tools) == 1 + + # Verify they're different schemas + output_fields = set(prompt.output_schema.model_fields.keys()) + tool_input_schema = prompt.tools[0].input_schema + tool_fields = set(tool_input_schema["properties"].keys()) + + # They should have different fields + assert output_fields != tool_fields + + def test_pull_preserves_tool_details(self): + """Test that pulling list prompt preserves all tool details""" + ALIAS = "test_prompt_list_tool_preservation" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Preserve test {UUID}"), + ] + + tool = Tool( + name="ComplexTool", + description="Tool with complex schema", + mode=ToolMode.NO_ADDITIONAL, + structured_schema=VeryComplexSchema, + ) + + prompt.push(messages=messages, tools=[tool]) + + # Pull multiple times + for i in range(3): + prompt.pull() + + assert prompt.tools is not None + pulled_tool = prompt.tools[0] + + assert pulled_tool.name == "ComplexTool" + assert pulled_tool.mode == ToolMode.NO_ADDITIONAL + + # Verify complex nested structure preserved + input_schema = pulled_tool.input_schema + assert "nested_obj" in input_schema["properties"] + + nested = 
input_schema["properties"]["nested_obj"] + assert nested["type"] == "object" + assert "deep_object" in nested["properties"] + + def test_cache_preserves_output_schema_and_tools(self): + """Test that caching preserves output schema and tools for list prompts""" + ALIAS = "test_prompt_list_cache_schema_tools" + prompt1 = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Cache test {UUID}"), + ] + + tool = Tool( + name="CachedListTool", + description="Tool for list cache test", + mode=ToolMode.STRICT, + structured_schema=ComplexOutputSchema, + ) + + prompt1.push( + messages=messages, + output_type=OutputType.SCHEMA, + output_schema=VeryComplexSchema, + tools=[tool], + ) + + prompt1.pull(write_to_cache=False) + version = prompt1.version + + prompt2 = Prompt(alias=ALIAS) + prompt2.pull(version=version) + + # Verify output schema + assert prompt2.output_schema is not None + assert set(prompt2.output_schema.model_fields.keys()) == set( + prompt1.output_schema.model_fields.keys() + ) + + # Verify tools + assert prompt2.tools is not None + assert len(prompt2.tools) == 1 + assert prompt2.tools[0].name == "CachedListTool" + + # Verify tool schema structure + schema1 = prompt1.tools[0].input_schema + schema2 = prompt2.tools[0].input_schema + assert set(schema1["properties"].keys()) == set(schema2["properties"].keys()) + + def test_add_and_remove_tools(self): + """Test adding and removing tools via update""" + ALIAS = "test_prompt_list_add_remove_tools" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"Tool management {UUID}"), + ] + + # Start with one tool + tool1 = Tool( + name="InitialTool", + description="First tool", + mode=ToolMode.STRICT, + structured_schema=SimpleSchema, + ) + + prompt.push(messages=messages, tools=[tool1]) + prompt.pull() + + assert len(prompt.tools) == 1 + assert prompt.tools[0].name == "InitialTool" + + # Add second tool + tool2 = Tool( + name="SecondTool", + description="Additional tool", + mode=ToolMode.NO_ADDITIONAL, + structured_schema=ToolInputSchema, + ) + + prompt.update( + version="latest", + tools=[tool1, tool2], + ) + prompt.pull() + + assert len(prompt.tools) == 2 + tool_names = {tool.name for tool in prompt.tools} + assert tool_names == {"InitialTool", "SecondTool"} + + # Replace with just one different tool + tool3 = Tool( + name="ReplacementTool", + description="Replacement", + mode=ToolMode.ALLOW_ADDITIONAL, + structured_schema=ComplexOutputSchema, + ) + + prompt.update( + version="latest", + tools=[tool3], + ) + prompt.pull() + + assert len(prompt.tools) == 1 + assert prompt.tools[0].name == "ReplacementTool" + + def test_tool_with_all_field_types(self): + """Test tool schema with all supported field types""" + ALIAS = "test_prompt_list_all_field_types" + prompt = Prompt(alias=ALIAS) + + UUID = uuid.uuid4() + + messages = [ + PromptMessage(role="user", content=f"All types test {UUID}"), + ] + + tool = Tool( + name="AllTypesTool", + description="Tool with all field types", + mode=ToolMode.STRICT, + structured_schema=VeryComplexSchema, + ) + + prompt.push(messages=messages, tools=[tool]) + prompt.tools = [] + prompt.pull() + + input_schema = prompt.tools[0].input_schema + props = input_schema["properties"] + + # Verify all field types are correctly represented + assert props["id"]["type"] == "string" + assert props["simple_field"]["type"] == "string" + assert props["number_field"]["type"] == "integer" + assert props["float_field"]["type"] == "number" + 
assert props["bool_field"]["type"] == "boolean" + assert props["nested_obj"]["type"] == "object" From 2a4b17787bffde3c74d51c0df5da215f3f2a3e97 Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Wed, 4 Feb 2026 22:13:21 +0530 Subject: [PATCH 5/8] . --- tests/test_confident/test_prompt.py | 327 ++++++++++++++-------------- 1 file changed, 169 insertions(+), 158 deletions(-) diff --git a/tests/test_confident/test_prompt.py b/tests/test_confident/test_prompt.py index 918c19339..b79464037 100644 --- a/tests/test_confident/test_prompt.py +++ b/tests/test_confident/test_prompt.py @@ -12,7 +12,7 @@ ReasoningEffort, OutputType, Verbosity, - ToolMode + ToolMode, ) from deepeval.confident.api import Api from deepeval.metrics.faithfulness.schema import FaithfulnessVerdict @@ -206,26 +206,26 @@ def test_push_with_simple_output_schema(self): """Test pushing text prompt with simple output schema""" ALIAS = "test_prompt_text_simple_schema" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + prompt.push( text=f"Generate data {UUID}", output_type=OutputType.SCHEMA, output_schema=SimpleSchema, ) - + prompt.pull() - + # Verify output schema assert prompt.output_type == OutputType.SCHEMA assert prompt.output_schema is not None assert hasattr(prompt.output_schema, "model_fields") - + expected_fields = {"name", "value"} actual_fields = set(prompt.output_schema.model_fields.keys()) assert actual_fields == expected_fields - + # Verify field types assert prompt.output_schema.model_fields["name"].annotation == str assert prompt.output_schema.model_fields["value"].annotation == float @@ -234,25 +234,25 @@ def test_push_with_nested_output_schema(self): """Test pushing text prompt with nested output schema""" ALIAS = "test_prompt_text_nested_schema" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + prompt.push( text=f"Generate complex data {UUID}", output_type=OutputType.SCHEMA, output_schema=ComplexOutputSchema, ) - + prompt.pull() - + # Verify output schema assert prompt.output_type == OutputType.SCHEMA assert prompt.output_schema is not None - + expected_fields = {"title", "count", "score", "active", "metadata"} actual_fields = set(prompt.output_schema.model_fields.keys()) assert actual_fields == expected_fields - + # Verify nested object nested_type = prompt.output_schema.model_fields["metadata"].annotation assert hasattr(nested_type, "model_fields") @@ -263,28 +263,35 @@ def test_push_with_deeply_nested_output_schema(self): """Test pushing text prompt with deeply nested output schema (3 levels)""" ALIAS = "test_prompt_text_deep_nested_schema" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + prompt.push( text=f"Generate very complex data {UUID}", output_type=OutputType.SCHEMA, output_schema=VeryComplexSchema, ) - + prompt.pull() - + # Verify top level schema assert prompt.output_schema is not None top_fields = set(prompt.output_schema.model_fields.keys()) - assert top_fields == {"id", "simple_field", "number_field", "float_field", "bool_field", "nested_obj"} - + assert top_fields == { + "id", + "simple_field", + "number_field", + "float_field", + "bool_field", + "nested_obj", + } + # Verify level 2 nested object level2_type = prompt.output_schema.model_fields["nested_obj"].annotation assert hasattr(level2_type, "model_fields") level2_fields = set(level2_type.model_fields.keys()) assert level2_fields == {"level2_field", "deep_object"} - + # Verify level 3 nested object level3_type = level2_type.model_fields["deep_object"].annotation assert hasattr(level3_type, "model_fields") @@ -295,36 +302,36 @@ def 
test_push_single_tool(self): """Test pushing text prompt with a single tool""" ALIAS = "test_prompt_text_single_tool" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + tool = Tool( name="SearchTool", description="A tool for searching", mode=ToolMode.STRICT, structured_schema=ToolInputSchema, ) - + prompt.push( text=f"Use the search tool {UUID}", tools=[tool], ) - + prompt.pull() - + # Verify tools assert prompt.tools is not None assert len(prompt.tools) == 1 - + pulled_tool = prompt.tools[0] assert pulled_tool.name == "SearchTool" assert pulled_tool.description == "A tool for searching" assert pulled_tool.mode == ToolMode.STRICT - + # Verify tool schema assert pulled_tool.structured_schema is not None assert pulled_tool.structured_schema.fields is not None - + # Check input_schema property input_schema = pulled_tool.input_schema assert input_schema["type"] == "object" @@ -336,37 +343,37 @@ def test_push_multiple_tools(self): """Test pushing text prompt with multiple tools""" ALIAS = "test_prompt_text_multiple_tools" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + tool1 = Tool( name="SearchTool", description="Search tool", mode=ToolMode.STRICT, structured_schema=ToolInputSchema, ) - + tool2 = Tool( name="AnalysisTool", description="Analysis tool", mode=ToolMode.NO_ADDITIONAL, structured_schema=SimpleSchema, ) - + prompt.push( text=f"Use multiple tools {UUID}", tools=[tool1, tool2], ) - + prompt.pull() - + # Verify tools assert prompt.tools is not None assert len(prompt.tools) == 2 - + tool_names = {tool.name for tool in prompt.tools} assert tool_names == {"SearchTool", "AnalysisTool"} - + # Verify each tool for tool in prompt.tools: assert tool.structured_schema is not None @@ -376,9 +383,9 @@ def test_update_tool_by_name(self): """Test updating a tool with the same name (should replace it)""" ALIAS = "test_prompt_text_update_tool" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + # Push initial tool original_tool = Tool( name="SearchTool", @@ -386,17 +393,17 @@ def test_update_tool_by_name(self): mode=ToolMode.STRICT, structured_schema=ToolInputSchema, ) - + prompt.push( text=f"Initial tool push {UUID}", tools=[original_tool], ) - + prompt.pull() - + initial_tool = prompt.tools[0] assert initial_tool.description == "Original search tool" - + # Update with new tool (same name) updated_tool = Tool( name="SearchTool", # Same name @@ -404,23 +411,23 @@ def test_update_tool_by_name(self): mode=ToolMode.NO_ADDITIONAL, # Different mode structured_schema=UpdatedToolInputSchema, # Different schema ) - + prompt.update( version="latest", tools=[updated_tool], ) - + prompt.pull() - + # Verify tool was updated assert prompt.tools is not None assert len(prompt.tools) == 1 - + final_tool = prompt.tools[0] assert final_tool.name == "SearchTool" assert final_tool.description == "Updated search tool" assert final_tool.mode == ToolMode.NO_ADDITIONAL - + # Verify schema was updated input_schema = final_tool.input_schema assert "new_field" in input_schema["properties"] @@ -429,16 +436,16 @@ def test_push_output_schema_and_tools(self): """Test pushing both output schema and tools together""" ALIAS = "test_prompt_text_schema_and_tools" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + tool = Tool( name="DataTool", description="Data processing tool", mode=ToolMode.STRICT, structured_schema=SimpleSchema, ) - + prompt.push( text=f"Process data with tool {UUID}", output_type=OutputType.SCHEMA, @@ -448,12 +455,12 @@ def test_push_output_schema_and_tools(self): prompt.output_schema = None 
prompt.tools = None prompt.pull() - + # Verify output schema assert prompt.output_type == OutputType.SCHEMA assert prompt.output_schema is not None assert "title" in prompt.output_schema.model_fields - + # Verify tool assert prompt.tools is not None assert len(prompt.tools) == 1 @@ -463,41 +470,43 @@ def test_pull_preserves_tool_details(self): """Test that pulling preserves all tool details including schema structure""" ALIAS = "test_prompt_text_tool_preservation" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + tool = Tool( name="DetailedTool", description="A tool with detailed schema", mode=ToolMode.STRICT, structured_schema=VeryComplexSchema, ) - + prompt.push( text=f"Detailed tool test {UUID}", tools=[tool], ) - + # Pull multiple times to ensure consistency for _ in range(3): prompt.pull() - + assert prompt.tools is not None assert len(prompt.tools) == 1 - + pulled_tool = prompt.tools[0] assert pulled_tool.name == "DetailedTool" assert pulled_tool.description == "A tool with detailed schema" assert pulled_tool.mode == ToolMode.STRICT - + # Verify input schema has all fields input_schema = pulled_tool.input_schema assert "id" in input_schema["properties"] assert "simple_field" in input_schema["properties"] assert "nested_obj" in input_schema["properties"] - + # Verify nested structure - nested_props = input_schema["properties"]["nested_obj"]["properties"] + nested_props = input_schema["properties"]["nested_obj"][ + "properties" + ] assert "level2_field" in nested_props assert "deep_object" in nested_props @@ -505,37 +514,37 @@ def test_cache_preserves_output_schema_and_tools(self): """Test that caching preserves output schema and tools""" ALIAS = "test_prompt_text_cache_schema_tools" prompt1 = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + tool = Tool( name="CachedTool", description="Tool for cache test", mode=ToolMode.STRICT, structured_schema=SimpleSchema, ) - + prompt1.push( text=f"Cache test {UUID}", output_type=OutputType.SCHEMA, output_schema=ComplexOutputSchema, tools=[tool], ) - + # Pull and cache prompt1.pull(write_to_cache=False) version = prompt1.version - + # Load from cache prompt2 = Prompt(alias=ALIAS) prompt2.pull(version=version) - + # Verify output schema preserved assert prompt2.output_schema is not None assert set(prompt2.output_schema.model_fields.keys()) == set( prompt1.output_schema.model_fields.keys() ) - + # Verify tools preserved assert prompt2.tools is not None assert len(prompt2.tools) == len(prompt1.tools) @@ -838,26 +847,26 @@ def test_push_with_simple_output_schema(self): """Test pushing list prompt with simple output schema""" ALIAS = "test_prompt_list_simple_schema" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Generate data {UUID}"), PromptMessage(role="assistant", content=f"Here's the data {UUID}"), ] - + prompt.push( messages=messages, output_type=OutputType.SCHEMA, output_schema=SimpleSchema, ) - + prompt.pull() - + # Verify output schema assert prompt.output_type == OutputType.SCHEMA assert prompt.output_schema is not None - + expected_fields = {"name", "value"} actual_fields = set(prompt.output_schema.model_fields.keys()) assert actual_fields == expected_fields @@ -866,26 +875,26 @@ def test_push_with_nested_output_schema(self): """Test pushing list prompt with nested output schema""" ALIAS = "test_prompt_list_nested_schema" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="system", content="You are a data generator"), 
PromptMessage(role="user", content=f"Generate complex data {UUID}"), ] - + prompt.push( messages=messages, output_type=OutputType.SCHEMA, output_schema=ComplexOutputSchema, ) - + prompt.pull() - + # Verify nested structure assert prompt.output_schema is not None assert "metadata" in prompt.output_schema.model_fields - + nested_type = prompt.output_schema.model_fields["metadata"].annotation assert hasattr(nested_type, "model_fields") assert "nested_field" in nested_type.model_fields @@ -895,31 +904,31 @@ def test_push_with_deeply_nested_output_schema(self): """Test pushing list prompt with deeply nested output schema""" ALIAS = "test_prompt_list_deep_nested_schema" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Complex nested data {UUID}"), ] - + prompt.push( messages=messages, output_type=OutputType.SCHEMA, output_schema=VeryComplexSchema, ) - + prompt.pull() - + # Verify 3-level nesting assert prompt.output_schema is not None - + # Level 1 assert "nested_obj" in prompt.output_schema.model_fields - + # Level 2 level2_type = prompt.output_schema.model_fields["nested_obj"].annotation assert "deep_object" in level2_type.model_fields - + # Level 3 level3_type = level2_type.model_fields["deep_object"].annotation assert "level3_field" in level3_type.model_fields @@ -928,32 +937,32 @@ def test_push_single_tool(self): """Test pushing list prompt with a single tool""" ALIAS = "test_prompt_list_single_tool" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Use search tool {UUID}"), ] - + tool = Tool( name="SearchTool", description="Search functionality", mode=ToolMode.STRICT, structured_schema=ToolInputSchema, ) - + prompt.push( messages=messages, tools=[tool], ) - + prompt.pull() - + # Verify tool assert prompt.tools is not None assert len(prompt.tools) == 1 assert prompt.tools[0].name == "SearchTool" - + # Verify tool schema input_schema = prompt.tools[0].input_schema assert "query" in input_schema["properties"] @@ -963,48 +972,48 @@ def test_push_multiple_tools(self): """Test pushing list prompt with multiple tools""" ALIAS = "test_prompt_list_multiple_tools" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Multiple tools test {UUID}"), ] - + tool1 = Tool( name="Tool1", description="First tool", mode=ToolMode.STRICT, structured_schema=SimpleSchema, ) - + tool2 = Tool( name="Tool2", description="Second tool", mode=ToolMode.NO_ADDITIONAL, structured_schema=ToolInputSchema, ) - + tool3 = Tool( name="Tool3", description="Third tool", mode=ToolMode.ALLOW_ADDITIONAL, structured_schema=ComplexOutputSchema, ) - + prompt.push( messages=messages, tools=[tool1, tool2, tool3], ) - + prompt.pull() - + # Verify all tools assert prompt.tools is not None assert len(prompt.tools) == 3 - + tool_names = {tool.name for tool in prompt.tools} assert tool_names == {"Tool1", "Tool2", "Tool3"} - + # Verify different modes modes = {tool.name: tool.mode for tool in prompt.tools} assert modes["Tool1"] == ToolMode.STRICT @@ -1015,13 +1024,13 @@ def test_update_tool_by_name(self): """Test updating a tool in list prompt""" ALIAS = "test_prompt_list_update_tool" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Initial {UUID}"), ] - + # Initial tool tool = Tool( name="UpdateableTool", @@ -1029,12 +1038,12 @@ def test_update_tool_by_name(self): mode=ToolMode.STRICT, structured_schema=ToolInputSchema, 
) - + prompt.push(messages=messages, tools=[tool]) prompt.pull() - + assert prompt.tools[0].description == "Original" - + # Update tool updated_tool = Tool( name="UpdateableTool", @@ -1042,17 +1051,17 @@ def test_update_tool_by_name(self): mode=ToolMode.ALLOW_ADDITIONAL, structured_schema=UpdatedToolInputSchema, ) - + prompt.update( version="latest", tools=[updated_tool], ) - + prompt.pull() - + assert prompt.tools[0].description == "Updated" assert prompt.tools[0].mode == ToolMode.ALLOW_ADDITIONAL - + # Verify new field in schema input_schema = prompt.tools[0].input_schema assert "new_field" in input_schema["properties"] @@ -1061,21 +1070,21 @@ def test_push_output_schema_and_tools(self): """Test pushing list prompt with both output schema and tools""" ALIAS = "test_prompt_list_schema_and_tools" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="system", content="You are helpful"), PromptMessage(role="user", content=f"Process {UUID}"), ] - + tool = Tool( name="ProcessorTool", description="Processing tool", mode=ToolMode.STRICT, structured_schema=SimpleSchema, ) - + prompt.push( messages=messages, output_type=OutputType.SCHEMA, @@ -1085,17 +1094,17 @@ def test_push_output_schema_and_tools(self): prompt.output_schema = None prompt.tools = None prompt.pull() - + # Verify both present assert prompt.output_schema is not None assert prompt.tools is not None assert len(prompt.tools) == 1 - + # Verify they're different schemas output_fields = set(prompt.output_schema.model_fields.keys()) tool_input_schema = prompt.tools[0].input_schema tool_fields = set(tool_input_schema["properties"].keys()) - + # They should have different fields assert output_fields != tool_fields @@ -1103,36 +1112,36 @@ def test_pull_preserves_tool_details(self): """Test that pulling list prompt preserves all tool details""" ALIAS = "test_prompt_list_tool_preservation" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Preserve test {UUID}"), ] - + tool = Tool( name="ComplexTool", description="Tool with complex schema", mode=ToolMode.NO_ADDITIONAL, structured_schema=VeryComplexSchema, ) - + prompt.push(messages=messages, tools=[tool]) - + # Pull multiple times for i in range(3): prompt.pull() - + assert prompt.tools is not None pulled_tool = prompt.tools[0] - + assert pulled_tool.name == "ComplexTool" assert pulled_tool.mode == ToolMode.NO_ADDITIONAL - + # Verify complex nested structure preserved input_schema = pulled_tool.input_schema assert "nested_obj" in input_schema["properties"] - + nested = input_schema["properties"]["nested_obj"] assert nested["type"] == "object" assert "deep_object" in nested["properties"] @@ -1141,60 +1150,62 @@ def test_cache_preserves_output_schema_and_tools(self): """Test that caching preserves output schema and tools for list prompts""" ALIAS = "test_prompt_list_cache_schema_tools" prompt1 = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Cache test {UUID}"), ] - + tool = Tool( name="CachedListTool", description="Tool for list cache test", mode=ToolMode.STRICT, structured_schema=ComplexOutputSchema, ) - + prompt1.push( messages=messages, output_type=OutputType.SCHEMA, output_schema=VeryComplexSchema, tools=[tool], ) - + prompt1.pull(write_to_cache=False) version = prompt1.version - + prompt2 = Prompt(alias=ALIAS) prompt2.pull(version=version) - + # Verify output schema assert prompt2.output_schema is not None assert 
set(prompt2.output_schema.model_fields.keys()) == set( prompt1.output_schema.model_fields.keys() ) - + # Verify tools assert prompt2.tools is not None assert len(prompt2.tools) == 1 assert prompt2.tools[0].name == "CachedListTool" - + # Verify tool schema structure schema1 = prompt1.tools[0].input_schema schema2 = prompt2.tools[0].input_schema - assert set(schema1["properties"].keys()) == set(schema2["properties"].keys()) + assert set(schema1["properties"].keys()) == set( + schema2["properties"].keys() + ) def test_add_and_remove_tools(self): """Test adding and removing tools via update""" ALIAS = "test_prompt_list_add_remove_tools" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"Tool management {UUID}"), ] - + # Start with one tool tool1 = Tool( name="InitialTool", @@ -1202,13 +1213,13 @@ def test_add_and_remove_tools(self): mode=ToolMode.STRICT, structured_schema=SimpleSchema, ) - + prompt.push(messages=messages, tools=[tool1]) prompt.pull() - + assert len(prompt.tools) == 1 assert prompt.tools[0].name == "InitialTool" - + # Add second tool tool2 = Tool( name="SecondTool", @@ -1216,17 +1227,17 @@ def test_add_and_remove_tools(self): mode=ToolMode.NO_ADDITIONAL, structured_schema=ToolInputSchema, ) - + prompt.update( version="latest", tools=[tool1, tool2], ) prompt.pull() - + assert len(prompt.tools) == 2 tool_names = {tool.name for tool in prompt.tools} assert tool_names == {"InitialTool", "SecondTool"} - + # Replace with just one different tool tool3 = Tool( name="ReplacementTool", @@ -1234,13 +1245,13 @@ def test_add_and_remove_tools(self): mode=ToolMode.ALLOW_ADDITIONAL, structured_schema=ComplexOutputSchema, ) - + prompt.update( version="latest", tools=[tool3], ) prompt.pull() - + assert len(prompt.tools) == 1 assert prompt.tools[0].name == "ReplacementTool" @@ -1248,27 +1259,27 @@ def test_tool_with_all_field_types(self): """Test tool schema with all supported field types""" ALIAS = "test_prompt_list_all_field_types" prompt = Prompt(alias=ALIAS) - + UUID = uuid.uuid4() - + messages = [ PromptMessage(role="user", content=f"All types test {UUID}"), ] - + tool = Tool( name="AllTypesTool", description="Tool with all field types", mode=ToolMode.STRICT, structured_schema=VeryComplexSchema, ) - + prompt.push(messages=messages, tools=[tool]) prompt.tools = [] prompt.pull() - + input_schema = prompt.tools[0].input_schema props = input_schema["properties"] - + # Verify all field types are correctly represented assert props["id"]["type"] == "string" assert props["simple_field"]["type"] == "string" From cfec1ca577ef6d1eee1fe0a252b0eb54626cd6c3 Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Wed, 4 Feb 2026 22:34:09 +0530 Subject: [PATCH 6/8] . --- tests/test_confident/test_prompt.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/test_confident/test_prompt.py b/tests/test_confident/test_prompt.py index b79464037..d21534ea1 100644 --- a/tests/test_confident/test_prompt.py +++ b/tests/test_confident/test_prompt.py @@ -76,15 +76,17 @@ class TestPromptText: def test_push(self): prompt = Prompt(alias=self.ALIAS) - UUID = uuid.uuid4() + UUID = str(uuid.uuid4()) + + TEXT = f"Hello, world! {UUID}" # generate uuid - prompt.push(text=f"Hello, world! {UUID}") + prompt.push(text=TEXT) prompt.pull() assert prompt.version[0] == "0" - assert prompt.text_template == f"Hello, world! 
{UUID}" + assert prompt.text_template == TEXT assert prompt.messages_template is None assert prompt._prompt_version_id is not None assert prompt.type == PromptType.TEXT @@ -93,17 +95,18 @@ def test_push(self): def test_push_with_interpolation_type(self): prompt = Prompt(alias=self.ALIAS_WITH_INTERPOLATION_TYPE) - UUID = uuid.uuid4() + UUID = str(uuid.uuid4()) + TEXT = f"Hello, world! {UUID}" prompt.push( - text=f"Hello, world! {UUID}", + text=TEXT, interpolation_type=PromptInterpolationType.MUSTACHE, ) prompt.pull() assert prompt.version[0] == "0" - assert prompt.text_template == f"Hello, world! {UUID}" + assert prompt.text_template == TEXT assert prompt.messages_template is None assert prompt._prompt_version_id is not None assert prompt.type == PromptType.TEXT @@ -316,8 +319,8 @@ def test_push_single_tool(self): text=f"Use the search tool {UUID}", tools=[tool], ) - - prompt.pull() + prompt.tools = None + prompt.pull(default_to_cache=False) # Verify tools assert prompt.tools is not None @@ -416,8 +419,8 @@ def test_update_tool_by_name(self): version="latest", tools=[updated_tool], ) - - prompt.pull() + prompt.tools = None + prompt.pull(default_to_cache=False) # Verify tool was updated assert prompt.tools is not None @@ -955,8 +958,8 @@ def test_push_single_tool(self): messages=messages, tools=[tool], ) - - prompt.pull() + prompt.tools = None + prompt.pull(default_to_cache=False) # Verify tool assert prompt.tools is not None @@ -1250,6 +1253,7 @@ def test_add_and_remove_tools(self): version="latest", tools=[tool3], ) + prompt.tools = None prompt.pull() assert len(prompt.tools) == 1 From a9191306f261bcc33dcf6243ff5066bdd631fcb8 Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Wed, 4 Feb 2026 23:59:07 +0530 Subject: [PATCH 7/8] . --- tests/test_confident/test_prompt.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_confident/test_prompt.py b/tests/test_confident/test_prompt.py index d21534ea1..acf5df828 100644 --- a/tests/test_confident/test_prompt.py +++ b/tests/test_confident/test_prompt.py @@ -103,7 +103,7 @@ def test_push_with_interpolation_type(self): interpolation_type=PromptInterpolationType.MUSTACHE, ) - prompt.pull() + prompt.pull(default_to_cache=False) assert prompt.version[0] == "0" assert prompt.text_template == TEXT @@ -640,9 +640,7 @@ def test_push_with_interpolation_type(self): interpolation_type=PromptInterpolationType.MUSTACHE, ) - time.sleep(5) - - prompt.pull() + prompt.pull(default_to_cache=False) assert prompt.version[0] == "0" assert prompt.text_template is None From a364ca254fbe2185f6a9b1c596c81f694dbc08eb Mon Sep 17 00:00:00 2001 From: A-Vamshi Date: Thu, 5 Feb 2026 19:25:05 +0530 Subject: [PATCH 8/8] . --- docs/docs/evaluation-prompts.mdx | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/docs/evaluation-prompts.mdx b/docs/docs/evaluation-prompts.mdx index 665c61e23..693423dd3 100644 --- a/docs/docs/evaluation-prompts.mdx +++ b/docs/docs/evaluation-prompts.mdx @@ -405,3 +405,42 @@ There are **TWO** output settings you can associate with a prompt: - `output_type`: The string specifying the model to use for generation. - `output_schema`: The schema of type `BaseModel` of the output, if `output_type` is `OutputType.SCHEMA`. + +### Tools + +The tools in a prompt are used to specify the tools your agent has access to, all tools are identified using thier name and hence must be unique. 
+ +```python +from deepeval.prompt import Prompt, Tool +from deepeval.prompt.api import ToolMode +from pydantic import BaseModel + +class ToolInputSchema(BaseModel): + result: str + confidence: float + +prompt = Prompt(alias="YOUR-PROMPT-ALIAS") +tool = Tool( + name="ExploreTool", + description="Tool used for browsing the internet", + mode=ToolMode.STRICT, + structured_schema=ToolInputSchema, +) + +prompt.push( + text="This is a prompt with a tool", + tools=[tool] +) + +# You can also update an existing tool by using the new tool in the push / update method: +tool2 = Tool( + name="ExploreTool", # Must have the same name to update a tool + description="Tool used for browsing the internet", + mode=ToolMode.ALLOW_ADDITIONAL, + structured_schema=ToolInputSchema, +) + +prompt.update( + tools=[tool2] +) +``` \ No newline at end of file
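+
+After pulling a prompt, the tools you pushed are available on `prompt.tools`. Below is a minimal sketch of reading them back (the alias is a placeholder, and how you forward each tool's `input_schema` to your LLM provider's tool-calling format is up to you):
+
+```python
+from deepeval.prompt import Prompt
+
+prompt = Prompt(alias="YOUR-PROMPT-ALIAS")
+prompt.pull()
+
+# Each pulled tool carries its name, description, mode, and a JSON-schema style input schema
+for tool in prompt.tools:
+    print(tool.name, tool.mode)
+    print(tool.input_schema)  # e.g. {"type": "object", "properties": {...}}
+```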