"""Base prompt class for metrics with structured input/output models."""

import copy
import json
import typing as t
from abc import ABC

from pydantic import BaseModel

# Type variables for generics
InputModel = t.TypeVar("InputModel", bound=BaseModel)
OutputModel = t.TypeVar("OutputModel", bound=BaseModel)


class BasePrompt(ABC, t.Generic[InputModel, OutputModel]):
    """
    Base class for structured prompts with type-safe input/output models.

    Attributes:
        input_model: Pydantic model class for input validation
        output_model: Pydantic model class for output schema generation
        instruction: Task description for the LLM
        examples: List of (input, output) example pairs for few-shot learning
        language: Language for the prompt (default: "english")
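
    Example (illustrative; ``ExampleInput`` and ``ExampleOutput`` are
    hypothetical models defined by the subclass, not by this module)::

        class ExamplePrompt(BasePrompt[ExampleInput, ExampleOutput]):
            input_model = ExampleInput
            output_model = ExampleOutput
            instruction = "Describe the input in one sentence."
            examples = []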
    """

    # Must be set by subclasses
    input_model: t.Type[InputModel]
    output_model: t.Type[OutputModel]
    instruction: str
    examples: t.List[t.Tuple[InputModel, OutputModel]]
    language: str = "english"

    def to_string(self, data: InputModel) -> str:
        """
        Convert prompt with input data to complete prompt string for LLM.

        Args:
            data: Input data instance (validated by input_model)

        Returns:
            Complete prompt string ready for LLM
        """
        # Generate JSON schema for output
        output_schema = json.dumps(self.output_model.model_json_schema())

        # Generate examples section
        examples_str = self._generate_examples()

        # Convert input data to JSON
        input_json = data.model_dump_json(indent=4, exclude_none=True)

        # Build complete prompt (matches existing function format)
        return f"""{self.instruction}
Please return the output in a JSON format that complies with the following schema as specified in JSON Schema:
{output_schema}
Do not use single quotes in your response but double quotes, properly escaped with a backslash.

{examples_str}
-----------------------------

Now perform the same with the following input
input: {input_json}
Output: """

    def _generate_examples(self) -> str:
        """
        Generate examples section of the prompt.

        Returns:
            Formatted examples string or empty string if no examples
        """
        if not self.examples:
            return ""

        example_strings = []
        for idx, (input_data, output_data) in enumerate(self.examples):
            example_strings.append(
                f"Example {idx + 1}\n"
                f"Input: {input_data.model_dump_json(indent=4)}\n"
                f"Output: {output_data.model_dump_json(indent=4)}"
            )

        return "--------EXAMPLES-----------\n" + "\n\n".join(example_strings)

    async def adapt(
        self,
        target_language: str,
        llm: t.Any,
        adapt_instruction: bool = False,
    ) -> "BasePrompt[InputModel, OutputModel]":
        """
        Adapt the prompt to a new language using minimal translation.

        Args:
            target_language: Target language (e.g., "spanish", "french")
            llm: LLM instance for translation; expected to expose an async
                ``agenerate(prompt)`` method
            adapt_instruction: Whether to adapt instruction text (default: False)

        Returns:
            New prompt instance adapted to the target language
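
        Example (illustrative; ``my_llm`` stands in for any LLM object with
        an async ``agenerate`` method)::

            spanish_prompt = await prompt.adapt("spanish", llm=my_llm)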
        """
        # Create adapted prompt (deep copy so the original is left untouched)
        new_prompt = copy.deepcopy(self)
        new_prompt.language = target_language

        # Translate instruction if requested
        if adapt_instruction:
            instruction_prompt = f"Translate this to {target_language}, keep technical terms: {self.instruction}"
            try:
                response = await llm.agenerate(instruction_prompt)
                new_prompt.instruction = str(response).strip()
            except Exception:
                # Keep original if translation fails
                pass

        # Translate examples (simplified approach)
        translated_examples = []
        for input_ex, output_ex in self.examples:
            try:
                # Simple per-example translation
                example_prompt = f"""Translate this example to {target_language}, keep the same structure:

Input: {input_ex.model_dump_json()}
Output: {output_ex.model_dump_json()}

Return as: Input: {{translated_input_json}} Output: {{translated_output_json}}"""

                response = await llm.agenerate(example_prompt)

                # Try to extract translated JSON (basic parsing)
                response_str = str(response)
                if "Input:" in response_str and "Output:" in response_str:
                    # Split on the first "Output:" only, so a payload that
                    # happens to contain the marker again is not truncated
                    parts = response_str.split("Output:", 1)
                    input_part = parts[0].replace("Input:", "").strip()
                    output_part = parts[1].strip()

                    translated_input = self.input_model.model_validate_json(input_part)
                    translated_output = self.output_model.model_validate_json(output_part)
                    translated_examples.append((translated_input, translated_output))
                else:
                    # Fallback to original
                    translated_examples.append((input_ex, output_ex))

            except Exception:
                # Fallback to original example if translation fails
                translated_examples.append((input_ex, output_ex))

        new_prompt.examples = translated_examples
        return new_prompt
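

# A minimal smoke-test sketch for ``to_string``; the models and prompt below
# are illustrative assumptions, not part of the library.
if __name__ == "__main__":

    class GreetingInput(BaseModel):
        name: str

    class GreetingOutput(BaseModel):
        greeting: str

    class GreetingPrompt(BasePrompt[GreetingInput, GreetingOutput]):
        input_model = GreetingInput
        output_model = GreetingOutput
        instruction = "Write a one-line greeting for the given name."
        examples = [
            (GreetingInput(name="Ada"), GreetingOutput(greeting="Hello, Ada!")),
        ]

    print(GreetingPrompt().to_string(GreetingInput(name="Grace")))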