Skip to content

Commit 39daccd

Browse files
authored
Add stop_sequences to ModelSettings (#1419)
1 parent 1a60d8f commit 39daccd

18 files changed: +558 −21 lines

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ async def _messages_create(
226226
tools=tools or NOT_GIVEN,
227227
tool_choice=tool_choice or NOT_GIVEN,
228228
stream=stream,
229+
stop_sequences=model_settings.get('stop_sequences', NOT_GIVEN),
229230
temperature=model_settings.get('temperature', NOT_GIVEN),
230231
top_p=model_settings.get('top_p', NOT_GIVEN),
231232
timeout=model_settings.get('timeout', NOT_GIVEN),

pydantic_ai_slim/pydantic_ai/models/bedrock.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,8 @@ def _map_inference_config(
294294
inference_config['temperature'] = temperature
295295
if top_p := model_settings.get('top_p'):
296296
inference_config['topP'] = top_p
297-
# TODO(Marcelo): This is not included in model_settings yet.
298-
# if stop_sequences := model_settings.get('stop_sequences'):
299-
# inference_config['stopSequences'] = stop_sequences
297+
if stop_sequences := model_settings.get('stop_sequences'):
298+
inference_config['stopSequences'] = stop_sequences
300299

301300
return inference_config
302301

pydantic_ai_slim/pydantic_ai/models/cohere.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def __init__(
118118
'cohere' or an instance of `Provider[AsyncClientV2]`. If not provided, a new provider will be
119119
created using the other parameters.
120120
"""
121-
self._model_name: CohereModelName = model_name
121+
self._model_name = model_name
122122

123123
if isinstance(provider, str):
124124
provider = infer_provider(provider)
@@ -163,6 +163,7 @@ async def _chat(
163163
messages=cohere_messages,
164164
tools=tools or OMIT,
165165
max_tokens=model_settings.get('max_tokens', OMIT),
166+
stop_sequences=model_settings.get('stop_sequences', OMIT),
166167
temperature=model_settings.get('temperature', OMIT),
167168
p=model_settings.get('top_p', OMIT),
168169
seed=model_settings.get('seed', OMIT),

pydantic_ai_slim/pydantic_ai/models/gemini.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ class _GeminiGenerationConfig(TypedDict, total=False):
506506
top_p: float
507507
presence_penalty: float
508508
frequency_penalty: float
509+
stop_sequences: list[str]
509510

510511

511512
class _GeminiContent(TypedDict):

pydantic_ai_slim/pydantic_ai/models/groq.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ async def _completions_create(
208208
parallel_tool_calls=model_settings.get('parallel_tool_calls', NOT_GIVEN),
209209
tools=tools or NOT_GIVEN,
210210
tool_choice=tool_choice or NOT_GIVEN,
211+
stop=model_settings.get('stop_sequences', NOT_GIVEN),
211212
stream=stream,
212213
max_tokens=model_settings.get('max_tokens', NOT_GIVEN),
213214
temperature=model_settings.get('temperature', NOT_GIVEN),

pydantic_ai_slim/pydantic_ai/models/mistral.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ async def _completions_create(
199199
top_p=model_settings.get('top_p', 1),
200200
timeout_ms=self._get_timeout_ms(model_settings.get('timeout')),
201201
random_seed=model_settings.get('seed', UNSET),
202+
stop=model_settings.get('stop_sequences', None),
202203
)
203204
except SDKError as e:
204205
if (status_code := e.status_code) >= 400:
@@ -236,6 +237,7 @@ async def _stream_completions_create(
236237
timeout_ms=self._get_timeout_ms(model_settings.get('timeout')),
237238
presence_penalty=model_settings.get('presence_penalty'),
238239
frequency_penalty=model_settings.get('frequency_penalty'),
240+
stop=model_settings.get('stop_sequences', None),
239241
)
240242

241243
elif model_request_parameters.result_tools:

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ async def _completions_create(
271271
tool_choice=tool_choice or NOT_GIVEN,
272272
stream=stream,
273273
stream_options={'include_usage': True} if stream else NOT_GIVEN,
274+
stop=model_settings.get('stop_sequences', NOT_GIVEN),
274275
max_completion_tokens=model_settings.get('max_tokens', NOT_GIVEN),
275276
temperature=model_settings.get('temperature', NOT_GIVEN),
276277
top_p=model_settings.get('top_p', NOT_GIVEN),
@@ -611,7 +612,7 @@ async def _responses_create(
611612
truncation=model_settings.get('openai_truncation', NOT_GIVEN),
612613
timeout=model_settings.get('timeout', NOT_GIVEN),
613614
reasoning=reasoning,
614-
user=model_settings.get('user', NOT_GIVEN),
615+
user=model_settings.get('openai_user', NOT_GIVEN),
615616
)
616617
except APIStatusError as e:
617618
if (status_code := e.status_code) >= 400:

pydantic_ai_slim/pydantic_ai/settings.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,19 @@ class ModelSettings(TypedDict, total=False):
128128
* Groq
129129
"""
130130

131+
stop_sequences: list[str]
132+
"""Sequences that will cause the model to stop generating.
133+
134+
Supported by:
135+
136+
* OpenAI
137+
* Anthropic
138+
* Bedrock
139+
* Mistral
140+
* Groq
141+
* Cohere
142+
"""
143+
131144

132145
def merge_model_settings(base: ModelSettings | None, overrides: ModelSettings | None) -> ModelSettings | None:
133146
"""Merge two sets of model settings, preferring the overrides.
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- application/json
6+
accept-encoding:
7+
- gzip, deflate
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '193'
12+
content-type:
13+
- application/json
14+
host:
15+
- api.anthropic.com
16+
method: POST
17+
parsed_body:
18+
max_tokens: 1024
19+
messages:
20+
- content:
21+
- text: What is the capital of France?
22+
type: text
23+
role: user
24+
model: claude-3-5-sonnet-latest
25+
stop_sequences:
26+
- Paris
27+
stream: false
28+
uri: https://api.anthropic.com/v1/messages
29+
response:
30+
headers:
31+
connection:
32+
- keep-alive
33+
content-length:
34+
- '333'
35+
content-type:
36+
- application/json
37+
transfer-encoding:
38+
- chunked
39+
parsed_body:
40+
content:
41+
- text: 'The capital of France is '
42+
type: text
43+
id: msg_01FfkgikmbDFzn9XE1YYkJmA
44+
model: claude-3-5-sonnet-20241022
45+
role: assistant
46+
stop_reason: stop_sequence
47+
stop_sequence: Paris
48+
type: message
49+
usage:
50+
cache_creation_input_tokens: 0
51+
cache_read_input_tokens: 0
52+
input_tokens: 14
53+
output_tokens: 6
54+
status:
55+
code: 200
56+
message: OK
57+
version: 1
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
interactions:
2+
- request:
3+
body: '{"messages": [{"role": "user", "content": [{"text": "What is the capital of France?"}]}], "system": [], "inferenceConfig":
4+
{"stopSequences": ["Paris"]}}'
5+
headers:
6+
amz-sdk-invocation-id:
7+
- !!binary |
8+
YzVkZjljOTMtMDQ1Zi00NWE0LWJhY2YtMDAwMjdjYTg1NmRl
9+
amz-sdk-request:
10+
- !!binary |
11+
YXR0ZW1wdD0x
12+
content-length:
13+
- '152'
14+
content-type:
15+
- !!binary |
16+
YXBwbGljYXRpb24vanNvbg==
17+
method: POST
18+
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse
19+
response:
20+
headers:
21+
connection:
22+
- keep-alive
23+
content-length:
24+
- '209'
25+
content-type:
26+
- application/json
27+
parsed_body:
28+
metrics:
29+
latencyMs: 179
30+
output:
31+
message:
32+
content:
33+
- text: The capital of France is Paris
34+
role: assistant
35+
stopReason: end_turn
36+
usage:
37+
inputTokens: 7
38+
outputTokens: 6
39+
totalTokens: 13
40+
status:
41+
code: 200
42+
message: OK
43+
version: 1

0 commit comments

Comments (0)