Skip to content

Commit b2adad9

Browse files
Add Ability to Specify Gemini Safety Settings (#790)
Co-authored-by: Sydney Runkle <[email protected]> Co-authored-by: sydney-runkle <[email protected]>
1 parent 738c890 commit b2adad9

File tree

4 files changed

+169
-16
lines changed

4 files changed

+169
-16
lines changed

docs/agents.md

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -206,26 +206,44 @@ print(result_sync.data)
206206

207207
### Model specific settings
208208

209-
<!-- TODO: replace this with the gemini safety settings example once added via https://github.com/pydantic/pydantic-ai/issues/373 -->
210-
211-
If you wish to further customize model behavior, you can use a subclass of [`ModelSettings`][pydantic_ai.settings.ModelSettings], like [`AnthropicModelSettings`][pydantic_ai.models.anthropic.AnthropicModelSettings], associated with your model of choice.
209+
If you wish to further customize model behavior, you can use a subclass of [`ModelSettings`][pydantic_ai.settings.ModelSettings], like [`GeminiModelSettings`][pydantic_ai.models.gemini.GeminiModelSettings], associated with your model of choice.
212210

213211
For example:
214212

215213
```py
216-
from pydantic_ai import Agent
217-
from pydantic_ai.models.anthropic import AnthropicModelSettings
214+
from pydantic_ai import Agent, UnexpectedModelBehavior
215+
from pydantic_ai.models.gemini import GeminiModelSettings
218216

219-
agent = Agent('anthropic:claude-3-5-sonnet-latest')
217+
agent = Agent('google-gla:gemini-1.5-flash')
220218

221-
result_sync = agent.run_sync(
222-
'What is the capital of Italy?',
223-
model_settings=AnthropicModelSettings(anthropic_metadata={'user_id': 'my_user_id'}),
224-
)
225-
print(result_sync.data)
226-
#> Rome
219+
try:
220+
result = agent.run_sync(
221+
'Write a list of 5 very rude things that I might say to the universe after stubbing my toe in the dark:',
222+
model_settings=GeminiModelSettings(
223+
temperature=0.0, # general model settings can also be specified
224+
gemini_safety_settings=[
225+
{
226+
'category': 'HARM_CATEGORY_HARASSMENT',
227+
'threshold': 'BLOCK_LOW_AND_ABOVE',
228+
},
229+
{
230+
'category': 'HARM_CATEGORY_HATE_SPEECH',
231+
'threshold': 'BLOCK_LOW_AND_ABOVE',
232+
},
233+
],
234+
),
235+
)
236+
except UnexpectedModelBehavior as e:
237+
print(e) # (1)!
238+
"""
239+
Safety settings triggered, body:
240+
<safety settings details>
241+
"""
227242
```
228243

244+
1. This error is raised because the safety thresholds were exceeded.
245+
If the safety thresholds had not been exceeded, `result` would instead contain a normal `ModelResponse`.
246+
229247
## Runs vs. Conversations
230248

231249
An agent **run** might represent an entire conversation — there's no limit to how many messages can be exchanged in a single run. However, a **conversation** might also be composed of multiple runs, especially if you need to maintain state between separate interactions or API calls.

pydantic_ai_slim/pydantic_ai/models/gemini.py

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
class GeminiModelSettings(ModelSettings):
5858
"""Settings used for a Gemini model request."""
5959

60-
# This class is a placeholder for any future gemini-specific settings
60+
gemini_safety_settings: list[GeminiSafetySettings]
6161

6262

6363
@dataclass(init=False)
@@ -192,6 +192,8 @@ async def _make_request(
192192
generation_config['presence_penalty'] = presence_penalty
193193
if (frequency_penalty := model_settings.get('frequency_penalty')) is not None:
194194
generation_config['frequency_penalty'] = frequency_penalty
195+
if (gemini_safety_settings := model_settings.get('gemini_safety_settings')) != []:
196+
request_data['safety_settings'] = gemini_safety_settings
195197
if generation_config:
196198
request_data['generation_config'] = generation_config
197199

@@ -220,6 +222,11 @@ async def _make_request(
220222
def _process_response(self, response: _GeminiResponse) -> ModelResponse:
221223
if len(response['candidates']) != 1:
222224
raise UnexpectedModelBehavior('Expected exactly one candidate in Gemini response')
225+
if 'content' not in response['candidates'][0]:
226+
if response['candidates'][0].get('finish_reason') == 'SAFETY':
227+
raise UnexpectedModelBehavior('Safety settings triggered', str(response))
228+
else:
229+
raise UnexpectedModelBehavior('Content field missing from Gemini response', str(response))
223230
parts = response['candidates'][0]['content']['parts']
224231
return _process_response_from_parts(parts, model_name=self.model_name)
225232

@@ -237,7 +244,7 @@ async def _process_streamed_response(self, http_response: HTTPResponse) -> Strea
237244
)
238245
if responses:
239246
last = responses[-1]
240-
if last['candidates'] and last['candidates'][0]['content']['parts']:
247+
if last['candidates'] and last['candidates'][0].get('content', {}).get('parts'):
241248
start_response = last
242249
break
243250

@@ -310,6 +317,8 @@ class GeminiStreamedResponse(StreamedResponse):
310317
async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
311318
async for gemini_response in self._get_gemini_responses():
312319
candidate = gemini_response['candidates'][0]
320+
if 'content' not in candidate:
321+
raise UnexpectedModelBehavior('Streamed response has no content field')
313322
gemini_part: _GeminiPartUnion
314323
for gemini_part in candidate['content']['parts']:
315324
if 'text' in gemini_part:
@@ -383,6 +392,7 @@ class _GeminiRequest(TypedDict):
383392
contents: list[_GeminiContent]
384393
tools: NotRequired[_GeminiTools]
385394
tool_config: NotRequired[_GeminiToolConfig]
395+
safety_settings: NotRequired[list[GeminiSafetySettings]]
386396
# we don't implement `generationConfig`, instead we use a named tool for the response
387397
system_instruction: NotRequired[_GeminiTextContent]
388398
"""
@@ -392,6 +402,38 @@ class _GeminiRequest(TypedDict):
392402
generation_config: NotRequired[_GeminiGenerationConfig]
393403

394404

405+
class GeminiSafetySettings(TypedDict):
406+
"""Safety settings options for Gemini model request.
407+
408+
See [Gemini API docs](https://ai.google.dev/gemini-api/docs/safety-settings) for safety category and threshold descriptions.
409+
For an example of how to use `GeminiSafetySettings`, see [here](../../agents.md#model-specific-settings).
410+
"""
411+
412+
category: Literal[
413+
'HARM_CATEGORY_UNSPECIFIED',
414+
'HARM_CATEGORY_HARASSMENT',
415+
'HARM_CATEGORY_HATE_SPEECH',
416+
'HARM_CATEGORY_SEXUALLY_EXPLICIT',
417+
'HARM_CATEGORY_DANGEROUS_CONTENT',
418+
'HARM_CATEGORY_CIVIC_INTEGRITY',
419+
]
420+
"""
421+
Safety settings category.
422+
"""
423+
424+
threshold: Literal[
425+
'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
426+
'BLOCK_LOW_AND_ABOVE',
427+
'BLOCK_MEDIUM_AND_ABOVE',
428+
'BLOCK_ONLY_HIGH',
429+
'BLOCK_NONE',
430+
'OFF',
431+
]
432+
"""
433+
Safety settings threshold.
434+
"""
435+
436+
395437
class _GeminiGenerationConfig(TypedDict, total=False):
396438
"""Schema for an API request to the Gemini API.
397439
@@ -568,8 +610,8 @@ class _GeminiResponse(TypedDict):
568610
class _GeminiCandidates(TypedDict):
569611
"""See <https://ai.google.dev/api/generate-content#v1beta.Candidate>."""
570612

571-
content: _GeminiContent
572-
finish_reason: NotRequired[Annotated[Literal['STOP', 'MAX_TOKENS'], pydantic.Field(alias='finishReason')]]
613+
content: NotRequired[_GeminiContent]
614+
finish_reason: NotRequired[Annotated[Literal['STOP', 'MAX_TOKENS', 'SAFETY'], pydantic.Field(alias='finishReason')]]
573615
"""
574616
See <https://ai.google.dev/api/generate-content#FinishReason>, lots of other values are possible,
575617
but let's wait until we see them and know what they mean to add them here.
@@ -617,6 +659,7 @@ class _GeminiSafetyRating(TypedDict):
617659
'HARM_CATEGORY_CIVIC_INTEGRITY',
618660
]
619661
probability: Literal['NEGLIGIBLE', 'LOW', 'MEDIUM', 'HIGH']
662+
blocked: NotRequired[bool]
620663

621664

622665
class _GeminiPromptFeedback(TypedDict):

tests/models/test_gemini.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from pydantic_ai.models.gemini import (
2929
ApiKeyAuth,
3030
GeminiModel,
31+
GeminiModelSettings,
3132
_content_model_response,
3233
_function_call_part_from_call,
3334
_gemini_response_ta,
@@ -37,6 +38,7 @@
3738
_GeminiFunction,
3839
_GeminiFunctionCallingConfig,
3940
_GeminiResponse,
41+
_GeminiSafetyRating,
4042
_GeminiTextPart,
4143
_GeminiToolConfig,
4244
_GeminiTools,
@@ -865,3 +867,90 @@ def handler(request: httpx.Request) -> httpx.Response:
865867
},
866868
)
867869
assert result.data == 'world'
870+
871+
872+
def gemini_no_content_response(
873+
safety_ratings: list[_GeminiSafetyRating], finish_reason: Literal['SAFETY'] | None = 'SAFETY'
874+
) -> _GeminiResponse:
875+
candidate = _GeminiCandidates(safety_ratings=safety_ratings)
876+
if finish_reason:
877+
candidate['finish_reason'] = finish_reason
878+
return _GeminiResponse(candidates=[candidate], usage_metadata=example_usage())
879+
880+
881+
async def test_safety_settings_unsafe(
882+
client_with_handler: ClientWithHandler, env: TestEnv, allow_model_requests: None
883+
) -> None:
884+
try:
885+
886+
def handler(request: httpx.Request) -> httpx.Response:
887+
safety_settings = json.loads(request.content)['safety_settings']
888+
assert safety_settings == [
889+
{'category': 'HARM_CATEGORY_CIVIC_INTEGRITY', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
890+
{'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
891+
]
892+
893+
return httpx.Response(
894+
200,
895+
content=_gemini_response_ta.dump_json(
896+
gemini_no_content_response(
897+
finish_reason='SAFETY',
898+
safety_ratings=[
899+
{'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'MEDIUM', 'blocked': True}
900+
],
901+
),
902+
by_alias=True,
903+
),
904+
headers={'Content-Type': 'application/json'},
905+
)
906+
907+
gemini_client = client_with_handler(handler)
908+
m = GeminiModel('gemini-1.5-flash', http_client=gemini_client, api_key='mock')
909+
agent = Agent(m)
910+
911+
await agent.run(
912+
'a request for something rude',
913+
model_settings=GeminiModelSettings(
914+
gemini_safety_settings=[
915+
{'category': 'HARM_CATEGORY_CIVIC_INTEGRITY', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
916+
{'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
917+
]
918+
),
919+
)
920+
except UnexpectedModelBehavior as e:
921+
assert repr(e) == "UnexpectedModelBehavior('Safety settings triggered')"
922+
923+
924+
async def test_safety_settings_safe(
925+
client_with_handler: ClientWithHandler, env: TestEnv, allow_model_requests: None
926+
) -> None:
927+
def handler(request: httpx.Request) -> httpx.Response:
928+
safety_settings = json.loads(request.content)['safety_settings']
929+
assert safety_settings == [
930+
{'category': 'HARM_CATEGORY_CIVIC_INTEGRITY', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
931+
{'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
932+
]
933+
934+
return httpx.Response(
935+
200,
936+
content=_gemini_response_ta.dump_json(
937+
gemini_response(_content_model_response(ModelResponse(parts=[TextPart('world')]))),
938+
by_alias=True,
939+
),
940+
headers={'Content-Type': 'application/json'},
941+
)
942+
943+
gemini_client = client_with_handler(handler)
944+
m = GeminiModel('gemini-1.5-flash', http_client=gemini_client, api_key='mock')
945+
agent = Agent(m)
946+
947+
result = await agent.run(
948+
'hello',
949+
model_settings=GeminiModelSettings(
950+
gemini_safety_settings=[
951+
{'category': 'HARM_CATEGORY_CIVIC_INTEGRITY', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
952+
{'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold': 'BLOCK_LOW_AND_ABOVE'},
953+
]
954+
),
955+
)
956+
assert result.data == 'world'

tests/test_examples.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pytest_mock import MockerFixture
1818

1919
from pydantic_ai._utils import group_by_temporal
20+
from pydantic_ai.exceptions import UnexpectedModelBehavior
2021
from pydantic_ai.messages import (
2122
ModelMessage,
2223
ModelResponse,
@@ -288,6 +289,8 @@ async def model_logic(messages: list[ModelMessage], info: AgentInfo) -> ModelRes
288289
)
289290
]
290291
)
292+
elif m.content.startswith('Write a list of 5 very rude things that I might say'):
293+
raise UnexpectedModelBehavior('Safety settings triggered', body='<safety settings details>')
291294
elif m.content.startswith('<examples>\n <user>'):
292295
return ModelResponse(parts=[ToolCallPart(tool_name='final_result_EmailOk', args={})])
293296
elif m.content == 'Ask a simple question with a single correct answer.' and len(messages) > 2:

0 commit comments

Comments
 (0)