
Commit cbb418b

mistralai[patch]: ruff fixes and rules (#31918)
* bump ruff deps
* add more thorough ruff rules
* fix said rules
1 parent ae210c1 commit cbb418b

10 files changed (+216 -145 lines)

libs/partners/mistralai/langchain_mistralai/chat_models.py

Lines changed: 81 additions & 68 deletions
Large diffs are not rendered by default.

libs/partners/mistralai/langchain_mistralai/embeddings.py

Lines changed: 23 additions & 19 deletions
@@ -17,20 +17,20 @@
     model_validator,
 )
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
-from tokenizers import Tokenizer  # type: ignore
+from tokenizers import Tokenizer  # type: ignore[import]
 from typing_extensions import Self
 
 logger = logging.getLogger(__name__)
 
 MAX_TOKENS = 16_000
 """A batching parameter for the Mistral API. This is NOT the maximum number of tokens
-accepted by the embedding model for each document/chunk, but rather the maximum number 
+accepted by the embedding model for each document/chunk, but rather the maximum number
 of tokens that can be sent in a single request to the Mistral API (across multiple
 documents/chunks)"""
 
 
 class DummyTokenizer:
-    """Dummy tokenizer for when tokenizer cannot be accessed (e.g., via Huggingface)"""
+    """Dummy tokenizer for when tokenizer cannot be accessed (e.g., via Huggingface)."""
 
     @staticmethod
     def encode_batch(texts: list[str]) -> list[list[str]]:
@@ -126,9 +126,9 @@ class MistralAIEmbeddings(BaseModel, Embeddings):
     # The type for client and async_client is ignored because the type is not
     # an Optional after the model is initialized and the model_validator
     # is run.
-    client: httpx.Client = Field(default=None)  # type: ignore # : :meta private:
+    client: httpx.Client = Field(default=None)  # type: ignore[assignment] # :meta private:
 
-    async_client: httpx.AsyncClient = Field(  # type: ignore # : meta private:
+    async_client: httpx.AsyncClient = Field(  # type: ignore[assignment] # :meta private:
         default=None
     )
     mistral_api_key: SecretStr = Field(
@@ -153,7 +153,6 @@ class MistralAIEmbeddings(BaseModel, Embeddings):
     @model_validator(mode="after")
     def validate_environment(self) -> Self:
         """Validate configuration."""
-
         api_key_str = self.mistral_api_key.get_secret_value()
         # todo: handle retries
         if not self.client:
@@ -187,14 +186,14 @@ def validate_environment(self) -> Self:
                 "Could not download mistral tokenizer from Huggingface for "
                 "calculating batch sizes. Set a Huggingface token via the "
                 "HF_TOKEN environment variable to download the real tokenizer. "
-                "Falling back to a dummy tokenizer that uses `len()`."
+                "Falling back to a dummy tokenizer that uses `len()`.",
+                stacklevel=2,
             )
             self.tokenizer = DummyTokenizer()
         return self
 
     def _get_batches(self, texts: list[str]) -> Iterable[list[str]]:
-        """Split a list of texts into batches of less than 16k tokens for Mistral
-        API."""
+        """Split list of texts into batches of less than 16k tokens for Mistral API."""
         batch: list[str] = []
         batch_tokens = 0
 
@@ -224,6 +223,7 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]:
 
         Returns:
             List of embeddings, one for each text.
+
         """
         try:
             batch_responses = []
@@ -238,16 +238,17 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]:
             def _embed_batch(batch: list[str]) -> Response:
                 response = self.client.post(
                     url="/embeddings",
-                    json=dict(
-                        model=self.model,
-                        input=batch,
-                    ),
+                    json={
+                        "model": self.model,
+                        "input": batch,
+                    },
                 )
                 response.raise_for_status()
                 return response
 
-            for batch in self._get_batches(texts):
-                batch_responses.append(_embed_batch(batch))
+            batch_responses = [
+                _embed_batch(batch) for batch in self._get_batches(texts)
+            ]
             return [
                 list(map(float, embedding_obj["embedding"]))
                 for response in batch_responses
@@ -265,16 +266,17 @@ async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
 
         Returns:
             List of embeddings, one for each text.
+
         """
         try:
             batch_responses = await asyncio.gather(
                 *[
                     self.async_client.post(
                         url="/embeddings",
-                        json=dict(
-                            model=self.model,
-                            input=batch,
-                        ),
+                        json={
+                            "model": self.model,
+                            "input": batch,
+                        },
                     )
                     for batch in self._get_batches(texts)
                 ]
@@ -296,6 +298,7 @@ def embed_query(self, text: str) -> list[float]:
 
         Returns:
             Embedding for the text.
+
         """
         return self.embed_documents([text])[0]
 
@@ -307,5 +310,6 @@ async def aembed_query(self, text: str) -> list[float]:
 
         Returns:
             Embedding for the text.
+
         """
         return (await self.aembed_documents([text]))[0]
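The `stacklevel=2` argument added to `warnings.warn` above satisfies ruff's B028 (no-explicit-stacklevel) from flake8-bugbear: it attributes the warning to the code that triggered the fallback rather than to the `warn()` call itself. A minimal sketch of the effect, with illustrative function names that are not from this commit:

import warnings

def load_tokenizer() -> None:
    # stacklevel=1 (the default) would report the warning at this line;
    # stacklevel=2 reports it at load_tokenizer()'s caller instead, which
    # is the more useful location for a library warning.
    warnings.warn("falling back to a dummy tokenizer", stacklevel=2)

def user_code() -> None:
    load_tokenizer()  # the warning is attributed to this call site

user_code()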

libs/partners/mistralai/pyproject.toml

Lines changed: 56 additions & 2 deletions
@@ -48,8 +48,62 @@ disallow_untyped_defs = "True"
 target-version = "py39"
 
 [tool.ruff.lint]
-select = ["E", "F", "I", "T201", "UP", "S"]
-ignore = [ "UP007", ]
+select = [
+    "A",      # flake8-builtins
+    "B",      # flake8-bugbear
+    "ASYNC",  # flake8-async
+    "C4",     # flake8-comprehensions
+    "COM",    # flake8-commas
+    "D",      # pydocstyle
+    "DOC",    # pydoclint
+    "E",      # pycodestyle error
+    "EM",     # flake8-errmsg
+    "F",      # pyflakes
+    "FA",     # flake8-future-annotations
+    "FBT",    # flake8-boolean-trap
+    "FLY",    # flake8-flynt
+    "I",      # isort
+    "ICN",    # flake8-import-conventions
+    "INT",    # flake8-gettext
+    "ISC",    # flake8-implicit-str-concat
+    "PGH",    # pygrep-hooks
+    "PIE",    # flake8-pie
+    "PERF",   # Perflint
+    "PYI",    # flake8-pyi
+    "Q",      # flake8-quotes
+    "RET",    # flake8-return
+    "RSE",    # flake8-raise
+    "RUF",    # ruff
+    "S",      # flake8-bandit
+    "SLF",    # flake8-self
+    "SLOT",   # flake8-slots
+    "SIM",    # flake8-simplify
+    "T10",    # flake8-debugger
+    "T20",    # flake8-print
+    "TID",    # flake8-tidy-imports
+    "UP",     # pyupgrade
+    "W",      # pycodestyle warning
+    "YTT",    # flake8-2020
+]
+ignore = [
+    "D100",     # pydocstyle: Missing docstring in public module
+    "D101",     # pydocstyle: Missing docstring in public class
+    "D102",     # pydocstyle: Missing docstring in public method
+    "D103",     # pydocstyle: Missing docstring in public function
+    "D104",     # pydocstyle: Missing docstring in public package
+    "D105",     # pydocstyle: Missing docstring in magic method
+    "D107",     # pydocstyle: Missing docstring in __init__
+    "D203",     # Messes with the formatter
+    "D407",     # pydocstyle: Missing dashed underline after section
+    "COM812",   # Messes with the formatter
+    "ISC001",   # Messes with the formatter
+    "PERF203",  # Rarely useful
+    "S112",     # Rarely useful
+    "RUF012",   # Doesn't play well with Pydantic
+    "SLF001",   # Private member access
+    "UP007",    # pyupgrade: non-pep604-annotation-union
+    "UP045",    # pyupgrade: non-pep604-annotation-optional
+]
 
 [tool.coverage.run]
 omit = ["tests/*"]
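Most of the churn in the remaining files follows mechanically from these new rule families. A hedged before/after sketch of two of them, EM (flake8-errmsg) and C4 (flake8-comprehensions); the snippet is illustrative and not taken from the diff:

def check_status(status: int) -> dict:
    if status != 200:
        # EM101: assign the message to a variable instead of passing a string
        # literal straight to the exception, so tracebacks stay readable.
        msg = "unexpected response status"
        raise ValueError(msg)
    # C408: write a dict literal rather than a dict(...) call.
    return {"model": "mistral-embed", "input": ["hello"]}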

libs/partners/mistralai/tests/integration_tests/test_chat_models.py

Lines changed: 12 additions & 11 deletions
@@ -1,5 +1,7 @@
 """Test ChatMistral chat model."""
 
+from __future__ import annotations
+
 import json
 import logging
 import time
@@ -43,11 +45,12 @@ async def test_astream() -> None:
         if token.response_metadata:
             chunks_with_response_metadata += 1
     if chunks_with_token_counts != 1 or chunks_with_response_metadata != 1:
-        raise AssertionError(
+        msg = (
             "Expected exactly one chunk with token counts or response_metadata. "
             "AIMessageChunk aggregation adds / appends counts and metadata. Check that "
             "this is behaving properly."
         )
+        raise AssertionError(msg)
     assert isinstance(full, AIMessageChunk)
     assert full.usage_metadata is not None
     assert full.usage_metadata["input_tokens"] > 0
@@ -61,7 +64,7 @@
 
 
 async def test_abatch() -> None:
-    """Test streaming tokens from ChatMistralAI"""
+    """Test streaming tokens from ChatMistralAI."""
     llm = ChatMistralAI()
 
     result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
@@ -70,7 +73,7 @@ async def test_abatch() -> None:
 
 
 async def test_abatch_tags() -> None:
-    """Test batch tokens from ChatMistralAI"""
+    """Test batch tokens from ChatMistralAI."""
     llm = ChatMistralAI()
 
     result = await llm.abatch(
@@ -81,7 +84,7 @@ async def test_abatch_tags() -> None:
 
 
 def test_batch() -> None:
-    """Test batch tokens from ChatMistralAI"""
+    """Test batch tokens from ChatMistralAI."""
     llm = ChatMistralAI()
 
     result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
@@ -90,7 +93,7 @@
 
 
 async def test_ainvoke() -> None:
-    """Test invoke tokens from ChatMistralAI"""
+    """Test invoke tokens from ChatMistralAI."""
     llm = ChatMistralAI()
 
     result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
@@ -99,10 +102,10 @@ async def test_ainvoke() -> None:
 
 
 def test_invoke() -> None:
-    """Test invoke tokens from ChatMistralAI"""
+    """Test invoke tokens from ChatMistralAI."""
     llm = ChatMistralAI()
 
-    result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
+    result = llm.invoke("I'm Pickle Rick", config={"tags": ["foo"]})
     assert isinstance(result.content, str)
 
 
@@ -178,13 +181,11 @@ class Person(BaseModel):
 
     structured_llm = llm.with_structured_output(Person)
     strm = structured_llm.stream("Erick, 27 years old")
-    chunk_num = 0
-    for chunk in strm:
+    for chunk_num, chunk in enumerate(strm):
         assert chunk_num == 0, "should only have one chunk with model"
         assert isinstance(chunk, Person)
         assert chunk.name == "Erick"
         assert chunk.age == 27
-        chunk_num += 1
 
 
 class Book(BaseModel):
@@ -201,7 +202,7 @@ def _check_parsed_result(result: Any, schema: Any) -> None:
     if schema == Book:
         assert isinstance(result, Book)
     else:
-        assert all(key in ["name", "authors"] for key in result.keys())
+        assert all(key in ["name", "authors"] for key in result)
 
 
 @pytest.mark.parametrize("schema", [Book, BookDict, Book.model_json_schema()])
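The counter rewrite and the dropped `.keys()` above correspond to ruff's SIM113 (enumerate-for-loop) and SIM118 (in-dict-keys) from flake8-simplify. A standalone sketch of both idioms, using illustrative data:

book = {"name": "The Hobbit", "authors": ["J. R. R. Tolkien"]}

# SIM118: membership tests work on the mapping directly; .keys() is redundant.
assert all(key in ["name", "authors"] for key in book)

# SIM113: enumerate() replaces a manually incremented counter variable.
for index, key in enumerate(book):
    assert index < len(book)
    assert key in book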

libs/partners/mistralai/tests/integration_tests/test_compile.py

Lines changed: 0 additions & 1 deletion
@@ -4,4 +4,3 @@
 @pytest.mark.compile
 def test_placeholder() -> None:
     """Used for compiling integration tests without running any real tests."""
-    pass
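Dropping the trailing `pass` follows ruff's PIE790 (unnecessary-placeholder): a docstring already gives the function a body, so a sketch of the minimal valid form is simply:

def test_placeholder() -> None:
    """A docstring is a sufficient function body; no pass statement is needed."""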

libs/partners/mistralai/tests/integration_tests/test_embeddings.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-"""Test MistralAI Embedding"""
+"""Test MistralAI Embedding."""
 
 from langchain_mistralai import MistralAIEmbeddings
 
libs/partners/mistralai/tests/integration_tests/test_standard.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-"""Standard LangChain interface tests"""
+"""Standard LangChain interface tests."""
 
 from langchain_core.language_models import BaseChatModel
 from langchain_tests.integration_tests import (  # type: ignore[import-not-found]
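The trailing periods added to one-line docstrings here and in the files above satisfy pydocstyle's first-line rules (D400/D415), now enabled through the D family. A one-line illustrative sketch:

def test_example() -> None:
    """Docstring first lines end with a period under D400."""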

libs/partners/mistralai/tests/unit_tests/test_chat_models.py

Lines changed: 18 additions & 18 deletions
@@ -84,23 +84,23 @@ def test_mistralai_initialization_baseurl_env(env_var_name: str) -> None:
     [
         (
             SystemMessage(content="Hello"),
-            dict(role="system", content="Hello"),
+            {"role": "system", "content": "Hello"},
         ),
         (
             HumanMessage(content="Hello"),
-            dict(role="user", content="Hello"),
+            {"role": "user", "content": "Hello"},
         ),
         (
             AIMessage(content="Hello"),
-            dict(role="assistant", content="Hello"),
+            {"role": "assistant", "content": "Hello"},
         ),
         (
             AIMessage(content="{", additional_kwargs={"prefix": True}),
-            dict(role="assistant", content="{", prefix=True),
+            {"role": "assistant", "content": "{", "prefix": True},
         ),
         (
             ChatMessage(role="assistant", content="Hello"),
-            dict(role="assistant", content="Hello"),
+            {"role": "assistant", "content": "Hello"},
         ),
     ],
 )
@@ -112,17 +112,17 @@ def test_convert_message_to_mistral_chat_message(
 
 
 def _make_completion_response_from_token(token: str) -> dict:
-    return dict(
-        id="abc123",
-        model="fake_model",
-        choices=[
-            dict(
-                index=0,
-                delta=dict(content=token),
-                finish_reason=None,
-            )
+    return {
+        "id": "abc123",
+        "model": "fake_model",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {"content": token},
+                "finish_reason": None,
+            }
         ],
-    )
+    }
 
 
 def mock_chat_stream(*args: Any, **kwargs: Any) -> Generator:
@@ -275,8 +275,7 @@ def test_extra_kwargs() -> None:
 
 
 def test_retry_with_failure_then_success() -> None:
-    """Test that retry mechanism works correctly when
-    first request fails and second succeeds."""
+    """Test retry mechanism works correctly when first request fails, second succeeds."""
     # Create a real ChatMistralAI instance
     chat = ChatMistralAI(max_retries=3)
 
@@ -289,7 +288,8 @@ def mock_post(*args: Any, **kwargs: Any) -> MagicMock:
         call_count += 1
 
         if call_count == 1:
-            raise httpx.RequestError("Connection error", request=MagicMock())
+            msg = "Connection error"
+            raise httpx.RequestError(msg, request=MagicMock())
 
         mock_response = MagicMock()
         mock_response.status_code = 200

libs/partners/mistralai/tests/unit_tests/test_standard.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-"""Standard LangChain interface tests"""
+"""Standard LangChain interface tests."""
 
 from langchain_core.language_models import BaseChatModel
 from langchain_tests.unit_tests import (  # type: ignore[import-not-found]
