Skip to content

Commit cc329ea

Browse files
authored
Merge pull request #179 from Azure/release/2.2.3
Release v2.2.3
2 parents 0f3ef19 + e6b0a9a commit cc329ea

File tree

9 files changed

+444
-13
lines changed

9 files changed

+444
-13
lines changed

.github/copilot-instructions.md

Lines changed: 415 additions & 0 deletions
Large diffs are not rendered by default.

CHANGELOG.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,15 @@
33
All notable changes to this project will be documented in this file.
44
This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres to [Semantic Versioning](https://semver.org/).
55

6-
## [v2.2.3] – 2026-03-06
6+
## [v2.2.3] – 2026-03-24
7+
78
### Changed
8-
- Added cron fallback defaults for blob ingestion jobs when `CRON_RUN_BLOB_INDEX` and `CRON_RUN_BLOB_PURGE` are not configured: blob indexing now runs hourly (`0 * * * *`) and blob purge runs at 10 minutes past each hour (`10 * * * *`).
9+
- **Default chunk overlap increased to 200 tokens**: Changed the default value of `TOKEN_OVERLAP` from `100` to `200` across all chunkers (doc_analysis, json, langchain, nl2sql, transcription), improving context continuity between chunks during document ingestion.
10+
- **Cron fallback defaults for blob ingestion jobs**: When `CRON_RUN_BLOB_INDEX` and `CRON_RUN_BLOB_PURGE` are not configured, the jobs now fall back to default schedules: blob indexing runs hourly (`0 * * * *`) and blob purge runs at 10 minutes past each hour (`10 * * * *`).
11+
12+
### Fixed
13+
- **Multimodal image captions not generated**: The `get_completion()` method in `AzureOpenAIClient` did not accept the `image_base64` parameter passed by the multimodal chunker, causing a `TypeError` on every caption generation call. The exception was caught silently and all image captions defaulted to "No caption available." Added vision support to `get_completion()` by accepting an optional `image_base64` parameter and constructing multimodal messages (text + image) using the OpenAI vision API format when an image is provided.
14+
- **Azure OpenAI API compatibility with newer models**: Replaced `max_tokens` with `max_completion_tokens` in the chat completions API call, fixing a 400 error (`unsupported_parameter`) when using newer models (e.g., GPT-4o) that reject the deprecated parameter.
915

1016
## [v2.2.2] – 2026-02-04
1117
### Fixed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.2.2
1+
2.2.3

chunking/chunkers/doc_analysis_chunker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def __init__(self, data, max_chunk_size=None, minimum_chunk_size=None, token_ove
5757
super().__init__(data)
5858
self.max_chunk_size = max_chunk_size or int(app_config_client.get("CHUNKING_NUM_TOKENS", 2048))
5959
self.minimum_chunk_size = minimum_chunk_size or int(app_config_client.get("CHUNKING_MIN_CHUNK_SIZE", 100))
60-
self.token_overlap = token_overlap or int(app_config_client.get("TOKEN_OVERLAP", 100))
60+
self.token_overlap = token_overlap or int(app_config_client.get("TOKEN_OVERLAP", 200))
6161
self.docint_client = DocumentIntelligenceClient()
6262
self.supported_formats = self.docint_client.file_extensions
6363

chunking/chunkers/json_chunker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def __init__(self, data, max_chunk_size=None, token_overlap=None, minimum_chunk_
1818
super().__init__(data)
1919
import os
2020
self.max_chunk_size = int(max_chunk_size or app_config_client.get("CHUNKING_NUM_TOKENS", 2048))
21-
self.token_overlap = int(token_overlap or app_config_client.get("TOKEN_OVERLAP", 100))
21+
self.token_overlap = int(token_overlap or app_config_client.get("TOKEN_OVERLAP", 200))
2222
self.minimum_chunk_size = int(minimum_chunk_size or app_config_client.get("CHUNKING_MIN_CHUNK_SIZE", 100))
2323

2424
def get_chunks(self):

chunking/chunkers/langchain_chunker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def __init__(self, data):
4949
super().__init__(data)
5050
self.max_chunk_size = int(app_config_client.get("CHUNKING_NUM_TOKENS", "2048"))
5151
self.minimum_chunk_size = int(app_config_client.get("CHUNKING_MIN_CHUNK_SIZE", "100"))
52-
self.token_overlap = int(app_config_client.get("TOKEN_OVERLAP", "100"))
52+
self.token_overlap = int(app_config_client.get("TOKEN_OVERLAP", "200"))
5353
self.supported_formats = {
5454
"md": "markdown",
5555
"txt": "text",

chunking/chunkers/nl2sql_chunker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __init__(self, data, max_chunk_size=None, token_overlap=None):
3939
"""
4040
super().__init__(data)
4141
self.max_chunk_size = max_chunk_size or int(app_config_client.get("CHUNKING_NUM_TOKENS", "2048"))
42-
self.token_overlap = token_overlap or 100
42+
self.token_overlap = token_overlap or 200
4343

4444
def get_chunks(self):
4545
chunks = []

chunking/chunkers/transcription_chunker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(self, data, max_chunk_size=None, token_overlap=None):
5555
"""
5656
super().__init__(data)
5757
self.max_chunk_size = max_chunk_size or int(app_config_client.get("CHUNKING_NUM_TOKENS", "2048"))
58-
self.token_overlap = token_overlap or 100
58+
self.token_overlap = token_overlap or 200
5959

6060
def get_chunks(self):
6161
chunks = []

tools/aoai.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,23 +119,33 @@ def get_completion(
119119
self,
120120
prompt: str,
121121
max_tokens: int = 800,
122-
retry_after: bool = True
122+
retry_after: bool = True,
123+
image_base64: str = None
123124
) -> str:
124125
# Truncate prompt if over token limit
125126
prompt_trunc = self._truncate_input(prompt, self.max_gpt_tokens)
126127

128+
# Build user message: multimodal (text + image) when image is provided
129+
if image_base64:
130+
user_content = [
131+
{"type": "text", "text": prompt_trunc},
132+
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
133+
]
134+
else:
135+
user_content = prompt_trunc
136+
127137
messages = [
128138
{"role": "system", "content": "You are a helpful assistant."},
129-
{"role": "user", "content": prompt_trunc}
139+
{"role": "user", "content": user_content}
130140
]
131141

132142
attempt = 0
133143
while True:
134144
try:
135145
resp = self.client.chat.completions.create(
136-
model = self.chat_deployment,
137-
messages = messages,
138-
max_tokens = max_tokens
146+
model = self.chat_deployment,
147+
messages = messages,
148+
max_completion_tokens = max_tokens
139149
)
140150
return resp.choices[0].message.content
141151

0 commit comments

Comments (0)