Merge pull request #203 from microsoft/dayland/revert-search-index-skillset-changes-for-unknown-lang

dayland · web-flow · commit 439f42895ed9 · 2023-09-13T23:40:32.000+01:00
Fix token size default and model variant support
diff --git a/README.md b/README.md
@@ -132,11 +132,11 @@ The Information Assistant Accelerator requires access to one of the following Az
 Model Name | Supported Versions
 ---|---
 gpt-35-turbo | 0301, 0613
-gpt-35-turbo-16k | N/A
+**gpt-35-turbo-16k** | N/A
 **gpt-4** | N/A
 gpt-4-32k | N/A
 
-**Important:** It is recommended to use gpt-4 models to achieve the best results from the IA Accelerator. Access to gpt-4 requires approval which can be requested [here](https://aka.ms/oai/get-gpt4). If gpt-4 access is not available gpt-35-turbo (0613) is recommended.
+**Important:** It is recommended to use gpt-4 models to achieve the best results from the IA Accelerator. Access to gpt-4 requires approval, which can be requested [here](https://aka.ms/oai/get-gpt4). If gpt-4 access is not available, gpt-35-turbo-16k (0613) is recommended.
 
 ---
 
diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py
@@ -262,7 +262,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
         # STEP 3: Generate a contextual and content-specific answer using the search results and chat history.
         #Added conditional block to use different system messages for different models.
 
-        if self.model_name == "gpt-35-turbo":
+        if self.model_name.startswith("gpt-35-turbo"):
             messages = self.get_messages_from_history(
                 system_message,
                 self.chatgpt_deployment,
@@ -292,7 +292,7 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
             n=1
         )
             
-        elif self.model_name == "gpt-4":
+        elif self.model_name.startswith("gpt-4"):
             messages = self.get_messages_from_history(
                 "Sources:\n" + content + "\n\n" + system_message,
                 # system_message + "\n\nSources:\n" + content,
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -51,8 +51,9 @@ param searchIndexName string = 'all-files-index'
 param chatGptDeploymentName string = 'chat'
 param chatGptModelName string = 'gpt-35-turbo'
 param chatGptDeploymentCapacity int = 30
-// metadata in our chunking strategy adds about 180 tokens to the size of the chunk, our default target size is 750 tokens so the prameter is set to 570
-param chunkTargetSize string = '570' 
+// metadata in our chunking strategy adds about 180-200 tokens to the size of each chunk;
+// our default target size is 750 tokens, so the chunk files that get indexed will be around 950 tokens each
+param chunkTargetSize string = '750' 
 param targetPages string = 'ALL'
 param formRecognizerApiVersion string = '2022-08-31'
 param pdfSubmitQueue string = 'pdf-submit-queue'