17 changes: 8 additions & 9 deletions .vscode/launch.json
@@ -5,7 +5,7 @@
"version": "0.2.0",
"configurations": [
{
"name": "Python: Quart",
"name": "Backend (Python)",
"type": "debugpy",
"request": "launch",
"module": "quart",
@@ -22,18 +22,17 @@
"-p 50505"
],
"console": "integratedTerminal",
"justMyCode": false,
"envFile": "${input:dotEnvFilePath}",
"justMyCode": false
},
{
"name": "Frontend: watch",
"name": "Frontend",
"type": "node-terminal",
"request": "launch",
"command": "npm run dev",
"cwd": "${workspaceFolder}/app/frontend",
},
{
"name": "Python: Debug Tests",
"name": "Tests (Python)",
"type": "debugpy",
"request": "launch",
"program": "${file}",
@@ -42,11 +41,11 @@
"justMyCode": false
}
],
"inputs": [
"compounds": [
{
"id": "dotEnvFilePath",
"type": "command",
"command": "azure-dev.commands.getDotEnvFilePath"
"name": "Frontend & Backend",
"configurations": ["Backend (Python)", "Frontend"],
"stopAll": true
}
]
}
27 changes: 24 additions & 3 deletions docs/localdev.md
@@ -1,5 +1,16 @@
# Local development of Chat App

After deploying the app to Azure, you may want to continue development locally. This guide explains how to run the app locally, including hot reloading and debugging.

* [Running development server from the command line](#running-development-server-from-the-command-line)
* [Hot reloading frontend and backend files](#hot-reloading-frontend-and-backend-files)
* [Using VS Code "Run and Debug"](#using-vs-code-run-and-debug)
* [Using a local OpenAI-compatible API](#using-a-local-openai-compatible-api)
* [Using Ollama server](#using-ollama-server)
* [Using llamafile server](#using-llamafile-server)

## Running development server from the command line

You can only run locally **after** having successfully run the `azd up` command. If you haven't yet, follow the steps in [Azure deployment](../README.md#azure-deployment) above.

1. Run `azd auth login`
@@ -40,6 +51,16 @@ Navigate to the URL shown in the terminal (in this case, `http://localhost:5173/

Then, whenever you make changes to frontend files, the changes will be automatically reloaded, without any browser refresh needed.
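
If you prefer to start the frontend watcher from a terminal instead of the launch configuration, a minimal sketch (assuming the frontend lives in `app/frontend` and Vite serves on port 5173, as in the launch configuration above) is:

```shell
# Start the Vite dev server with hot module reloading
# (directory and command are taken from the "Frontend" launch configuration above)
cd app/frontend
npm run dev
```

Leave it running while you edit; Vite pushes updated modules to the browser without a full page refresh.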

## Using VS Code "Run and Debug"

This project includes configurations defined in `.vscode/launch.json` that allow you to run and debug the app directly from VS Code:

* "Backend (Python)": Starts the Python backend server, defaulting to port 50505.
* "Frontend": Starts the frontend server using Vite, typically at port 5173.
* "Frontend & Backend": A compound configuration that starts both the frontend and backend servers.

When you run these configurations, you can set breakpoints in your code and debug as you would in a normal VS Code debugging session.

## Using a local OpenAI-compatible API

You may want to save costs by developing against a local LLM server, such as
@@ -60,9 +81,9 @@ You should now be able to use the "Ask" tab.

⚠️ Limitations:

- The "Chat" tab will only work if the local language model supports function calling.
- Your search mode must be text only (no vectors), since the search index is only populated with OpenAI-generated embeddings, and the local OpenAI host can't generate those.
- The conversation history will be truncated using the GPT tokenizers, which may not be the same as the local model's tokenizer, so if you have a long conversation, you may end up with token limit errors.
* The "Chat" tab will only work if the local language model supports function calling.
* Your search mode must be text only (no vectors), since the search index is only populated with OpenAI-generated embeddings, and the local OpenAI host can't generate those.
* The conversation history will be truncated using the GPT tokenizers, which may not be the same as the local model's tokenizer, so if you have a long conversation, you may end up with token limit errors.
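
As a rough sketch of what the local setup can look like with Ollama (the environment variable names and the `local` value below are assumptions for illustration, not taken from this diff; see the Ollama section of the guide for the authoritative steps):

```shell
# Pull a model and make sure the Ollama server is running
# (Ollama exposes an OpenAI-compatible API at http://localhost:11434/v1;
#  skip `ollama serve` if the Ollama app is already running in the background)
ollama pull llama3.1
ollama serve

# Point the app at the local server instead of Azure OpenAI.
# OPENAI_HOST is mentioned in this guide; the "local" value and the
# endpoint variable name are assumptions for illustration.
azd env set OPENAI_HOST local
azd env set LOCAL_OPENAI_ENDPOINT http://localhost:11434/v1
```

You'll likely need to restart the backend server afterwards so it picks up the new environment values.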

> [!NOTE]
> You must set `OPENAI_HOST` back to a non-local value ("azure", "azure_custom", or "openai")