diff --git a/agent_test_data/get_weather_kakinada.json b/agent_test_data/get_weather_kakinada.json new file mode 100644 index 00000000..07d53d17 --- /dev/null +++ b/agent_test_data/get_weather_kakinada.json @@ -0,0 +1,259 @@ +{ + "use_case": "Get weather for Kakinada", + "user_prompt": "What's the weather in Kakinada?", + "tool_calls": [ + { + "url": "https://geocoding-api.open-meteo.com/v1/search?name=Kakinada&count=1&language=en&format=json", + "method": "GET", + "fields": { + "url": "https://geocoding-api.open-meteo.com/v1/search?name=Kakinada&count=1&language=en&format=json", + "method": "GET", + "headers": "{\"Content-Type\":\"application/json\"}", + "body": "" + }, + "response": { + "success": true, + "action": "GET", + "status": 200, + "url": "https://geocoding-api.open-meteo.com/v1/search?name=Kakinada&count=1&language=en&format=json", + "output": { + "generationtime_ms": 0.15807152, + "results": [ + { + "admin1": "Andhra Pradesh", + "admin1_id": 1278629, + "admin2": "Kakinada", + "admin2_id": 12680261, + "admin3": "Kakinada Urban", + "admin3_id": 12686894, + "country": "India", + "country_code": "IN", + "country_id": 1269750, + "elevation": 6, + "feature_code": "PPL", + "id": 1268561, + "latitude": 16.96036, + "longitude": 82.23809, + "name": "Kākināda", + "population": 384182, + "timezone": "Asia/Kolkata" + } + ] + }, + "raw_response": { + "Result": "", + "body": { + "generationtime_ms": 0.15807152, + "results": [ + { + "admin1": "Andhra Pradesh", + "admin1_id": 1278629, + "admin2": "Kakinada", + "admin2_id": 12680261, + "admin3": "Kakinada Urban", + "admin3_id": 12686894, + "country": "India", + "country_code": "IN", + "country_id": 1269750, + "elevation": 6, + "feature_code": "PPL", + "id": 1268561, + "latitude": 16.96036, + "longitude": 82.23809, + "name": "Kākināda", + "population": 384182, + "timezone": "Asia/Kolkata" + } + ] + }, + "headers": { + "Connection": "keep-alive", + "Content-Encoding": "deflate", + "Content-Length": "257", + "Content-Type": "application/json; charset=utf-8", + "Date": "Sat, 27 Dec 2025 06:22:08 GMT", + "X-Encoding-Time": "0.0031948089599609375 ms" + }, + "status": 200, + "success": true, + "url": "https://geocoding-api.open-meteo.com/v1/search?name=Kakinada&count=1&language=en&format=json" + }, + "retries": 1 + } + }, + { + "url": "https://api.open-meteo.com/v1/forecast?latitude=16.96036&longitude=82.23809&timezone=Asia%2FKolkata¤t=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code,wind_speed_10m,wind_direction_10m&daily=temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code&forecast_days=3", + "method": "GET", + "fields": { + "url": "https://api.open-meteo.com/v1/forecast?latitude=16.96036&longitude=82.23809&timezone=Asia%2FKolkata¤t=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code,wind_speed_10m,wind_direction_10m&daily=temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code&forecast_days=3", + "method": "GET", + "headers": "{\"Content-Type\":\"application/json\"}", + "body": "" + }, + "response": { + "success": true, + "action": "GET", + "status": 200, + "url": "https://api.open-meteo.com/v1/forecast?latitude=16.96036&longitude=82.23809&timezone=Asia%2FKolkata¤t=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code,wind_speed_10m,wind_direction_10m&daily=temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code&forecast_days=3", + "output": { + "current": { + "apparent_temperature": 30.6, + "interval": 900, + 
"precipitation": 0, + "relative_humidity_2m": 57, + "temperature_2m": 27.4, + "time": "2025-12-27T11:45", + "weather_code": 1, + "wind_speed_10m": 6 + }, + "current_units": { + "apparent_temperature": "°C", + "interval": "seconds", + "precipitation": "mm", + "relative_humidity_2m": "%", + "temperature_2m": "°C", + "time": "iso8601", + "weather_code": "wmo code", + "wind_speed_10m": "km/h" + }, + "elevation": 7, + "generationtime_ms": 0.3038644790649414, + "latitude": 16.875, + "longitude": 82.25, + "timezone": "Asia/Kolkata", + "timezone_abbreviation": "GMT+5:30", + "utc_offset_seconds": 19800 + }, + "raw_response": { + "Result": "", + "body": { + "current": { + "apparent_temperature": 30.6, + "interval": 900, + "precipitation": 0, + "relative_humidity_2m": 57, + "temperature_2m": 27.4, + "time": "2025-12-27T11:45", + "weather_code": 1, + "wind_speed_10m": 6 + }, + "current_units": { + "apparent_temperature": "°C", + "interval": "seconds", + "precipitation": "mm", + "relative_humidity_2m": "%", + "temperature_2m": "°C", + "time": "iso8601", + "weather_code": "wmo code", + "wind_speed_10m": "km/h" + }, + "elevation": 7, + "generationtime_ms": 0.3038644790649414, + "latitude": 16.875, + "longitude": 82.25, + "timezone": "Asia/Kolkata", + "timezone_abbreviation": "GMT+5:30", + "utc_offset_seconds": 19800 + }, + "headers": { + "Connection": "keep-alive", + "Content-Encoding": "deflate", + "Content-Type": "application/json; charset=utf-8", + "Date": "Sat, 27 Dec 2025 06:22:17 GMT", + "Transfer-Encoding": "chunked" + }, + "status": 200, + "success": true, + "url": "https://api.open-meteo.com/v1/forecast?latitude=16.96036&longitude=82.23809&timezone=Asia%2FKolkata¤t=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code,wind_speed_10m,wind_direction_10m&daily=temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code&forecast_days=3" + }, + "retries": 1 + } + } + ], + "expected_decisions": [ + { + "i": 0, + "action": "api", + "tool": "http", + "category": "singul", + "confidence": 0.93, + "runs": "1", + "fields": [ + { + "key": "url", + "value": "https://geocoding-api.open-meteo.com/v1/search?name=Kakinada&count=1&language=en&format=json" + }, + { + "key": "method", + "value": "GET" + }, + { + "key": "headers", + "value": "{\"Accept\":\"application/json\"}" + }, + { + "key": "body", + "value": "" + } + ], + "reason": "Geocode Kakinada to get latitude/longitude for weather lookup." + }, + { + "i": 1, + "action": "api", + "tool": "http", + "category": "singul", + "confidence": 0.9, + "runs": "1", + "fields": [ + { + "key": "url", + "value": "https://api.open-meteo.com/v1/forecast?latitude=16.96036&longitude=82.23809&timezone=Asia%2FKolkata¤t=temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,weather_code,wind_speed_10m,wind_direction_10m&daily=temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code&forecast_days=3" + }, + { + "key": "method", + "value": "GET" + }, + { + "key": "headers", + "value": "{\"Accept\":\"application/json\"}" + }, + { + "key": "body", + "value": "" + } + ], + "reason": "Fetch current weather for Kakinada using Open-Meteo (no API key required)." + }, + { + "i": 2, + "action": "answer", + "tool": "", + "category": "standalone", + "confidence": 0.6, + "runs": "1", + "fields": [ + { + "key": "output", + "value": "I’m fetching the current weather in Kakinada now." + } + ], + "reason": "Provide the weather result to the user after API responses are available." 
+ }, + { + "i": 2, + "action": "finish", + "tool": "", + "category": "finish", + "confidence": 0.92, + "runs": "1", + "fields": [ + { + "key": "body", + "value": "### Current weather in Kakinada (Andhra Pradesh, India)\n- **Time (local):** 2025-12-28 17:00 (Asia/Kolkata)\n- **Temperature:** 24.8°C\n- **Feels like:** 25.6°C\n- **Humidity:** 57%\n- **Wind speed:** 7.6 km/h\n- **Precipitation:** 0 mm\n- **Conditions:** Mostly clear (WMO code 1)" + } + ], + "reason": "Delivered the current weather for Kakinada from the fetched Open-Meteo data." + } + ] +} \ No newline at end of file diff --git a/agent_test_mock.go b/agent_test_mock.go new file mode 100644 index 00000000..c195d89e --- /dev/null +++ b/agent_test_mock.go @@ -0,0 +1,485 @@ +package shuffle + +import ( + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "log" + "net/url" + "os" + "path/filepath" + "strings" +) + +// MockToolCall represents a single tool call with its request and response +type MockToolCall struct { + URL string `json:"url"` + Method string `json:"method"` + Fields map[string]string `json:"fields"` + Response map[string]interface{} `json:"response"` +} + +// MockUseCaseData represents the test data for a single use case +type MockUseCaseData struct { + UseCase string `json:"use_case"` + UserPrompt string `json:"user_prompt"` + ToolCalls []MockToolCall `json:"tool_calls"` + ExpectedDecisions []map[string]interface{} `json:"expected_decisions"` +} + +// RunAgentDecisionMockHandler handles agent decision execution in test mode +// This function is called instead of the real Singul endpoint when AGENT_TEST_MODE=true +// +// # It loads mock data based on use case and matches tool calls by URL and fields +// +// Parameters: +// - execution: The full workflow execution context +// - decision: The agent decision to execute containing Tool, Action, Fields, etc. 
+// +// Returns: +// - rawResponse: The mock tool result as bytes (in Singul format) +// - debugUrl: Debug URL (empty in test mode) +// - appname: The app name (same as decision.Tool) +// - error: Any error that occurred +func RunAgentDecisionMockHandler(execution WorkflowExecution, decision AgentDecision) ([]byte, string, string, error) { + log.Printf("[DEBUG][%s] Mock handler called for tool=%s, action=%s", execution.ExecutionId, decision.Tool, decision.Action) + + useCase := os.Getenv("AGENT_TEST_USE_CASE") + if useCase == "" { + log.Printf("[ERROR][%s] AGENT_TEST_USE_CASE not set - cannot determine which test data to load", execution.ExecutionId) + return nil, "", decision.Tool, errors.New("AGENT_TEST_USE_CASE environment variable not set") + } + + response, err := GetMockSingulResponse(useCase, decision.Fields) + if err != nil { + log.Printf("[ERROR][%s] Failed to get mock response: %s", execution.ExecutionId, err) + return nil, "", decision.Tool, err + } + + // Parse the response to extract raw_response (same as real Singul handler does) + var outputMapped SchemalessOutput + err = json.Unmarshal(response, &outputMapped) + if err != nil { + log.Printf("[ERROR][%s] Failed to unmarshal mock response: %s", execution.ExecutionId, err) + return response, "", decision.Tool, err + } + + // Extract the raw_response field + body := response + if val, ok := outputMapped.RawResponse.(string); ok { + body = []byte(val) + } else if val, ok := outputMapped.RawResponse.([]byte); ok { + body = val + } else if val, ok := outputMapped.RawResponse.(map[string]interface{}); ok { + marshalledRawResp, err := json.MarshalIndent(val, "", " ") + if err != nil { + log.Printf("[ERROR][%s] Failed to marshal raw response: %s", execution.ExecutionId, err) + } else { + body = marshalledRawResp + } + } + + log.Printf("[DEBUG][%s] Returning mock response for %s (success=%v, response_size=%d bytes)", + execution.ExecutionId, decision.Tool, outputMapped.Success, len(body)) + + // Return in same format as real Singul handler: (body, debugUrl, appname, error) + return body, "", decision.Tool, nil +} + +// GetMockSingulResponse is the function that returns mock Singul responses +// It loads the use case data and matches based on URL and fields +// +// Parameters: +// - useCase: The use case name +// - fields: The request fields containing url, method, headers, body +// +// Returns: +// - response: The mock Singul response as bytes (in Singul format) +// - error: Any error that occurred +func GetMockSingulResponse(useCase string, fields []Valuereplace) ([]byte, error) { + useCaseData, err := loadUseCaseData(useCase) + if err != nil { + return nil, err + } + + requestURL := extractFieldValue(fields, "url") + if requestURL == "" { + return nil, errors.New("no URL found in request fields") + } + + log.Printf("[DEBUG] Looking for mock data with URL: %s", requestURL) + + var candidates []MockToolCall + reqURLParsed, err := url.Parse(requestURL) + if err != nil { + log.Printf("[ERROR] Invalid request URL %s: %v", requestURL, err) + return nil, fmt.Errorf("invalid request URL: %w", err) + } + for _, tc := range useCaseData.ToolCalls { + if urlsEqual(reqURLParsed, tc.URL) { + candidates = append(candidates, tc) + } + } + + // If no exact matches, try fuzzy matching + if len(candidates) == 0 { + log.Printf("[DEBUG] No exact match, trying fuzzy matching...") + bestMatch, score := findBestFuzzyMatch(reqURLParsed, useCaseData.ToolCalls) + if score >= 0.80 { + log.Printf("[INFO] Found fuzzy match with %.1f%% similarity: %s", score*100, 
bestMatch.URL) + candidates = append(candidates, bestMatch) + } else { + return nil, fmt.Errorf("no mock data found for URL: %s in use case: %s (best match: %.1f%%)", requestURL, useCase, score*100) + } + } + + // If only one match, return it + if len(candidates) == 1 { + log.Printf("[DEBUG] Found exact match for URL: %s", requestURL) + return marshalResponse(candidates[0].Response) + } + + // Multiple matches - compare fields to find exact match + log.Printf("[DEBUG] Found %d candidates for URL, comparing fields...", len(candidates)) + for _, candidate := range candidates { + if fieldsMatch(fields, candidate.Fields) { + log.Printf("[DEBUG] Found exact match based on fields") + return marshalResponse(candidate.Response) + } + } + + // No exact match - return first candidate with a warning + log.Printf("[WARNING] No exact field match found, returning first candidate") + return marshalResponse(candidates[0].Response) +} + +// urlsEqual reports whether two URLs match exactly, ignoring query-parameter order. +func urlsEqual(req *url.URL, stored string) bool { + storedURL, err := url.Parse(stored) + if err != nil { + log.Printf("[WARN] Invalid stored URL %s: %v", stored, err) + return false + } + if req.Scheme != storedURL.Scheme || req.Host != storedURL.Host || req.Path != storedURL.Path { + return false + } + reqQuery := req.Query() + storedQuery := storedURL.Query() + // If the number of parameters differs, not a match + if len(reqQuery) != len(storedQuery) { + return false + } + + for key, reqVals := range reqQuery { + storedVals, ok := storedQuery[key] + if !ok { + return false + } + if len(reqVals) != len(storedVals) { + return false + } + for i, v := range reqVals { + if v != storedVals[i] { + return false + } + } + } + return true +} + +// loadUseCaseData loads the test data for a given use case from JSON file +func loadUseCaseData(useCase string) (*MockUseCaseData, error) { + possiblePaths := []string{} + + if envPath := os.Getenv("AGENT_TEST_DATA_PATH"); envPath != "" { + possiblePaths = append(possiblePaths, envPath) + } + + possiblePaths = append(possiblePaths, "agent_test_data") + possiblePaths = append(possiblePaths, "../shuffle-shared/agent_test_data") + possiblePaths = append(possiblePaths, "../../shuffle-shared/agent_test_data") + + if homeDir, err := os.UserHomeDir(); err == nil { + possiblePaths = append(possiblePaths, filepath.Join(homeDir, "Documents", "shuffle-shared", "agent_test_data")) + } + + var filePath string + var foundPath string + + for _, basePath := range possiblePaths { + testPath := filepath.Join(basePath, fmt.Sprintf("%s.json", useCase)) + if _, err := os.Stat(testPath); err == nil { + filePath = testPath + foundPath = basePath + break + } + } + + if filePath == "" { + return nil, fmt.Errorf("could not find test data file %s.json in any of these paths: %v", useCase, possiblePaths) + } + + log.Printf("[DEBUG] Loading use case data from: %s", filePath) + + data, err := ioutil.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("failed to read use case file %s: %s", filePath, err) + } + + var useCaseData MockUseCaseData + err = json.Unmarshal(data, &useCaseData) + if err != nil { + return nil, fmt.Errorf("failed to parse use case data: %s", err) + } + + log.Printf("[DEBUG] Loaded use case '%s' with %d tool calls from %s", useCaseData.UseCase, len(useCaseData.ToolCalls), foundPath) + + return &useCaseData, nil +} + +// extractFieldValue extracts a field value by key from the fields array +func extractFieldValue(fields
[]Valuereplace, key string) string { + for _, field := range fields { + if field.Key == key { + return field.Value + } + } + return "" +} + +func fieldsMatch(requestFields []Valuereplace, storedFields map[string]string) bool { + // Convert request fields to map for easier comparison + requestMap := make(map[string]string) + for _, field := range requestFields { + requestMap[field.Key] = field.Value + } + + for key, storedValue := range storedFields { + requestValue, exists := requestMap[key] + if !exists || requestValue != storedValue { + return false + } + } + + return true +} + +func marshalResponse(response map[string]interface{}) ([]byte, error) { + data, err := json.Marshal(response) + if err != nil { + return nil, fmt.Errorf("failed to marshal response: %s", err) + } + return data, nil +} + +// analyzeTestFailureWithLLM uses LLM to provide detailed analysis of why a test failed +func analyzeTestFailureWithLLM(actualDecisions []interface{}, expectedDecisions []map[string]interface{}, isTimeout bool) string { + cleanActual := stripRawResponses(actualDecisions) + cleanExpected := stripRawResponsesFromMaps(expectedDecisions) + + actualJSON, err := json.MarshalIndent(cleanActual, "", " ") + if err != nil { + return "Failed to analyze: could not marshal actual decisions" + } + + expectedJSON, err := json.MarshalIndent(cleanExpected, "", " ") + if err != nil { + return "Failed to analyze: could not marshal expected decisions" + } + + systemMessage := `You are analyzing agent test failures. +Focus on what the agent ACTUALLY did and where it got stuck. + +Output rules: +- Start with what the agent successfully completed +- Identify the SPECIFIC action and tool where it failed or got stuck +- Compare only that failure point with what was expected +- Ignore answer and finish actions - focus only on API calls and tool usage +- Be concise (max 2-3 sentences) +- Use plain language without special characters like quotes, backticks, or brackets +- Name the specific API or tool that failed + +Example output format: +Agent completed geocoding API call successfully. Got stuck on weather API call - agent used URL with different parameters than expected (missing daily forecast params and using timezone=auto instead of Asia/Kolkata).` + + var userMessage string + if isTimeout { + userMessage = fmt.Sprintf(`The agent test timed out. + +What the agent ACTUALLY did: +%s + +What was EXPECTED (full test plan): +%s + +Analyze from the agent's perspective: +1. Which API calls or tools did the agent successfully complete? +2. Where exactly did it get stuck or fail? +3. What was different about that specific action compared to what was expected? +4. Ignore any answer or finish actions - focus only on the actual work (API calls, tools).`, string(actualJSON), string(expectedJSON)) + } else { + userMessage = fmt.Sprintf(`The agent test failed. + +What the agent ACTUALLY did: +%s + +What was EXPECTED: +%s + +Analyze from the agent's perspective: +1. Which actions did the agent complete successfully? +2. Which specific action/tool failed and why? +3. What was the difference between what the agent did vs what was expected? +4. 
Ignore any answer or finish actions.`, string(actualJSON), string(expectedJSON)) + } + + responseBody, err := RunAiQuery(systemMessage, userMessage) + if err != nil { + log.Printf("[ERROR] Failed to get LLM analysis: %s", err) + return "Failed to analyze with LLM" + } + + failureReason := strings.TrimSpace(responseBody) + if after, ok := strings.CutPrefix(failureReason, "```"); ok { + failureReason = after + } + if after, ok := strings.CutSuffix(failureReason, "```"); ok { + failureReason = after + } + failureReason = strings.TrimSpace(failureReason) + + log.Printf("[INFO] LLM Analysis: %s", failureReason) + return failureReason +} + +// Hmmm, let's see if this helps with token usage, stripRawResponses removes raw_response fields from decisions to save LLM tokens +func stripRawResponses(decisions []interface{}) []interface{} { + cleaned := make([]interface{}, len(decisions)) + for i, d := range decisions { + if decisionMap, ok := d.(map[string]interface{}); ok { + cleanedDecision := make(map[string]interface{}) + for k, v := range decisionMap { + // Skip raw_response and other verbose fields + if k != "raw_response" && k != "RawResponse" && k != "debug_url" && k != "DebugUrl" { + cleanedDecision[k] = v + } + } + cleaned[i] = cleanedDecision + } else { + cleaned[i] = d + } + } + return cleaned +} + +// stripRawResponsesFromMaps removes raw_response fields from expected decisions +func stripRawResponsesFromMaps(decisions []map[string]interface{}) []map[string]interface{} { + cleaned := make([]map[string]interface{}, len(decisions)) + for i, decisionMap := range decisions { + cleanedDecision := make(map[string]interface{}) + for k, v := range decisionMap { + if k != "raw_response" && k != "RawResponse" && k != "debug_url" && k != "DebugUrl" { + cleanedDecision[k] = v + } + } + cleaned[i] = cleanedDecision + } + return cleaned +} + +// findBestFuzzyMatch finds the most similar URL from stored tool calls +// Returns the best match and its similarity score (0.0 to 1.0) +func findBestFuzzyMatch(reqURL *url.URL, toolCalls []MockToolCall) (MockToolCall, float64) { + var bestMatch MockToolCall + bestScore := 0.0 + + for _, tc := range toolCalls { + storedURL, err := url.Parse(tc.URL) + if err != nil { + continue + } + + score := calculateURLSimilarity(reqURL, storedURL) + if score > bestScore { + bestScore = score + bestMatch = tc + } + } + + return bestMatch, bestScore +} + +// calculateURLSimilarity returns a score from 0.0 to 1.0 indicating how similar two URLs are +func calculateURLSimilarity(url1, url2 *url.URL) float64 { + score := 0.0 + totalWeight := 0.0 + + // Scheme (10% weight) + if url1.Scheme == url2.Scheme { + score += 0.10 + } + totalWeight += 0.10 + + // Host (20% weight) + if url1.Host == url2.Host { + score += 0.20 + } + totalWeight += 0.20 + + // Path (20% weight) + if url1.Path == url2.Path { + score += 0.20 + } + totalWeight += 0.20 + + // Query parameters (50% weight) + query1 := url1.Query() + query2 := url2.Query() + + if len(query1) == 0 && len(query2) == 0 { + score += 0.50 + } else if len(query1) > 0 || len(query2) > 0 { + matchingParams := 0 + totalParams := 0 + + allKeys := make(map[string]bool) + for k := range query1 { + allKeys[k] = true + } + for k := range query2 { + allKeys[k] = true + } + totalParams = len(allKeys) + + // Count how many match + for key := range allKeys { + val1, ok1 := query1[key] + val2, ok2 := query2[key] + + if ok1 && ok2 { + // Both have this key - check if values match + if len(val1) == len(val2) { + allMatch := true + for i := range val1 { + 
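// A query parameter only counts as matching when both URLs have the key and
// every value for that key matches in order (checked element-wise below).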
if val1[i] != val2[i] { + allMatch = false + break + } + } + if allMatch { + matchingParams++ + } + } + } + } + + if totalParams > 0 { + paramScore := float64(matchingParams) / float64(totalParams) + score += paramScore * 0.50 + } + } + totalWeight += 0.50 + + return score / totalWeight +} diff --git a/ai.go b/ai.go index 0ad9500e..5f50fc1b 100644 --- a/ai.go +++ b/ai.go @@ -14,7 +14,9 @@ import ( "log" "math/rand" "net/http" + "net/url" "os" + "path/filepath" "reflect" "regexp" "sort" @@ -11974,3 +11976,530 @@ func buildManualInputList(history []ConversationMessage, newPrompt string) []map return items } + +// RunAgentTests runs automated tests for AI agents using test data from agent_test_data/ +func RunAgentTests(resp http.ResponseWriter, request *http.Request) { + ctx := GetContext(request) + + log.Printf("[INFO] Starting automated agent tests") + + // Try multiple possible paths (same as mock handler) + possiblePaths := []string{} + + if envPath := os.Getenv("AGENT_TEST_DATA_PATH"); envPath != "" { + possiblePaths = append(possiblePaths, envPath) + } + possiblePaths = append(possiblePaths, "agent_test_data") + possiblePaths = append(possiblePaths, "../shuffle-shared/agent_test_data") + possiblePaths = append(possiblePaths, "../../shuffle-shared/agent_test_data") + + // Add cross-platform paths using home directory + if homeDir, err := os.UserHomeDir(); err == nil { + possiblePaths = append(possiblePaths, filepath.Join(homeDir, "Documents", "shuffle-shared", "agent_test_data")) + } + + // Find the first valid path + testDataPath := "" + for _, path := range possiblePaths { + if _, err := os.Stat(path); err == nil { + testDataPath = path + log.Printf("[INFO] Found test data directory: %s", path) + break + } + } + + if testDataPath == "" { + log.Printf("[ERROR] Could not find test data directory in any of: %v", possiblePaths) + resp.WriteHeader(500) + resp.Write([]byte(fmt.Sprintf(`{"success": false, "reason": "Test data directory not found. 
Tried: %v"}`, possiblePaths))) + return + } + + // Read all JSON files from test data directory + files, err := ioutil.ReadDir(testDataPath) + if err != nil { + log.Printf("[ERROR] Failed to read test data directory: %s", err) + resp.WriteHeader(500) + resp.Write([]byte(fmt.Sprintf(`{"success": false, "reason": "Failed to read test data: %s"}`, err))) + return + } + + totalTests := 0 + passedTests := 0 + failedTests := 0 + + type TestResult struct { + TestCase string `json:"test_case"` + Status string `json:"status"` + Error string `json:"error,omitempty"` + } + results := []TestResult{} + + // Run tests for each JSON file + for _, file := range files { + if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") { + continue + } + + totalTests++ + useCaseName := strings.TrimSuffix(file.Name(), ".json") + + log.Printf("[INFO] ========== Test %d: %s ==========", totalTests, useCaseName) + + // Load test case + testFilePath := filepath.Join(testDataPath, file.Name()) + testData, err := ioutil.ReadFile(testFilePath) + if err != nil { + log.Printf("[ERROR] Failed to read test file %s: %s", file.Name(), err) + failedTests++ + results = append(results, TestResult{ + TestCase: file.Name(), + Status: "FAIL", + Error: fmt.Sprintf("Failed to read file: %s", err), + }) + continue + } + + var testCase MockUseCaseData + err = json.Unmarshal(testData, &testCase) + if err != nil { + log.Printf("[ERROR] Failed to parse test file %s: %s", file.Name(), err) + failedTests++ + results = append(results, TestResult{ + TestCase: file.Name(), + Status: "FAIL", + Error: fmt.Sprintf("Failed to parse JSON: %s", err), + }) + continue + } + + // Set environment for this test case + os.Setenv("AGENT_TEST_MODE", "true") + os.Setenv("AGENT_TEST_USE_CASE", useCaseName) + + // Run the test + passed, testErr := runSingleAgentTest(ctx, request, testCase, useCaseName) + if passed { + passedTests++ + log.Printf("[INFO] ✅ Test %d PASSED: %s", totalTests, useCaseName) + results = append(results, TestResult{ + TestCase: file.Name(), + Status: "PASS", + }) + } else { + failedTests++ + log.Printf("[ERROR] ❌ Test %d FAILED: %s", totalTests, useCaseName) + results = append(results, TestResult{ + TestCase: file.Name(), + Status: "FAIL", + Error: testErr, + }) + } + } + + log.Printf("[INFO] ========== Test Summary ==========") + log.Printf("[INFO] Total: %d, Passed: %d, Failed: %d", totalTests, passedTests, failedTests) + + // Clean up: Disable test mode after tests complete + os.Setenv("AGENT_TEST_MODE", "false") + os.Setenv("AGENT_TEST_USE_CASE", "") + log.Printf("[INFO] Test mode disabled") + + // Build response + type TestResponse struct { + Success bool `json:"success"` + Total int `json:"total"` + Passed int `json:"passed"` + Failed int `json:"failed"` + Results []TestResult `json:"results"` + } + + response := TestResponse{ + Success: failedTests == 0, + Total: totalTests, + Passed: passedTests, + Failed: failedTests, + Results: results, + } + + responseBytes, _ := json.Marshal(response) + resp.WriteHeader(200) + resp.Write(responseBytes) +} + +// runSingleAgentTest runs a single test case +func runSingleAgentTest(ctx context.Context, request *http.Request, testCase MockUseCaseData, useCaseName string) (bool, string) { + // Get user prompt from test case + userPrompt := "" + if data, ok := testCase.ToolCalls[0].Response["user_prompt"].(string); ok { + userPrompt = data + } + + // Try to get from top level + type TestCaseWithPrompt struct { + UserPrompt string `json:"user_prompt"` + } + var tempData TestCaseWithPrompt + 
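// Note: MockUseCaseData already exposes UserPrompt, so testCase.UserPrompt is
// normally populated by the earlier json.Unmarshal; the marshal/unmarshal
// roundtrip below simply re-reads the same top-level field as a fallback.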
testBytes, _ := json.Marshal(testCase) + json.Unmarshal(testBytes, &tempData) + if tempData.UserPrompt != "" { + userPrompt = tempData.UserPrompt + } + + if userPrompt == "" { + log.Printf("[ERROR] No user_prompt found in test case") + return false, "No user_prompt found in test case" + } + + // Build request body for agent_starter + requestBody := map[string]interface{}{ + "id": uuid.NewV4().String(), + "name": "agent", + "app_name": "AI Agent", + "app_id": "shuffle_agent", + "app_version": "1.0.0", + "environment": "cloud", + "parameters": []map[string]string{ + {"name": "app_name", "value": "openai"}, + {"name": "input", "value": userPrompt}, + {"name": "action", "value": "API"}, + }, + } + + requestBodyBytes, err := json.Marshal(requestBody) + if err != nil { + log.Printf("[ERROR] Failed to marshal request body: %s", err) + return false, fmt.Sprintf("Failed to marshal request body: %s", err) + } + + // Call agent_starter endpoint + baseUrl := "http://localhost:5002" + if os.Getenv("BASE_URL") != "" { + baseUrl = os.Getenv("BASE_URL") + } + + startUrl := fmt.Sprintf("%s/api/v1/apps/agent_starter/run", baseUrl) + req, err := http.NewRequest("POST", startUrl, bytes.NewBuffer(requestBodyBytes)) + if err != nil { + log.Printf("[ERROR] Failed to create start request: %s", err) + return false, fmt.Sprintf("Failed to create start request: %s", err) + } + + // Copy headers from original request + for key, values := range request.Header { + for _, value := range values { + req.Header.Add(key, value) + } + } + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + log.Printf("[ERROR] Failed to start agent: %s", err) + return false, fmt.Sprintf("Failed to start agent: %s", err) + } + defer resp.Body.Close() + + startBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Printf("[ERROR] Failed to read start response: %s", err) + return false, fmt.Sprintf("Failed to read start response: %s", err) + } + + var startResponse struct { + Success bool `json:"success"` + ExecutionId string `json:"execution_id"` + Authorization string `json:"authorization"` + } + err = json.Unmarshal(startBody, &startResponse) + if err != nil { + log.Printf("[ERROR] Failed to parse start response: %s", err) + return false, fmt.Sprintf("Failed to parse start response: %s", err) + } + + if !startResponse.Success { + log.Printf("[ERROR] Failed to start agent: %s", string(startBody)) + return false, fmt.Sprintf("Failed to start agent: %s", string(startBody)) + } + + log.Printf("[INFO] ✅ Started agent (execution_id: %s)", startResponse.ExecutionId) + + // Poll for results + maxRetries := 20 + retryDelay := 5 * time.Second + + var finalResult map[string]interface{} + var lastPolledResult map[string]interface{} // Keep track of last result even if not finished + agentFinished := false + + for i := 0; i < maxRetries; i++ { + time.Sleep(retryDelay) + + // Call streams/results endpoint + resultsUrl := fmt.Sprintf("%s/api/v1/streams/results", baseUrl) + resultsBody := map[string]string{ + "execution_id": startResponse.ExecutionId, + "authorization": startResponse.Authorization, + } + resultsBodyBytes, _ := json.Marshal(resultsBody) + + resultsReq, err := http.NewRequest("POST", resultsUrl, bytes.NewBuffer(resultsBodyBytes)) + if err != nil { + continue + } + + // Copy headers + for key, values := range request.Header { + for _, value := range values { + resultsReq.Header.Add(key, value) + } + } + resultsReq.Header.Set("Content-Type", "application/json") 
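// For orientation, the payload shape this polling loop expects back (inferred
// from the parsing below, not a documented API contract) is roughly:
//
//	{
//	  "result": "{\"status\": \"FINISHED\", \"decisions\": [ ... ]}"
//	}
//
// i.e. "result" is itself a JSON-encoded string whose "status" and "decisions"
// fields drive the checks that follow.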
+ + resultsResp, err := client.Do(resultsReq) + if err != nil { + continue + } + + resultsRespBody, err := ioutil.ReadAll(resultsResp.Body) + resultsResp.Body.Close() + if err != nil { + continue + } + + var resultsData map[string]interface{} + err = json.Unmarshal(resultsRespBody, &resultsData) + if err != nil { + continue + } + + // Extract result field (which is a JSON string) + resultStr, ok := resultsData["result"].(string) + if !ok { + continue + } + + // Parse the result string as JSON + var parsedResult map[string]interface{} + err = json.Unmarshal([]byte(resultStr), &parsedResult) + if err != nil { + continue + } + + // Store this as last polled result (even if not finished) + lastPolledResult = parsedResult + + // Check if finished + status, ok := parsedResult["status"].(string) + if ok && status == "FINISHED" { + finalResult = parsedResult + agentFinished = true + break + } + } + + if !agentFinished { + log.Printf("[ERROR] Agent did not finish within timeout (%d retries)", maxRetries) + + // Get ALL decisions from last polled result (not just finished ones!) + var allDecisions []interface{} + if lastPolledResult != nil { + if decisions, ok := lastPolledResult["decisions"].([]interface{}); ok { + allDecisions = decisions + } + } + + // Use LLM to analyze timeout failure with COMPLETE picture + llmReason := analyzeTestFailureWithLLM(allDecisions, testCase.ExpectedDecisions, true) + return false, fmt.Sprintf("Timeout after %d retries. %s", maxRetries, llmReason) + } + + log.Printf("[INFO] ✅ Agent finished") + + // Compare decisions + actualDecisions, ok := finalResult["decisions"].([]interface{}) + if !ok { + log.Printf("[ERROR] No decisions found in result") + return false, "No decisions found in result" + } + + // Get expected decisions from test case + expectedDecisionsRaw, ok := testCase.ToolCalls[0].Response["expected_decisions"] + if !ok { + // Try to get from parsed test case + type TestCaseWithDecisions struct { + ExpectedDecisions []map[string]interface{} `json:"expected_decisions"` + } + var tempData TestCaseWithDecisions + testBytes, _ := json.Marshal(testCase) + json.Unmarshal(testBytes, &tempData) + + if len(tempData.ExpectedDecisions) == 0 { + log.Printf("[ERROR] No expected_decisions found in test case") + return false, "No expected_decisions found in test case" + } + + // Compare decisions + passed, errMsg := compareDecisions(actualDecisions, tempData.ExpectedDecisions) + if !passed { + // Use LLM to provide detailed analysis + llmReason := analyzeTestFailureWithLLM(actualDecisions, tempData.ExpectedDecisions, false) + return false, llmReason + } + return passed, errMsg + } + + expectedDecisions, ok := expectedDecisionsRaw.([]interface{}) + if !ok { + log.Printf("[ERROR] expected_decisions is not an array") + return false, "expected_decisions is not an array" + } + + // Convert to comparable format + expectedMaps := make([]map[string]interface{}, len(expectedDecisions)) + for i, ed := range expectedDecisions { + if edMap, ok := ed.(map[string]interface{}); ok { + expectedMaps[i] = edMap + } + } + + return compareDecisions(actualDecisions, expectedMaps) +} + +// compareDecisions compares actual vs expected decisions +func compareDecisions(actual []interface{}, expected []map[string]interface{}) (bool, string) { + if len(actual) != len(expected) { + errMsg := fmt.Sprintf("Decision count mismatch: expected %d, got %d", len(expected), len(actual)) + log.Printf("[ERROR] %s", errMsg) + return false, errMsg + } + + for i := 0; i < len(actual); i++ { + actualDecision, ok := 
actual[i].(map[string]interface{}) + if !ok { + errMsg := fmt.Sprintf("Decision %d: invalid format", i) + log.Printf("[ERROR] %s", errMsg) + return false, errMsg + } + + expectedDecision := expected[i] + + // Compare action + actualAction, _ := actualDecision["action"].(string) + expectedAction, _ := expectedDecision["action"].(string) + + // Skip comparison for "answer" actions - they're just progress updates + if actualAction == "answer" || expectedAction == "answer" { + log.Printf("[DEBUG] Decision %d: Skipping comparison for 'answer' action (progress update)", i) + continue + } + + if actualAction != expectedAction { + errMsg := fmt.Sprintf("Decision %d: action mismatch (expected: %s, got: %s)", i, expectedAction, actualAction) + log.Printf("[ERROR] %s", errMsg) + return false, errMsg + } + + // Compare tool + actualTool, _ := actualDecision["tool"].(string) + expectedTool, _ := expectedDecision["tool"].(string) + if actualTool != expectedTool { + errMsg := fmt.Sprintf("Decision %d: tool mismatch (expected: %s, got: %s)", i, expectedTool, actualTool) + log.Printf("[ERROR] %s", errMsg) + return false, errMsg + } + + // Compare fields + actualFields, _ := actualDecision["fields"].([]interface{}) + expectedFields, _ := expectedDecision["fields"].([]interface{}) + + if !compareFields(actualFields, expectedFields, i) { + errMsg := fmt.Sprintf("Decision %d: field mismatch", i) + return false, errMsg + } + + log.Printf("[INFO] ✅ Decision %d: action=%s, tool=%s, fields match", i, actualAction, actualTool) + } + + return true, "" +} + +// compareFields compares field arrays +func compareFields(actual []interface{}, expected []interface{}, decisionIndex int) bool { + // Get the action type from the parent decision context + // We need to know if this is "answer" or "finish" to skip field comparison + + // Convert to maps for easier comparison + actualMap := make(map[string]string) + for _, f := range actual { + if fieldMap, ok := f.(map[string]interface{}); ok { + key, _ := fieldMap["key"].(string) + value, _ := fieldMap["value"].(string) + actualMap[key] = value + } + } + + expectedMap := make(map[string]string) + for _, f := range expected { + if fieldMap, ok := f.(map[string]interface{}); ok { + key, _ := fieldMap["key"].(string) + value, _ := fieldMap["value"].(string) + expectedMap[key] = value + } + } + + // Compare all expected fields + for key, expectedValue := range expectedMap { + actualValue, exists := actualMap[key] + if !exists { + log.Printf("[ERROR] Decision %d: missing field '%s'", decisionIndex, key) + return false + } + + // Normalize whitespace for comparison + expectedValue = strings.TrimSpace(expectedValue) + actualValue = strings.TrimSpace(actualValue) + + // Empty values are OK + if expectedValue == "" && actualValue == "" { + continue + } + + // Skip comparison for LLM-generated content fields + if key == "output" || key == "body" { + // These contain LLM responses which will vary + // Just check they're not empty + if actualValue != "" { + log.Printf("[DEBUG] Decision %d: Skipping exact comparison for field '%s' (LLM-generated content)", decisionIndex, key) + continue + } + } + + // For URL fields, use fuzzy matching (same as mock) + if key == "url" { + expectedURL, err1 := url.Parse(expectedValue) + actualURL, err2 := url.Parse(actualValue) + + if err1 == nil && err2 == nil { + // Use the same fuzzy matching logic as the mock + score := calculateURLSimilarity(actualURL, expectedURL) + if score >= 0.80 { + log.Printf("[DEBUG] Decision %d: URL fuzzy match (%.1f%% 
similarity)", decisionIndex, score*100) + continue + } else { + log.Printf("[ERROR] Decision %d: URL mismatch (%.1f%% similarity). Expected: %s, Got: %s", decisionIndex, score*100, expectedValue, actualValue) + return false + } + } + } + + // Exact match for other fields + if expectedValue != actualValue { + log.Printf("[ERROR] Decision %d: field '%s' mismatch (expected: %s, got: %s)", decisionIndex, key, expectedValue, actualValue) + return false + } + } + + return true +} diff --git a/cloudSync.go b/cloudSync.go index 23e07bd6..40807e0b 100755 --- a/cloudSync.go +++ b/cloudSync.go @@ -2109,6 +2109,32 @@ func RunAgentDecisionSingulActionHandler(execution WorkflowExecution, decision A debugUrl := "" log.Printf("[INFO][%s] Running agent decision action '%s' with app '%s'. This is ran with Singul.", execution.ExecutionId, decision.Action, decision.Tool) + // Check if running in test mode + if os.Getenv("AGENT_TEST_MODE") == "true" { + log.Printf("[DEBUG][%s] AGENT_TEST_MODE enabled - using mock tool execution", execution.ExecutionId) + + // Call mock function instead of real Singul + // Mock function signature: + // func RunAgentDecisionMockHandler(execution WorkflowExecution, decision AgentDecision) ([]byte, string, string, error) + // + // Inputs needed: + // - execution: Full execution context (ExecutionId, Authorization, Workflow, etc) + // - decision: The decision to execute (Tool, Action, Fields, etc) + // + // Returns: (rawResponse []byte, debugUrl string, appname string, error) + // - rawResponse: The mock tool result (what Singul would return) + // - debugUrl: Debug URL (can be empty in tests) + // - appname: The app name (decision.Tool) + // - error: Any error that occurred + // + // The mock function should: + // 1. Load stored result based on decision.Tool + decision.Action + // 2. Return it in the same format as real Singul + // 3. 
The caller (RunAgentDecisionAction) will handle posting to /streams + + return RunAgentDecisionMockHandler(execution, decision) + } + baseUrl := "https://shuffler.io" if os.Getenv("BASE_URL") != "" { baseUrl = os.Getenv("BASE_URL") @@ -2148,8 +2174,9 @@ func RunAgentDecisionSingulActionHandler(execution WorkflowExecution, decision A } parsedAction := CategoryAction{ - AppName: decision.Tool, - Label: decision.Action, + AppName: decision.Tool, + Label: decision.Action, + Query: decision.Reason, // Add the reason field for LLM context Fields: oldFields, diff --git a/shared.go b/shared.go index 28845bf5..1dfa0fcd 100755 --- a/shared.go +++ b/shared.go @@ -16278,8 +16278,28 @@ func handleAgentDecisionStreamResult(workflowExecution WorkflowExecution, action } if foundActionResultIndex < 0 { - log.Printf("[ERROR][%s] Action '%s' was NOT found with any result in the execution (yet)", workflowExecution.ExecutionId, actionResult.Action.ID) - return &workflowExecution, false, errors.New(fmt.Sprintf("ActionResultIndex: Agent node ID for decision ID %s not found", decisionId)) + // In test mode, Singul doesn't create sub-executions, so we need to handle this gracefully + if os.Getenv("AGENT_TEST_MODE") == "true" { + log.Printf("[DEBUG][%s] AGENT_TEST_MODE: Action '%s' not found in results, creating placeholder", workflowExecution.ExecutionId, actionResult.Action.ID) + + // Create a placeholder result for the agent action + placeholderResult := ActionResult{ + Action: actionResult.Action, + ExecutionId: workflowExecution.ExecutionId, + Result: `{"status":"RUNNING","decisions":[]}`, + StartedAt: time.Now().Unix(), + CompletedAt: 0, + Status: "EXECUTING", + } + + workflowExecution.Results = append(workflowExecution.Results, placeholderResult) + foundActionResultIndex = len(workflowExecution.Results) - 1 + + log.Printf("[DEBUG][%s] Created placeholder result at index %d", workflowExecution.ExecutionId, foundActionResultIndex) + } else { + log.Printf("[ERROR][%s] Action '%s' was NOT found with any result in the execution (yet)", workflowExecution.ExecutionId, actionResult.Action.ID) + return &workflowExecution, false, errors.New(fmt.Sprintf("ActionResultIndex: Agent node ID for decision ID %s not found", decisionId)) + } } mappedResult := AgentOutput{} @@ -16291,6 +16311,28 @@ func handleAgentDecisionStreamResult(workflowExecution WorkflowExecution, action return &workflowExecution, false, err } + // In test mode, if the placeholder has no decisions, we need to add the incoming decision + if os.Getenv("AGENT_TEST_MODE") == "true" && len(mappedResult.Decisions) == 0 { + log.Printf("[DEBUG][%s] AGENT_TEST_MODE: Placeholder has no decisions, parsing incoming decision", workflowExecution.ExecutionId) + + // Parse the incoming decision from actionResult + incomingDecision := AgentDecision{} + err = json.Unmarshal([]byte(actionResult.Result), &incomingDecision) + if err != nil { + log.Printf("[ERROR][%s] Failed unmarshalling incoming decision: %s", workflowExecution.ExecutionId, err) + } else { + // Add the decision to the mapped result + mappedResult.Decisions = append(mappedResult.Decisions, incomingDecision) + mappedResult.Status = "RUNNING" + + // Update the workflow execution result with the new decision + updatedResult, _ := json.Marshal(mappedResult) + workflowExecution.Results[foundActionResultIndex].Result = string(updatedResult) + + log.Printf("[DEBUG][%s] Added decision %s to placeholder (total decisions: %d)", workflowExecution.ExecutionId, incomingDecision.RunDetails.Id, len(mappedResult.Decisions)) 
+ } + } + // FIXME: Need to check the current value from the workflowexecution here, instead of using the currently sent in decision // 1. Get the current result for the action
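// End-to-end, a minimal sketch of driving the new test runner in-process. The
// HTTP route and auth wiring are not part of this diff, so both are assumed
// here; RunAgentTests itself toggles AGENT_TEST_MODE and AGENT_TEST_USE_CASE:
//
//	os.Setenv("AGENT_TEST_DATA_PATH", "./agent_test_data") // directory holding get_weather_kakinada.json
//	rec := httptest.NewRecorder()
//	req := httptest.NewRequest("POST", "/api/v1/agent_tests/run", nil) // hypothetical route
//	RunAgentTests(rec, req)
//	log.Printf("agent tests: status=%d body=%s", rec.Code, rec.Body.String())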