microsoft · jaeko44 · Feb 6, 2026 · Feb 6, 2026 · Feb 13, 2026 · Feb 13, 2026
diff --git a/config/config_loader.py b/config/config_loader.py
@@ -45,6 +45,7 @@
 
 import logging
 import os
+import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
@@ -225,6 +226,7 @@ def _load_yaml(self, path: Path) -> Optional[Dict[str, Any]]:
         try:
             with open(path, "r", encoding="utf-8") as f:
                 data = yaml.safe_load(f) or {}
+            data = self._expand_env_vars(data)
             self._cache[cache_key] = data
             return data
         except Exception as e:
@@ -251,6 +253,29 @@ def _deep_merge(self, target: Dict[str, Any], source: Dict[str, Any]) -> None:
             else:
                 target[key] = value
 
+    def _expand_env_vars(self, value: Any) -> Any:
+        """
+        Recursively expand ${VAR} and $VAR placeholders using environment variables.
+
+        Dict values and list items are expanded (dict keys are not); only strings
+        are substituted, every other type is returned as-is. Unset variables are
+        left untouched; there is no escape syntax for a literal "$".
+
+        :param value: A parsed YAML value (mapping, sequence, or scalar).
+        :return: The value with placeholders substituted (dicts/lists are rebuilt).
+        """
+        if isinstance(value, dict):
+            return {k: self._expand_env_vars(v) for k, v in value.items()}
+        if isinstance(value, list):
+            return [self._expand_env_vars(v) for v in value]
+        if not isinstance(value, str) or "$" not in value:
+            return value  # Nothing to substitute: skip the regex pass entirely.
+        return re.sub(
+            r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)",
+            lambda m: os.environ.get(m.group(1) or m.group(2), m.group(0)),
+            value,
+        )
+
     def _discover_yaml_files(self, directory: Path) -> List[Path]:
         """
         Discover all YAML files in a directory.
@@ -520,11 +545,12 @@ def _update_api_base(config: Dict[str, Any], agent_key: str) -> None:
             return
 
         api_type = agent_config.get("API_TYPE", "").lower()
+        use_responses = bool(agent_config.get("USE_RESPONSES", False))
 
         if api_type == "aoai":
             # Azure OpenAI - construct deployment URL
             api_base = agent_config.get("API_BASE", "")
-            if api_base and "deployments" not in api_base:
+            if api_base and "deployments" not in api_base and not use_responses:
                 deployment_id = agent_config.get("API_DEPLOYMENT_ID", "")
                 api_version = agent_config.get("API_VERSION", "")
                 if deployment_id:

diff --git a/config/config_schemas.py b/config/config_schemas.py
@@ -89,6 +89,31 @@ def get(self, key: str, default: Any = None) -> Any:
         except KeyError:
             return default
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serialize this AgentConfig into a plain dictionary.
+
+        Each fixed field is emitted under the uppercase form of its attribute
+        name, in a fixed order. Entries from ``_extras`` follow, but only
+        under keys that no fixed field already occupies.
+
+        :return: A new dict holding the fixed fields plus non-conflicting extras.
+        """
+        fixed_keys = (
+            "VISUAL_MODE",
+            "REASONING_MODEL",
+            "API_TYPE", "API_BASE", "API_KEY", "API_VERSION", "API_MODEL",
+            "AAD_TENANT_ID", "AAD_API_SCOPE", "AAD_API_SCOPE_BASE",
+            "API_DEPLOYMENT_ID",
+            "PROMPT", "EXAMPLE_PROMPT",
+        )
+        # Every fixed key is the upper-cased name of the attribute that backs it.
+        result: Dict[str, Any] = {k: getattr(self, k.lower()) for k in fixed_keys}
+        # setdefault keeps the fixed value whenever an extra reuses its key.
+        for extra_key, extra_value in self._extras.items():
+            result.setdefault(extra_key, extra_value)
+        return result
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "AgentConfig":
         """

diff --git a/config/ufo/agents.yaml.template b/config/ufo/agents.yaml.template
@@ -18,6 +18,7 @@ HOST_AGENT:
   # API_VERSION: "2024-02-15-preview"
   # API_MODEL: "gpt-4o"
   # API_DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"  # The deployment id for the AOAI API
+  # USE_RESPONSES: True  # Use Responses API instead of Chat Completions
 
   ### For Azure AD authentication (azure_ad)
   # API_TYPE: "azure_ad"
@@ -45,6 +46,7 @@ APP_AGENT:
   # API_VERSION: "2024-02-15-preview"
   # API_MODEL: "gpt-4o"
   # API_DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
+  # USE_RESPONSES: True  # Use Responses API instead of Chat Completions
 
   ### For Azure AD authentication (azure_ad)
   # API_TYPE: "azure_ad"
@@ -72,6 +74,7 @@ BACKUP_AGENT:
   # API_VERSION: "2024-02-15-preview"
   # API_MODEL: "gpt-4-vision-preview"
   # API_DEPLOYMENT_ID: "gpt-4-visual-preview"
+  # USE_RESPONSES: True  # Use Responses API instead of Chat Completions
 
   ### For Azure AD authentication (azure_ad)
   # API_TYPE: "azure_ad"
@@ -95,6 +98,7 @@ EVALUATION_AGENT:
   # API_VERSION: "2024-02-15-preview"
   # API_MODEL: "gpt-4o"
   # API_DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
+  # USE_RESPONSES: True  # Use Responses API instead of Chat Completions
 
   ### For Azure AD authentication (azure_ad)
   # API_TYPE: "azure_ad"

diff --git a/config/ufo/system.yaml b/config/ufo/system.yaml
@@ -49,7 +49,7 @@ ANNOTATION_FONT_SIZE: 22
 # Control Actions
 CLICK_API: "click_input"  # The click API
 AFTER_CLICK_WAIT: 0  # The wait time after clicking in seconds
-INPUT_TEXT_API: "type_keys"  # The input text API: type_keys or set_text
+INPUT_TEXT_API: "set_text"  # The input text API: type_keys or set_text
 INPUT_TEXT_ENTER: False  # Whether to press enter after typing the text
 INPUT_TEXT_INTER_KEY_PAUSE: 0.05  # The pause time between each key press
 
@@ -61,7 +61,7 @@ LOG_LEVEL: "DEBUG"  # The log level
 INCLUDE_LAST_SCREENSHOT: True  # Whether to include the last screenshot in the observation
 REQUEST_TIMEOUT: 250  # The call timeout for the GPT-V model
 LOG_XML: False  # Whether to log the xml file at every step
-LOG_TO_MARKDOWN: True  # Whether to save the log to markdown file
+LOG_TO_MARKDOWN: False  # Whether to save the log to markdown file
 SCREENSHOT_TO_MEMORY: True  # Whether to allow the screenshot to memory
 
 # Image Performance