Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional

Expand Down Expand Up @@ -225,6 +226,7 @@ def _load_yaml(self, path: Path) -> Optional[Dict[str, Any]]:
try:
with open(path, "r", encoding="utf-8") as f:
data = yaml.safe_load(f) or {}
data = self._expand_env_vars(data)
self._cache[cache_key] = data
return data
except Exception as e:
Expand All @@ -251,6 +253,29 @@ def _deep_merge(self, target: Dict[str, Any], source: Dict[str, Any]) -> None:
else:
target[key] = value

def _expand_env_vars(self, value: Any) -> Any:
    """
    Recursively expand ${VAR} and $VAR placeholders using environment variables.

    Dictionaries and lists are walked recursively (dictionary keys are left
    unchanged) and rebuilt as new containers. Placeholders inside string
    values are substituted with the value of the matching environment
    variable. Every other type is returned as-is.

    A placeholder naming an unset environment variable is left untouched,
    while a variable set to the empty string expands to the empty string.

    :param value: A parsed YAML value (mapping, sequence or scalar).
    :return: The value with placeholders expanded in all nested strings.
    """
    if isinstance(value, dict):
        return {k: self._expand_env_vars(v) for k, v in value.items()}
    if isinstance(value, list):
        return [self._expand_env_vars(v) for v in value]
    # Non-strings are never expanded; strings without "$" cannot contain a
    # placeholder, so the regex pass is skipped for them.
    if not isinstance(value, str) or "$" not in value:
        return value

    # The annotation is quoted so it is never evaluated at runtime:
    # subscripting re.Match raises TypeError on Python versions before 3.9.
    def replacer(match: "re.Match[str]") -> str:
        # Exactly one of the two groups participates in a match and the
        # pattern guarantees it is non-empty, so var_name is always set.
        var_name = match.group(1) or match.group(2)
        # Fall back to the original placeholder text when the variable is unset.
        return os.environ.get(var_name, match.group(0))

    return re.sub(
        r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)",
        replacer,
        value,
    )

def _discover_yaml_files(self, directory: Path) -> List[Path]:
"""
Discover all YAML files in a directory.
Expand Down Expand Up @@ -520,11 +545,12 @@ def _update_api_base(config: Dict[str, Any], agent_key: str) -> None:
return

api_type = agent_config.get("API_TYPE", "").lower()
use_responses = bool(agent_config.get("USE_RESPONSES", False))

if api_type == "aoai":
# Azure OpenAI - construct deployment URL
api_base = agent_config.get("API_BASE", "")
if api_base and "deployments" not in api_base:
if api_base and "deployments" not in api_base and not use_responses:
deployment_id = agent_config.get("API_DEPLOYMENT_ID", "")
api_version = agent_config.get("API_VERSION", "")
if deployment_id:
Expand Down
25 changes: 25 additions & 0 deletions config/config_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,31 @@ def get(self, key: str, default: Any = None) -> Any:
except KeyError:
return default

def to_dict(self) -> Dict[str, Any]:
    """
    Serialize this AgentConfig into a plain dictionary.

    The fixed fields are emitted first under their uppercase keys, followed
    by any entries from ``self._extras`` whose key is not already one of the
    fixed keys (a fixed field always wins over an extra of the same name).
    """
    # (output key, attribute name) pairs, in the order they are emitted.
    fixed_fields = (
        ("VISUAL_MODE", "visual_mode"),
        ("REASONING_MODEL", "reasoning_model"),
        ("API_TYPE", "api_type"),
        ("API_BASE", "api_base"),
        ("API_KEY", "api_key"),
        ("API_VERSION", "api_version"),
        ("API_MODEL", "api_model"),
        ("AAD_TENANT_ID", "aad_tenant_id"),
        ("AAD_API_SCOPE", "aad_api_scope"),
        ("AAD_API_SCOPE_BASE", "aad_api_scope_base"),
        ("API_DEPLOYMENT_ID", "api_deployment_id"),
        ("PROMPT", "prompt"),
        ("EXAMPLE_PROMPT", "example_prompt"),
    )
    result: Dict[str, Any] = {}
    for out_key, attr_name in fixed_fields:
        result[out_key] = getattr(self, attr_name)

    # setdefault only inserts when the key is absent, so extras never
    # replace a fixed field.
    for extra_key, extra_value in self._extras.items():
        result.setdefault(extra_key, extra_value)
    return result

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "AgentConfig":
"""
Expand Down
4 changes: 4 additions & 0 deletions config/ufo/agents.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ HOST_AGENT:
# API_VERSION: "2024-02-15-preview"
# API_MODEL: "gpt-4o"
# API_DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID" # The deployment id for the AOAI API
# USE_RESPONSES: True # Use Responses API instead of Chat Completions

### For Azure AD authentication (azure_ad)
# API_TYPE: "azure_ad"
Expand Down Expand Up @@ -45,6 +46,7 @@ APP_AGENT:
# API_VERSION: "2024-02-15-preview"
# API_MODEL: "gpt-4o"
# API_DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
# USE_RESPONSES: True # Use Responses API instead of Chat Completions

### For Azure AD authentication (azure_ad)
# API_TYPE: "azure_ad"
Expand Down Expand Up @@ -72,6 +74,7 @@ BACKUP_AGENT:
# API_VERSION: "2024-02-15-preview"
# API_MODEL: "gpt-4-vision-preview"
# API_DEPLOYMENT_ID: "gpt-4-visual-preview"
# USE_RESPONSES: True # Use Responses API instead of Chat Completions

### For Azure AD authentication (azure_ad)
# API_TYPE: "azure_ad"
Expand All @@ -95,6 +98,7 @@ EVALUATION_AGENT:
# API_VERSION: "2024-02-15-preview"
# API_MODEL: "gpt-4o"
# API_DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
# USE_RESPONSES: True # Use Responses API instead of Chat Completions

### For Azure AD authentication (azure_ad)
# API_TYPE: "azure_ad"
Expand Down
4 changes: 2 additions & 2 deletions config/ufo/system.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ ANNOTATION_FONT_SIZE: 22
# Control Actions
CLICK_API: "click_input" # The click API
AFTER_CLICK_WAIT: 0 # The wait time after clicking in seconds
INPUT_TEXT_API: "type_keys" # The input text API: type_keys or set_text
INPUT_TEXT_API: "set_text" # The input text API: type_keys or set_text
INPUT_TEXT_ENTER: False # Whether to press enter after typing the text
INPUT_TEXT_INTER_KEY_PAUSE: 0.05 # The pause time between each key press

Expand All @@ -61,7 +61,7 @@ LOG_LEVEL: "DEBUG" # The log level
INCLUDE_LAST_SCREENSHOT: True # Whether to include the last screenshot in the observation
REQUEST_TIMEOUT: 250 # The call timeout for the GPT-V model
LOG_XML: False # Whether to log the xml file at every step
LOG_TO_MARKDOWN: True # Whether to save the log to markdown file
LOG_TO_MARKDOWN: False # Whether to save the log to markdown file
SCREENSHOT_TO_MEMORY: True # Whether to allow the screenshot to memory

# Image Performance
Expand Down
Loading
Loading