From 2a1c67ce6736cba7ea30f99af558ea402ed16075 Mon Sep 17 00:00:00 2001
From: Toolkit User
Date: Mon, 27 Oct 2025 17:08:41 +0000
Subject: [PATCH 1/2] qwen3_workarena_changes

---
 src/agentlab/agents/dynamic_prompting.py      | 38 ++++++++++
 .../agents/generic_agent/agent_configs.py     | 42 +++++++++++
 src/agentlab/llm/chat_api.py                  | 71 ++++++++++++++++++-
 3 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/src/agentlab/agents/dynamic_prompting.py b/src/agentlab/agents/dynamic_prompting.py
index 92ad25b9..6a585dd8 100644
--- a/src/agentlab/agents/dynamic_prompting.py
+++ b/src/agentlab/agents/dynamic_prompting.py
@@ -412,6 +412,7 @@ def __init__(self, obs, flags: ObsFlags) -> None:
             visible=lambda: flags.use_html,
             prefix="## ",
         )
+        obs["axtree_txt"] = remove_ui_patterns(obs["axtree_txt"])
         self.ax_tree = AXTree(
             obs["axtree_txt"],
             visible_elements_only=flags.filter_visible_elements_only,
@@ -874,3 +875,40 @@ def obs_mapping(obs: dict):
             return obs

     return obs_mapping
+
+
+
+
+import re
+
+def remove_ui_patterns(text):
+    """
+    Remove lines containing specific UI patterns for ServiceNow accessibility tree text.
+
+    Args:
+        text (str): The input string containing the accessibility tree
+
+    Returns:
+        str: The cleaned string with lines containing UI patterns removed
+    """
+
+    # Words to look for
+    words_to_remove = ["Edit Widget", "Edit Widget Preferences", "Close Widget", "Add content"]
+
+    # Split text into lines
+    lines = text.split('\n')
+
+    # Keep lines that don't contain any of the words
+    filtered_lines = []
+    for line in lines:
+        should_keep = True
+        for word in words_to_remove:
+            if word in line:
+                should_keep = False
+                break
+        if should_keep:
+            filtered_lines.append(line)
+
+    # Join the remaining lines back together
+    return '\n'.join(filtered_lines)
+
diff --git a/src/agentlab/agents/generic_agent/agent_configs.py b/src/agentlab/agents/generic_agent/agent_configs.py
index ce8914a4..c24e91a2 100644
--- a/src/agentlab/agents/generic_agent/agent_configs.py
+++ b/src/agentlab/agents/generic_agent/agent_configs.py
@@ -149,6 +149,48 @@
     add_missparsed_messages=True,
 )

+# qwen3 default config - same as llama3-70b but with use_think_history=False
+FLAGS_QWEN3 = GenericPromptFlags(
+    obs=dp.ObsFlags(
+        use_html=False,
+        use_ax_tree=True,
+        use_focused_element=True,
+        use_error_logs=False,
+        use_history=True,
+        use_past_error_logs=False,
+        use_action_history=True,
+        use_think_history=False,  # Qwen3 doesn't include thinking history
+        use_diff=False,
+        html_type="pruned_html",
+        use_screenshot=False,
+        use_som=False,
+        extract_visible_tag=True,
+        extract_clickable_tag=False,
+        extract_coords="False",
+        filter_visible_elements_only=False,
+    ),
+    action=dp.ActionFlags(
+        action_set=HighLevelActionSetArgs(
+            subsets=["bid"],
+            multiaction=False,
+        ),
+        long_description=False,
+        individual_examples=True,
+    ),
+    use_plan=False,
+    use_criticise=False,
+    use_thinking=True,
+    use_memory=False,
+    use_concrete_example=True,
+    use_abstract_example=True,
+    use_hints=True,
+    enable_chat=False,
+    max_prompt_tokens=40_000,
+    be_cautious=True,
+    extra_instructions=None,
+    add_missparsed_messages=True,
+)
+
 AGENT_LLAMA3_70B = GenericAgentArgs(
     chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/meta-llama/llama-3-70b-instruct"],
     flags=FLAGS_LLAMA3_70B,
diff --git a/src/agentlab/llm/chat_api.py b/src/agentlab/llm/chat_api.py
index 188747ac..47a49e2b 100644
--- a/src/agentlab/llm/chat_api.py
+++ b/src/agentlab/llm/chat_api.py
@@ -324,7 +324,7 @@ def __call__(self, messages: list[dict], n_samples: int = 1, temperature: float
         tracking.TRACKER.instance(input_tokens, output_tokens, cost)

         if n_samples == 1:
-            res = AIMessage(completion.choices[0].message.content)
+            res = AIMessage(_extract_thinking_content_from_response(completion))
             if self.log_probs:
                 res["log_probs"] = completion.choices[0].log_probs
             return res
@@ -593,3 +593,72 @@ def make_model(self):
             temperature=self.temperature,
             max_tokens=self.max_new_tokens,
         )
+
+
+
+
+import logging
+
+def _extract_thinking_content_from_response(response, wrap_tag: str = "think"):
+    """
+    Extracts the content from the message, including reasoning if available.
+    It wraps the reasoning in <think>...</think> tags for easy identification of reasoning content,
+    when the LLM produces 'text' and 'reasoning' in the same message.
+
+    Args:
+        response: The message object or dict containing content and reasoning.
+        wrap_tag: The tag name to wrap reasoning content (default: "think").
+
+    Returns:
+        str: The extracted content with reasoning wrapped in specified tags.
+    """
+    # Normalize to dict
+    message = response.choices[0].message
+    if not isinstance(message, dict):
+        message = message.to_dict()
+
+    # --- Extract reasoning from either `reasoning` or `reasoning_content` (and optional metadata) ---
+    reasoning_text = (
+        message.get("reasoning")
+        or message.get("reasoning_content")
+    )
+
+    # --- Extract surface text/content (keeps the original behavior, but handles list-style `content`) ---
+    msg_text = message.get("text", "")  # works for OpenRouter
+    raw_content = message.get("content", "")
+
+    if isinstance(raw_content, list):
+        # Concatenate text-like parts if provider returns content blocks
+        parts = []
+        for part in raw_content:
+            if isinstance(part, str):
+                parts.append(part)
+            elif isinstance(part, dict):
+                # Common shapes: {"type":"text","text":"..."} or {"type":"text","text":{"value":"..."}}
+                if part.get("type") == "text":
+                    txt = part.get("text")
+                    if isinstance(txt, dict) and isinstance(txt.get("value"), str):
+                        parts.append(txt["value"])
+                    elif isinstance(txt, str):
+                        parts.append(txt)
+                else:
+                    # Fallback: try a few likely keys
+                    for k in ("content", "text", "value"):
+                        v = part.get(k)
+                        if isinstance(v, str):
+                            parts.append(v)
+                            break
+                    else:
+                        parts.append(str(part))
+        msg_content = "\n".join(p for p in parts if p)
+    else:
+        msg_content = raw_content if isinstance(raw_content, str) else str(raw_content or "")
+
+    # --- Wrap reasoning if present ---
+    if reasoning_text:
+        reasoning_wrapped = f"<{wrap_tag}>{reasoning_text}</{wrap_tag}>\n" if wrap_tag else (reasoning_text + "\n")
+        logging.debug("Extracting content from response.choices[i].message.(reasoning|reasoning_content)")
+    else:
+        reasoning_wrapped = ""
+
+    return f"{reasoning_wrapped}{msg_text}{msg_content}"
\ No newline at end of file

From 0701c52c4d7be06b20ad4287a3f9330a1866733e Mon Sep 17 00:00:00 2001
From: optimass
Date: Fri, 31 Oct 2025 16:05:37 +0000
Subject: [PATCH 2/2] hardcoded max_retry to 1

---
 src/agentlab/agents/generic_agent/generic_agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py
index d1f48f76..0fa8b529 100644
--- a/src/agentlab/agents/generic_agent/generic_agent.py
+++ b/src/agentlab/agents/generic_agent/generic_agent.py
@@ -30,7 +30,7 @@ class GenericAgentArgs(AgentArgs):
     chat_model_args: BaseModelArgs = None
     flags: GenericPromptFlags = None
-    max_retry: int = 4
+    max_retry: int = 1

     def __post_init__(self):
         try:  # some attributes might be temporarily args.CrossProd for hyperparameter generation
@@ -77,7 +77,7 @@ def __init__(
         self,
         chat_model_args: BaseModelArgs,
         flags: GenericPromptFlags,
-        max_retry: int = 4,
+        max_retry: int = 1,
     ):
         self.chat_llm = chat_model_args.make_model()
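
Reviewer note (illustrative, not part of the patch): a minimal sketch of how the new FLAGS_QWEN3 could be wired into an agent definition in agent_configs.py, mirroring AGENT_LLAMA3_70B above. The Qwen3 key in CHAT_MODEL_ARGS_DICT is a placeholder assumption, since this patch does not add one; substitute whatever Qwen3 entry exists locally.

    # Hypothetical addition to src/agentlab/agents/generic_agent/agent_configs.py
    AGENT_QWEN3 = GenericAgentArgs(
        chat_model_args=CHAT_MODEL_ARGS_DICT["<qwen3-model-key>"],  # placeholder key, not defined in this patch
        flags=FLAGS_QWEN3,
        max_retry=1,  # matches the default hardcoded in PATCH 2/2
    )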