MiroMindAI
diff --git a/config/agent_gaia-validation-text-only_mirothinker_single_agent_new_tools.yaml b/config/agent_gaia-validation-text-only_mirothinker_single_agent_new_tools.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/config/agent_web_demo.yaml b/config/agent_web_demo.yaml
Lines changed: 3 additions & 3 deletions
diff --git a/config/benchmark/browsecomp-en.yaml b/config/benchmark/browsecomp-en.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/config/benchmark/finsearchcomp.yaml b/config/benchmark/finsearchcomp.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/config/benchmark/frames-test.yaml b/config/benchmark/frames-test.yaml
Lines changed: 22 additions & 0 deletions
diff --git a/config/benchmark/futurex.yaml b/config/benchmark/futurex.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/config/benchmark/gaia-test.yaml b/config/benchmark/gaia-test.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/config/benchmark/gaia-validation.yaml b/config/benchmark/gaia-validation.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/config/benchmark/hle-text-only.yaml b/config/benchmark/hle-text-only.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/config/benchmark/hle.yaml b/config/benchmark/hle.yaml
Lines changed: 3 additions & 0 deletions
@@ -11,8 +11,8 @@ main_agent:
     _base_: config/llm/base_mirothinker.yaml
   prompt: config/prompts/prompt_main_agent.yaml
   tools:
-    - config/tool/tool-search-and-scrape-webpage.yaml
-    - config/tool/tool-jina-scrape-llm-summary.yaml
+    - config/tool/tool-serper-search.yaml
+    - config/tool/tool-jina-scrape.yaml
     - config/tool/tool-code.yaml
   input_processor:
     - ${input-message-generator}
 
@@ -21,9 +21,9 @@ main_agent:
   prompt: config/prompts/prompt_main_agent.yaml
 
   tools:
-    - config/tool/tool-python.yaml
-    - config/tool/tool-search-and-scrape-webpage.yaml
-    - config/tool/tool-jina-scrape-llm-summary.yaml
+    - config/tool/tool-code-sandbox.yaml
+    - config/tool/tool-serper-search.yaml
+    - config/tool/tool-jina-scrape.yaml
     - config/tool/tool-reading.yaml
     #- config/tool/tool-code.yaml
     #- config/tool/tool-image-video.yaml
 
@@ -14,7 +14,10 @@ execution:
   max_tasks: null      # null = no limit, or specify a number
   max_concurrent: 5    # Number of parallel tasks
   pass_at_k: 1         # Number of attempts per task
+  max_retry: 5
+  exceed_max_turn_summary: true
 
 # OpenAI API key for evaluation (required for browsecomp since it has ground truth)
 openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
 
@@ -17,3 +17,4 @@ execution:
 
 # OpenAI API key for evaluation (required for finsearchcomp since it has ground truth)
 openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
@@ -0,0 +1,22 @@
+# config/benchmark/frames-test.yaml
+defaults:
+  - default
+  - _self_
+
+name: "frames-test"
+
+data:
+  data_dir: "${data_dir}/frames-test"  # Path to frames-test dataset
+  metadata_file: "standardized_data.jsonl"  # Metadata filename
+  whitelist: []  # Optional: List of specific task_ids to run
+
+execution:
+  max_tasks: null      # null = no limit, or specify a number
+  max_concurrent: 5    # Number of parallel tasks
+  pass_at_k: 1         # Number of attempts per task
+  max_retry: 5
+  exceed_max_turn_summary: true
+
+# OpenAI API key for evaluation (required for frames-test since it has ground truth)
+openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
@@ -17,4 +17,5 @@ execution:
 
 # Set to skip evaluation since we don't have ground truth
 openai_api_key: "skip_evaluation"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
 
@@ -14,3 +14,4 @@ execution:
   pass_at_k: 1
 
 openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
@@ -12,5 +12,8 @@ execution:
   max_tasks: -1  # null means no limit
   max_concurrent: 15
   pass_at_k: 1
+  max_retry: 5
+  exceed_max_turn_summary: true
 
 openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
@@ -14,7 +14,10 @@ execution:
   max_tasks: null      # null = no limit, or specify a number
   max_concurrent: 10    # Number of parallel tasks
   pass_at_k: 1         # Number of attempts per task
+  max_retry: 5
+  exceed_max_turn_summary: true
 
 # OpenAI API key for evaluation (required for hle-text-only since it has ground truth)
 openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
 
@@ -14,7 +14,10 @@ execution:
   max_tasks: null      # null = no limit, or specify a number
   max_concurrent: 10    # Number of parallel tasks
   pass_at_k: 1         # Number of attempts per task
+  max_retry: 5
+  exceed_max_turn_summary: true
 
 # OpenAI API key for evaluation (required for hle since it has ground truth)
 openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
Original file line number	Diff line number	Diff line change
`@@ -17,3 +17,4 @@ execution:`
`17`	`17`
`18`	`18`	`# OpenAI API key for evaluation (required for finsearchcomp since it has ground truth)`
`19`	`19`	`openai_api_key: "${oc.env:OPENAI_API_KEY,???}"`
	`20`	`+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"`
Original file line number	Diff line number	Diff line change
`@@ -17,4 +17,5 @@ execution:`
`17`	`17`
`18`	`18`	`# Set to skip evaluation since we don't have ground truth`
`19`	`19`	`openai_api_key: "skip_evaluation"`
	`20`	`+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"`
`20`	`21`
Original file line number	Diff line number	Diff line change
`@@ -14,3 +14,4 @@ execution:`
`14`	`14`	`pass_at_k: 1`
`15`	`15`
`16`	`16`	`openai_api_key: "${oc.env:OPENAI_API_KEY,???}"`
	`17`	`+openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"`