Skip to content

Commit 1db06cc

Browse files
Improve Benchmarking Performance (#222)
1 parent 7a6838f commit 1db06cc

File tree

20 files changed

+2035
-35
lines changed

20 files changed

+2035
-35
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
maab/datasets/
33
runs/
44
maab/runs/
5-
/maab/
5+
temp/
66

77
# OS specific
88
*.DS_Store

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ dependencies = [
2525
"langchain-anthropic>=0.3.15",
2626
"langchain_aws>=0.2.2",
2727
"pydantic>=2.9.2",
28-
"hydra-core>=1.3",
28+
"hydra-core",
2929
"matplotlib>=3.9.2",
3030
"typer>=0.12.5",
3131
"rich>=13.8.1",
@@ -35,6 +35,8 @@ dependencies = [
3535
"joblib>=1.4.2",
3636
"python-calamine",
3737
"tenacity>=8.2.2",
38+
"torchaudio",
39+
"torchvision",
3840
"transformers<=4.49.0",
3941
"pandas>=2.2",
4042
"streamlit>=1.37",

src/autogluon/assistant/agents/reranker_agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ def _generate_tutorial_prompt(self, selected_tutorials: List[TutorialInfo]) -> s
110110
# Format selected tutorials
111111
formatted_tutorials = []
112112
for tutorial in selected_tutorials:
113-
formatted = self._format_tutorial_content(tutorial, max_tutorial_length)
113+
per_tutorial_max_length = max_tutorial_length // len(selected_tutorials)
114+
formatted = self._format_tutorial_content(tutorial, per_tutorial_max_length)
114115
if formatted:
115116
formatted_tutorials.append(formatted)
116117

src/autogluon/assistant/configs/default.yaml

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ max_file_group_size_to_show: 5
77
num_example_files_to_show: 1
88

99
max_chars_per_file: 1024
10-
num_tutorial_retrievals: 20
10+
num_tutorial_retrievals: 30
1111
max_num_tutorials: 5
1212
max_user_input_length: 2048
1313
max_error_message_length: 2048
14-
max_tutorial_length: 8192
14+
max_tutorial_length: 32768
1515
create_venv: false
1616
condense_tutorials: True
1717
use_tutorial_summary: True
@@ -27,19 +27,18 @@ llm: &default_llm
2727
#model: gpt-4o-2024-08-06
2828
#provider: anthropic
2929
# model: claude-3-7-sonnet-20250219
30-
max_tokens: 16384
30+
max_tokens: 32768
3131
proxy_url: null
32-
temperature: 1.
32+
temperature: 0.1
33+
top_p: 0.9
3334
verbose: True
3435
multi_turn: False
3536
template: null
3637
add_coding_format_instruction: false
3738

3839
coder:
3940
<<: *default_llm # Merge llm_config
40-
temperature: 0.
4141
multi_turn: True
42-
top_p: 1
4342

4443
executer:
4544
<<: *default_llm # Merge llm_config
@@ -58,9 +57,13 @@ retriever:
5857

5958
reranker:
6059
<<: *default_llm # Merge llm_config
60+
temperature: 0.
61+
top_p: 1.
6162

6263
description_file_retriever:
6364
<<: *default_llm # Merge llm_config
65+
temperature: 0.
66+
top_p: 1.
6467

6568
task_descriptor:
6669
<<: *default_llm # Merge llm_config
@@ -69,3 +72,5 @@ task_descriptor:
6972

7073
tool_selector:
7174
<<: *default_llm # Merge llm_config
75+
temperature: 0.
76+
top_p: 1.

src/autogluon/assistant/managers/manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ def generate_initial_prompts(self):
154154

155155
self.selected_tool = self.ts_agent()
156156

157+
# TODO: remove the hard-coding of "create_venv" (add it to the tool registry for tools that need installation)
158+
if self.selected_tool.lower() in ["machine learning", "huggingface", "fairseq"]:
159+
self.config.create_venv = True
160+
157161
# Get tool-specific template and requirements if they exist
158162
tool_info = registry.get_tool(self.selected_tool)
159163
if not tool_info:

src/autogluon/assistant/prompts/bash_coder_prompt.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,6 @@ def parse(self, response: Dict) -> Tuple[str, Optional[str]]:
7575

7676
def get_env_prompt(self):
7777
create_venv = self.manager.config.create_venv
78-
# TODO: remove the hard code for "install_packages" (add in tool registry if need installation)
79-
install_packages = "machine learning" in self.manager.selected_tool
8078
output_folder = self.manager.output_folder
8179
selected_tool = self.manager.selected_tool
8280

@@ -87,10 +85,6 @@ def get_env_prompt(self):
8785
- Python version: 3.11
8886
- Activate the environment
8987
- Install required packages"""
90-
elif install_packages:
91-
env_prompt = (
92-
"The environment may not be fully configured. Install any packages required in the python code."
93-
)
9488
else:
9589
env_prompt = f"The environment is already configured. Do not install or update any package unless there is an error due to the missing package. \nDo NOT upgrade {selected_tool} which is already installed."
9690

src/autogluon/assistant/prompts/python_coder_prompt.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,20 @@ def build(self) -> str:
8888
)
8989
prompt = f"{prompt}\n\n{format_instruction}"
9090

91+
# TODO: Remove the hard-coded length limit, and add this safeguard to the other prompts.
92+
if len(prompt) > 100000:
93+
logger.warning(f"Coder's prompt too long: {len(prompt)}. Truncated.")
94+
self.manager.save_and_log_states(
95+
content=prompt,
96+
save_name="python_coder_prompt_before_truncation.txt",
97+
per_iteration=True,
98+
add_uuid=False,
99+
)
100+
prompt = self._truncate_output_end(
101+
output=prompt,
102+
max_length=100000,
103+
)
104+
91105
self.manager.save_and_log_states(
92106
content=prompt, save_name="python_coder_prompt.txt", per_iteration=True, add_uuid=False
93107
)

src/autogluon/assistant/prompts/task_descriptor_prompt.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ def default_template(self) -> str:
1414
return """
1515
Based ONLY on the information explicitly stated in the provided data structure and description files, provide a condensed and precise description of the data science task. Include only details that are directly mentioned in the source materials. Do not add assumptions or infer unstated information.
1616
17+
Be very clear about the problem type (e.g. audio classification/image regression/seq-to-seq generation/etc.), input format, and prediction output format.
18+
1719
### Data Structure:
1820
(IMPORTANT: The metadata of example files in Data Structure may not be representative - do not make assumptions about data statistics based on examples.)
1921
{data_prompt}

src/autogluon/assistant/tools_registry/FlagEmbedding/tool.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
{
22
"name": "FlagEmbedding",
33
"version": "1.3.4",
4-
"description": "Retrieval and Retrieval-augmented LLMs",
4+
"description": "FlagEmbedding specializes in retrieval and reranking tasks.",
55
"features": [
6-
"retrieval",
7-
"reranking"
86
],
97
"requirements": [],
108
"prompt_template": [

src/autogluon/assistant/tools_registry/_common/catalog.json

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,37 @@
33
"autogluon.timeseries": {
44
"path": "autogluon.timeseries",
55
"version": "1.2.0",
6-
"description": "AutoGluon Timeseries is an open-source AutoML framework that automates the training and tuning of forecasting models for time series data, handling tasks from preprocessing to model ensembling with built-in support for both univariate and multivariate forecasting."
6+
"description": "AutoGluon TimeSeries automates forecasting model training for time series data, supporting both univariate and multivariate predictions."
77
},
88
"autogluon.multimodal": {
99
"path": "autogluon.multimodal",
1010
"version": "1.2.0",
11-
"description": "AutoGluon Multimodal is an open-source AutoML framework that simplifies the training of models across multiple data types including text, images, and tabular data, automating tasks from preprocessing to model ensembling with minimal code required."
11+
"description": "AutoGluon Multimodal excels at multimodal classification, regression, document classification, and semantic segmentation tasks. The framework is not optimized for pure tabular data processing and doesn't support generative tasks (like image-to-image or sequence-to-sequence) or audio-related tasks. It also doesn't handle object detection."
1212
},
1313
"autogluon.tabular": {
1414
"path": "autogluon.tabular",
1515
"version": "1.2.0",
16-
"description": "AutoGluon Tabular is an open-source AutoML framework that automates the training and tuning of machine learning models for tabular data, handling tasks from preprocessing to model ensembling with minimal code required."
16+
"description": "AutoGluon Tabular automates ML model training for tabular data, excelling at numerical and categorical data processing. It doesn't support NLP or image-based tasks."
1717
},
1818
"FlagEmbedding": {
1919
"path": "FlagEmbedding",
2020
"version": "1.3.4",
21-
"description": "Retrieval and Retrieval-augmented LLMs"
21+
"description": "FlagEmbedding specializes in retrieval and reranking tasks."
2222
},
23-
"machine learning or deep learning": {
23+
"machine learning": {
2424
"path": "machine learning",
2525
"version": "0.1.0",
2626
"description": "You should select this as a general reference of machine learning or deep learning algorithms in case other tools are not helpful."
27+
},
28+
"wav2vec2": {
29+
"path": "wav2vec2",
30+
"version": "0.0.0",
31+
"description": "This model was pre-trained on 4.5M hours of unlabeled audio data covering more than 143 languages. It requires fine-tuning to be used for downstream tasks such as Automatic Speech Recognition (ASR) or Audio Classification."
32+
},
33+
"qwen3": {
34+
"path": "qwen3",
35+
"version": "0.0.0",
36+
"description": "Qwen3-0.6B is a compact 0.6 billion parameter text generation model that uniquely switches between thinking and non-thinking modes for both efficient dialogue and complex reasoning tasks."
2737
}
2838
}
2939
}

0 commit comments

Comments
 (0)