-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathdata_analysis_env.py
More file actions
215 lines (196 loc) · 7.73 KB
/
data_analysis_env.py
File metadata and controls
215 lines (196 loc) · 7.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import hashlib
import logging
import shutil
from typing import Any, cast
import time
from aviary.core import (
EvalAnswerMode,
Frame,
Message,
Messages,
Tool,
)
from lmi.cost_tracker import GLOBAL_COST_TRACKER, enable_cost_tracking
from .notebook_env import NBEnvironment
from .utils import NBLanguage, MultipleChoiceQuestion, nb_to_html
from . import prompts
from . import config as cfg
logger = logging.getLogger(__name__)
CORRECT_MSG = "Correct answer!"
INCORRECT_MSG = "Incorrect answer."
class DataAnalysisEnv(NBEnvironment):
    """Notebook environment for open-ended data-analysis problems.

    Extends :class:`NBEnvironment` with a problem statement shown as the
    initial observation, optional multiple-choice questions, answer
    submission/evaluation metadata, and the ability to exclude tools by name.
    """

    def __init__(
        self,
        *,
        problem_id: str,
        problem: str,
        answer: str | int | float | None = None,  # noqa: PYI041
        system_prompt: str | None = None,
        correct_reward: float = 1.0,
        eval_mode: EvalAnswerMode | None = None,
        metadata: dict[str, Any] | None = None,  # used for NBEvalExpt
        mcqs: list[MultipleChoiceQuestion] | None = None,
        exclude_tools: list[str] | None = None,
        **kwargs,
    ):
        """Initialize the environment.

        Args:
            problem_id: Stable identifier for the problem instance.
            problem: Problem statement presented to the agent on reset.
            answer: Ground-truth answer, if known.
            system_prompt: Optional system message added to the initial
                observation.
            correct_reward: Reward granted for a correct answer.
            eval_mode: How submitted answers are evaluated.
            metadata: Extra experiment metadata (used for NBEvalExpt).
            mcqs: Optional multiple-choice questions for this problem.
            exclude_tools: Tool function names to remove from the tool list.
            **kwargs: Forwarded to :class:`NBEnvironment`.
        """
        super().__init__(**kwargs)
        self.problem_id = problem_id
        self.problem = problem
        self.mcqs = mcqs
        self.answer = answer
        self.eval_mode = eval_mode
        self.correct_reward = correct_reward
        self.system_prompt = system_prompt
        self.metadata = metadata
        # Per-question rewards keyed by question id, filled in during evaluation.
        self.question_rewards: dict[str, int] = {}
        self.exclude_tools = exclude_tools

    async def reset(self) -> tuple[Messages, list[Tool]]:
        """Reset the environment and build the initial observation.

        Returns:
            A tuple of (initial observation messages, available tools).
        """
        # Discard base class's init_obs and make our own with the problem statement
        _, tools = await super().reset()
        if self.exclude_tools:
            tools = [
                tool
                for tool in tools
                if tool._tool_fn.__name__ not in self.exclude_tools
            ]
        messages = [
            Message(content=self.problem),
            self.get_env_state_msg(),
        ]
        # If the list_workdir tool is excluded, add the content of the working directory to the initial message
        if self.exclude_tools is not None and "list_workdir" in self.exclude_tools:
            messages.append(
                Message(
                    content=f"Here is the content of your working directory:\n{self.list_workdir()}"
                )
            )
        # NOTE(review): the system message is appended AFTER the user-facing
        # messages — confirm downstream consumers expect this ordering.
        if self.system_prompt:
            messages.append(Message(role="system", content=self.system_prompt))
        init_obs = cast(Messages, messages)
        # Use the module logger (lazy %-args) instead of bare print() for debug output.
        logger.debug("Initial observation: %s", init_obs)
        logger.debug("Available tools: %s", tools)
        return init_obs, tools

    async def submit_answer(self, answer: str) -> str:  # type: ignore[override]
        """Submit an answer to the problem.
        Note that this tool may only be called once and ends the episode.
        Args:
            answer: The answer to the problem
        """
        # TODO: support various eval modes
        self.state.answer = answer
        self.state.done = True
        logger.info("Submitting answer and closing environment")
        await self.close()
        logger.info("Answer: %s", answer)
        return answer

    def export_frame(self) -> Frame:
        """Snapshot the current episode state and metadata as a Frame."""
        return Frame(
            state={
                "last_action": self.state.actions[-1] if self.state.actions else None,
                "answer": self.state.answer,
                "done": self.state.done,
                "total_reward": self.state.total_reward,
                "nb_state": self.state.nb,
                "nb_state_html": nb_to_html(self.state.nb),
                "nb_runtime_errors": self.state.notebook_runtime_errors,
            },
            info={
                "eval_mode": self.eval_mode,
                "language": self.state.language,
                "problem": self.problem,
                "problem_id": self.problem_id,
                # Lifetime LLM spend accumulated by the global cost tracker.
                "cost": GLOBAL_COST_TRACKER.lifetime_cost_usd,
            },
        )

    @classmethod
    def from_task(
        cls,
        task: str,
        gcs_artifact_path: str | None = None,
        environment_config: dict[str, Any] | None = None,
    ) -> "DataAnalysisEnv":
        """
        Perform data analysis on a user query.
        Args:
            task: The user query
            gcs_artifact_path: The path to the GCS artifact – required for evaluation on crow jobs
            environment_config: A JSON string of environment configuration
        """
        logger.info("User task: %s", task[:100])
        logger.info("GCS artifact path: %s", gcs_artifact_path)
        logger.info("environment_config: %s", environment_config)
        # Track cost of running the environment
        enable_cost_tracking()
        if (
            not gcs_artifact_path
        ):  # Platform jobs should always be associated with data from a GCS bucket
            raise NotImplementedError(
                "Running crow jobs without gcs_artifact_path is not supported"
            )
        if environment_config:
            # Only forward configuration keys that the constructor chain accepts.
            kwargs = {
                k: v
                for k, v in environment_config.items()
                if k in cfg.VALID_FROM_TASK_KWARGS
            }
        else:
            kwargs = {}
            environment_config = {}
        logger.info("Filtered kwargs: %s", kwargs)
        # Hash the task text so repeated runs of the same query share an id prefix.
        task_hash = hashlib.sha256(task.encode()).hexdigest()
        if environment_config.get("eval", False):
            logger.info("Eval mode is True")
            # Create a temporary directory in GCP mounted storage volume
            trajectory_path = cfg.DATA_STORAGE_PATH / f"{task_hash}-{time.time()}"
            trajectory_path.mkdir(parents=True, exist_ok=True)
            # Copy the GCS artifact contents into the fresh per-trajectory directory
            # so the eval run cannot mutate the shared source data.
            for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir():
                if item.is_file():
                    shutil.copy2(item, trajectory_path)
                elif item.is_dir():
                    shutil.copytree(
                        item, trajectory_path / item.name, dirs_exist_ok=True
                    )
        else:
            logger.info("Eval mode is False")
            # Use the GCP folder created when uploading the data via the platform
            trajectory_path = cfg.DATA_STORAGE_PATH / gcs_artifact_path
        # Augment incoming user query with CoT instructions
        task = (
            f"Here is the user query to address:\n"
            f"<query>\n"
            f"{task}\n"
            f"</query>\n"
            f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n"
            f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}"
        )
        logger.info("Trajectory path: %s", trajectory_path)
        nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME
        logger.info("NB path: %s", nb_path)
        language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))
        # Overwrite the language in the kwargs with NBLanguage enum
        kwargs["language"] = language
        logger.info("Language: %s", language.name)
        if language == NBLanguage.R:
            task += f"\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}"
        if trajectory_path.exists():
            files = list(trajectory_path.iterdir())
            logger.info("Files in directory: %s", [f.name for f in files])
            if not files:
                raise ValueError(
                    f"No files found in trajectory path: {trajectory_path}"
                )
        else:
            raise ValueError(f"Trajectory path does not exist: {trajectory_path}")
        return cls(
            problem_id=f"data-analysis-task-{task_hash}",
            problem=task,
            eval_mode=EvalAnswerMode.LLM,
            nb_path=nb_path,
            work_dir=trajectory_path,
            system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,
            use_tmp_work_dir=False,
            **kwargs,
        )