-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslate_recording.py
More file actions
365 lines (301 loc) · 12.1 KB
/
translate_recording.py
File metadata and controls
365 lines (301 loc) · 12.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
"""
Translate recorded events to Excel tool calls (v2.0 - Consolidated Tools)
Uses new openpyxl_cell_set_style and universal chart tools
"""
import json
import sys
from pathlib import Path
from collections import defaultdict
def translate_cell_value(details):
"""Translate typing a value into a cell"""
return {
"tool": "openpyxl_worksheet_set_item",
"params": {
"key": details["cell"].replace("$", ""),
"value": details["value"]
}
}
def translate_cell_formula(details):
"""Translate entering a formula"""
return {
"tool": "openpyxl_worksheet_set_item",
"params": {
"key": details["cell"].replace("$", ""),
"value": details["formula"]
}
}
def translate_merge_cells(details):
"""Translate merging cells"""
return {
"tool": "openpyxl_worksheet_merge_cells",
"params": {
"range_string": details["range"]
}
}
def translate_column_width(details):
"""Translate column width change"""
return {
"tool": "openpyxl_worksheet_set_column_width",
"params": {
"column": details["column"],
"width": str(details["width"])
}
}
def translate_row_height(details):
"""Translate row height change"""
return {
"tool": "openpyxl_worksheet_set_row_height",
"params": {
"row": str(details["row"]),
"height": str(details["height"])
}
}
def translate_sheet_create(details):
"""Translate creating new sheet"""
return {
"tool": "openpyxl_workbook_create_sheet",
"params": {
"title": details["sheet_name"]
}
}
def translate_sheet_activate(details):
"""Translate switching sheets"""
return {
"tool": "openpyxl_workbook_get_item",
"params": {
"key": details["sheet_name"]
}
}
def translate_workbook_save(details):
"""Translate saving workbook"""
return {
"tool": "openpyxl_workbook_save",
"params": {
"filename": details.get("filename", "workbook.xlsx")
}
}
# Formatting consolidation helpers
def consolidate_cell_formatting(cell_formats):
"""
Consolidate multiple formatting changes for same cell into single openpyxl_cell_set_style call
cell_formats: dict of {cell_addr: [list of format events]}
"""
consolidated = []
for cell_addr, formats in cell_formats.items():
# Collect all formatting properties for this cell
# Normalize cell reference by stripping $ characters (absolute → relative)
style_params = {"cell_reference": cell_addr.replace("$", "")}
for fmt in formats:
action = fmt['action']
details = fmt['details']
if action == 'format_number':
style_params['number_format'] = details['format']
elif action in ['format_font', 'format_bold', 'format_italic']:
if 'font' not in style_params:
style_params['font'] = {}
# Build inline font JSON
# Handle explicit bold/italic actions (VBA captures these without details)
if action == 'format_bold':
style_params['font']['bold'] = True
elif action == 'format_italic':
style_params['font']['italic'] = True
# Handle detailed font properties
if 'bold' in details:
style_params['font']['bold'] = details['bold']
if 'italic' in details:
style_params['font']['italic'] = details['italic']
if 'size' in details:
style_params['font']['size'] = details['size']
if 'color' in details:
style_params['font']['color'] = details['color']
elif action == 'format_fill':
# Inline fill JSON
style_params['fill'] = json.dumps({
"color": details['color']
})
elif action == 'format_border':
# Inline border JSON
style_params['border'] = json.dumps({
"style": "thin",
"sides": list(details['borders'].keys())
})
elif action == 'format_alignment':
# Inline alignment JSON
align = {}
if details.get('horizontal'):
align['horizontal'] = details['horizontal']
if details.get('vertical'):
align['vertical'] = details['vertical']
if details.get('wrap_text'):
align['wrap_text'] = details['wrap_text']
if details.get('indent') is not None:
align['indent'] = details['indent']
if details.get('text_rotation') is not None:
align['text_rotation'] = details['text_rotation']
if align:
style_params['alignment'] = json.dumps(align)
# Convert font dict to JSON string if exists
if 'font' in style_params and isinstance(style_params['font'], dict):
style_params['font'] = json.dumps(style_params['font'])
# Create consolidated tool call
consolidated.append({
"tool": "openpyxl_cell_set_style",
"params": style_params,
"timestamp": formats[0]['timestamp'], # Use first format's timestamp
"original_actions": [f['action'] for f in formats],
"source": "diff_consolidated"
})
return consolidated
# Main translation mapping for non-formatting events
EVENT_TRANSLATORS = {
"cell_value": translate_cell_value,
"cell_formula": translate_cell_formula,
"merge_cells": translate_merge_cells,
"column_width": translate_column_width,
"row_height": translate_row_height,
"sheet_create": translate_sheet_create,
"sheet_activate": translate_sheet_activate,
"workbook_save": translate_workbook_save,
}
# Formatting events that get consolidated
FORMATTING_EVENTS = {
'format_number', 'format_font', 'format_fill',
'format_border', 'format_alignment',
'format_bold', 'format_italic'
}
def translate_recording(json_file):
"""Translate recorded events to tool calls with formatting consolidation"""
with open(json_file, 'r') as f:
events = json.load(f)
print(f"Loaded {len(events)} events from {json_file.name}")
# Separate formatting events from other events
formatting_events = []
other_events = []
skipped_events = ["snapshot_taken"]
for event in events:
action = event.get("action")
if action in skipped_events:
continue
elif action in FORMATTING_EVENTS:
formatting_events.append(event)
else:
other_events.append(event)
print(f" - Cell operations: {len(other_events)}")
print(f" - Formatting events: {len(formatting_events)}")
# Group formatting events by cell and timestamp window
# Events within 2 seconds for same cell get consolidated
cell_format_groups = defaultdict(list)
for event in formatting_events:
cell = event['details'].get('cell')
if cell:
cell_format_groups[cell].append(event)
# Consolidate formatting for each cell
print(f"\nConsolidating formatting for {len(cell_format_groups)} cells...")
consolidated_formatting = consolidate_cell_formatting(cell_format_groups)
print(f" - Reduced to {len(consolidated_formatting)} consolidated style calls")
# Translate other events
tool_sequence = []
unmapped_events = []
for event in other_events:
action = event.get("action")
details = event.get("details", {})
timestamp = event.get("timestamp")
if action in EVENT_TRANSLATORS:
tool_call = EVENT_TRANSLATORS[action](details)
tool_call["timestamp"] = timestamp
tool_call["original_action"] = action
tool_call["source"] = event.get("detected_by", "vba")
tool_sequence.append(tool_call)
else:
unmapped_events.append({
"action": action,
"timestamp": timestamp,
"details": details
})
print(f"⚠️ Unmapped action: {action}")
# Add consolidated formatting
tool_sequence.extend(consolidated_formatting)
# Sort by timestamp
tool_sequence.sort(key=lambda x: x.get('timestamp', ''))
# DEDUPLICATION: Remove duplicate tool calls (same tool + params)
# Happens when both diff and VBA capture the same operation
seen_signatures = set()
deduplicated_sequence = []
duplicates_removed = 0
for tool in tool_sequence:
# Create signature from tool name and params
tool_name = tool['tool']
params = tool.get('params', {})
param_str = json.dumps(params, sort_keys=True)
signature = (tool_name, param_str)
if signature not in seen_signatures:
seen_signatures.add(signature)
deduplicated_sequence.append(tool)
else:
duplicates_removed += 1
tool_sequence = deduplicated_sequence
print(f"\n✅ Translated {len(tool_sequence)} tool calls")
print(f" - Cell operations: {len([t for t in tool_sequence if t['source'] == 'vba'])}")
print(f" - Consolidated styles: {len(consolidated_formatting)}")
if duplicates_removed > 0:
print(f" - Duplicates removed: {duplicates_removed}")
if unmapped_events:
print(f"⚠️ Unmapped: {len(unmapped_events)} events")
return tool_sequence, unmapped_events
def save_tool_sequence(tool_sequence, output_file):
"""Save the translated tool sequence"""
output = {
"source": "hybrid_vba_plus_diff_v2",
"tool_format": "consolidated_109_tools",
"total_steps": len(tool_sequence),
"tool_sequence": tool_sequence
}
with open(output_file, 'w') as f:
json.dump(output, f, indent=2)
print(f"\n✅ Saved tool sequence to: {output_file}")
def print_summary_stats(tool_sequence):
"""Print helpful summary statistics"""
from collections import Counter
tool_counts = Counter(t['tool'] for t in tool_sequence)
print("\n" + "=" * 60)
print("TOOL USAGE SUMMARY")
print("=" * 60)
print("\nMost used tools:")
for tool, count in tool_counts.most_common(10):
print(f" {tool}: {count}")
# Count consolidated formatting
style_calls = [t for t in tool_sequence if t['tool'] == 'openpyxl_cell_set_style']
if style_calls:
print(f"\n📊 Formatting efficiency:")
total_formats = sum(len(t.get('original_actions', [])) for t in style_calls)
print(f" {total_formats} formatting events → {len(style_calls)} style calls")
print(f" Reduction: {100 - (len(style_calls)/max(total_formats, 1)*100):.1f}%")
def main():
if len(sys.argv) < 2:
print("Usage: python translate_recording.py <complete_log.json>")
print("\nExample:")
print(" python translate_recording.py excel_recordings/recording_20251006_184254_complete.json")
sys.exit(1)
input_file = Path(sys.argv[1])
if not input_file.exists():
print(f"❌ File not found: {input_file}")
sys.exit(1)
output_file = input_file.parent / f"{input_file.stem}_tools.json"
print("=" * 60)
print("TRANSLATING TO CONSOLIDATED TOOL CALLS (v2.0)")
print("=" * 60)
print(f"Input: {input_file.name}\n")
# Translate
tool_sequence, unmapped = translate_recording(input_file)
# Save
save_tool_sequence(tool_sequence, output_file)
# Summary
print_summary_stats(tool_sequence)
print("\n" + "=" * 60)
print("TRANSLATION COMPLETE")
print("=" * 60)
print(f"Output: {output_file}")
print(f"Total tool calls: {len(tool_sequence)}")
print("\n✅ Ready for model training!")
if __name__ == "__main__":
main()