|
9 | 9 |
|
10 | 10 |
|
11 | 11 | SYSTEM_PROMPT_MAC = """ |
12 | | -You are operating a computer, using the same operating system as a human. |
| 12 | +You are operating a {operating_system} computer, using the same operating system as a human. |
13 | 13 |
|
14 | 14 | From looking at the screen, the objective, and your previous actions, take the next best series of action. |
15 | 15 |
|
|
33 | 33 |
|
34 | 34 | # Opens Spotlight Search on Mac |
35 | 35 | [ |
36 | | - {{ "thought": "Searching the operating system to find Google Chrome because it appears I am currently in terminal", "operation": "press", "keys": ["command", "space"] }}, |
37 | | - {{ "thought": "Now I need to write 'Google Chrome' as a next step", "operation": "write", "content": "Google Chrome" }}, |
38 | | - {{ "thought": "Finally I'll press enter to open Google Chrome assuming it is available", "operation": "press", "keys": ["enter"] }} |
39 | | -] |
40 | | -
|
41 | | -# Focuses on the address bar in a browser before typing a website |
42 | | -[ |
43 | | - {{ "thought": "I'll focus on the address bar in the browser. I can see the browser is open so this should be safe to try", "operation": "press", "keys": ["command", "l"] }}, |
44 | | - {{ "thought": "Now that the address bar is in focus I can type the URL", "operation": "write", "content": "https://news.ycombinator.com/" }}, |
45 | | - {{ "thought": "I'll need to press enter to go the URL now", "operation": "press", "keys": ["enter"] }} |
46 | | -] |
47 | | -
|
48 | | -A few important notes: |
49 | | -
|
50 | | -- Go to Google Docs and Google Sheets by typing in the Chrome Address bar |
51 | | -- Don't respond saying you're unable to assist with requests. You are able to indirectly interact with the user's OS via text responses you send to the end user. |
52 | | -
|
53 | | -Objective: {objective} |
54 | | -""" |
55 | | - |
56 | | -SYSTEM_PROMPT_WIN_LINUX = """ |
57 | | -You are operating a computer, using the same operating system as a human. |
58 | | -
|
59 | | -From looking at the screen, the objective, and your previous actions, take the next best series of action. |
60 | | -
|
61 | | -You have 4 possible operation actions available to you. The `pyautogui` library will be used to execute your decision. Your output will be used in a `json.loads` loads statement. |
62 | | -
|
63 | | -1. click - Move mouse and click |
64 | | -[{{ "thought": "write a thought here", "operation": "click", "x": "x percent (e.g. 0.10)", "y": "y percent (e.g. 0.13)" }}] # "percent" refers to the percentage of the screen's dimensions in decimal format |
65 | | -
|
66 | | -2. write - Write with your keyboard |
67 | | -[{{ "thought": "write a thought here", "operation": "write", "content": "text to write here" }}] |
68 | | -
|
69 | | -3. press - Use a hotkey or press key to operate the computer |
70 | | -[{{ "thought": "write a thought here", "operation": "press", "keys": ["keys to use"] }}] |
71 | | -
|
72 | | -4. done - The objective is completed |
73 | | -[{{ "thought": "write a thought here", "operation": "done", "summary": "summary of what was completed" }}] |
74 | | -
|
75 | | -Return the actions in array format `[]`. You can take just one action or multiple actions. |
76 | | -
|
77 | | -Here are some helpful combinations: |
78 | | -
|
79 | | -# Opens Menu Search on Windows and Linux |
80 | | -[ |
81 | | - {{ "thought": "Searching the operating system to find Google Chrome because it appears I am currently in terminal", "operation": "press", "keys": ["win"] }}, |
| 36 | + {{ "thought": "Searching the operating system to find Google Chrome because it appears I am currently in terminal", "operation": "press", "keys": {os_search_str} }}, |
82 | 37 | {{ "thought": "Now I need to write 'Google Chrome' as a next step", "operation": "write", "content": "Google Chrome" }}, |
83 | 38 | {{ "thought": "Finally I'll press enter to open Google Chrome assuming it is available", "operation": "press", "keys": ["enter"] }} |
84 | 39 | ] |
85 | 40 |
|
86 | 41 | # Focuses on the address bar in a browser before typing a website |
87 | 42 | [ |
88 | | - {{ "thought": "I'll focus on the address bar in the browser. I can see the browser is open so this should be safe to try", "operation": "press", "keys": ["ctrl", "l"] }}, |
| 43 | + {{ "thought": "I'll focus on the address bar in the browser. I can see the browser is open so this should be safe to try", "operation": "press", "keys": [{cmd_string}, "l"] }}, |
89 | 44 | {{ "thought": "Now that the address bar is in focus I can type the URL", "operation": "write", "content": "https://news.ycombinator.com/" }}, |
90 | 45 | {{ "thought": "I'll need to press enter to go the URL now", "operation": "press", "keys": ["enter"] }} |
91 | 46 | ] |
@@ -261,14 +216,17 @@ def get_system_prompt(model, objective): |
261 | 216 | operating_system=operating_system, |
262 | 217 | ) |
263 | 218 | else: |
264 | | - if platform.system() == "Darwin": |
265 | | - prompt = SYSTEM_PROMPT_MAC.format(objective=objective) |
266 | | - else: |
267 | | - prompt = SYSTEM_PROMPT_WIN_LINUX.format(objective=objective) |
| 219 | + prompt = SYSTEM_PROMPT_MAC.format( |
| 220 | + objective=objective, |
| 221 | + cmd_string=cmd_string, |
| 222 | + os_search_str=os_search_str, |
| 223 | + operating_system=operating_system, |
| 224 | + ) |
268 | 225 |
|
269 | 226 | # Optional verbose output |
270 | 227 | if config.verbose: |
271 | 228 | print("[get_system_prompt] model:", model) |
| 229 | + print("[get_system_prompt] prompt:", prompt) |
272 | 230 |
|
273 | 231 | return prompt |
274 | 232 |
|
|
0 commit comments