Skip to content

Commit 5ae13a5

Browse files
committed
Fixed formatting and linter issues. Moved custom LLM settings under an Advanced Settings window
1 parent 812ac79 commit 5ae13a5

File tree

3 files changed

+170
-198
lines changed

3 files changed

+170
-198
lines changed

app/llm.py

Lines changed: 58 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -15,32 +15,32 @@ class LLM:
1515
"""
1616
LLM Request
1717
{
18-
"original_user_request": ...,
19-
"step_num": ...,
20-
"screenshot": ...
18+
"original_user_request": ...,
19+
"step_num": ...,
20+
"screenshot": ...
2121
}
2222
2323
step_num is the count of times we've interacted with the LLM for this user request.
2424
If it's 0, we know it's a fresh user request.
25-
If it's greater than 0, then we know we are already in the middle of a request.
26-
Therefore, if the number is positive and from the screenshot it looks like request is complete, then return an
27-
empty list in steps and a string in done. Don't keep looping the same request.
25+
If it's greater than 0, then we know we are already in the middle of a request.
26+
Therefore, if the number is positive and from the screenshot it looks like request is complete, then return an
27+
empty list in steps and a string in done. Don't keep looping the same request.
2828
2929
Expected LLM Response
3030
{
31-
"steps": [
32-
{
33-
"function": "...",
34-
"parameters": {
35-
"key1": "value1",
36-
...
37-
},
38-
"human_readable_justification": "..."
39-
},
40-
{...},
41-
...
42-
],
43-
"done": ...
31+
"steps": [
32+
{
33+
"function": "...",
34+
"parameters": {
35+
"key1": "value1",
36+
...
37+
},
38+
"human_readable_justification": "..."
39+
},
40+
{...},
41+
...
42+
],
43+
"done": ...
4444
}
4545
4646
function is the function name to call in the executor.
@@ -63,71 +63,57 @@ class LLM:
6363

6464
def __init__(self):
6565
settings_dict: dict[str, str] = Settings().get_dict()
66-
if "api_key" in settings_dict.keys() and settings_dict["api_key"]:
67-
os.environ["OPENAI_API_KEY"] = settings_dict["api_key"]
68-
base_url = "https://api.openai.com/v1/"
69-
if "base_url" in settings_dict.keys() and settings_dict["base_url"]:
70-
base_url = settings_dict["base_url"]
71-
if not base_url.endswith("/"):
72-
base_url += "/"
73-
path_to_context_file = (
74-
Path(__file__).resolve().parent.joinpath("resources", "context.txt")
75-
)
76-
with open(path_to_context_file, "r") as file:
66+
67+
base_url = settings_dict.get('base_url', 'https://api.openai.com/v1/').rstrip('/') + '/'
68+
api_key = settings_dict.get('api_key')
69+
if api_key:
70+
os.environ["OPENAI_API_KEY"] = api_key
71+
72+
path_to_context_file = Path(__file__).resolve().parent.joinpath('resources', 'context.txt')
73+
with open(path_to_context_file, 'r') as file:
7774
self.context = file.read()
7875

7976
self.context += f' Locally installed apps are {",".join(local_info.locally_installed_apps)}.'
80-
self.context += f" OS is {local_info.operating_system}."
81-
self.context += f" Primary screen size is {Screen().get_size()}.\n"
77+
self.context += f' OS is {local_info.operating_system}.'
78+
self.context += f' Primary screen size is {Screen().get_size()}.\n'
8279

83-
if (
84-
"default_browser" in settings_dict.keys()
85-
and settings_dict["default_browser"]
86-
):
80+
if 'default_browser' in settings_dict.keys() and settings_dict['default_browser']:
8781
self.context += f'\nDefault browser is {settings_dict["default_browser"]}.'
8882

89-
if "custom_llm_instructions" in settings_dict:
90-
self.context += (
91-
f'\nCustom user-added info: {settings_dict["custom_llm_instructions"]}.'
92-
)
83+
if 'custom_llm_instructions' in settings_dict:
84+
self.context += f'\nCustom user-added info: {settings_dict["custom_llm_instructions"]}.'
85+
86+
self.client = OpenAI()
9387

88+
self.model = settings_dict.get('model')
89+
if not self.model:
90+
self.model = 'gpt-4-vision-preview'
9491
self.client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url)
95-
self.model = (
96-
settings_dict["model"]
97-
if "model" in settings_dict
98-
else "gpt-4-vision-preview"
99-
)
10092

101-
def get_instructions_for_objective(
102-
self, original_user_request: str, step_num: int = 0
103-
) -> dict[str, Any]:
104-
message: list[dict[str, Any]] = self.create_message_for_llm(
105-
original_user_request, step_num
106-
)
93+
def get_instructions_for_objective(self, original_user_request: str, step_num: int = 0) -> dict[str, Any]:
94+
message: list[dict[str, Any]] = self.create_message_for_llm(original_user_request, step_num)
10795
llm_response = self.send_message_to_llm(message)
108-
json_instructions: dict[str, Any] = self.convert_llm_response_to_json(
109-
llm_response
110-
)
96+
json_instructions: dict[str, Any] = self.convert_llm_response_to_json(llm_response)
11197

11298
return json_instructions
11399

114-
def create_message_for_llm(
115-
self, original_user_request, step_num
116-
) -> list[dict[str, Any]]:
100+
def create_message_for_llm(self, original_user_request, step_num) -> list[dict[str, Any]]:
117101
base64_img: str = Screen().get_screenshot_in_base64()
118102

119-
request_data: str = json.dumps(
120-
{"original_user_request": original_user_request, "step_num": step_num}
121-
)
103+
request_data: str = json.dumps({
104+
'original_user_request': original_user_request,
105+
'step_num': step_num
106+
})
122107

123108
# We have to add the context to every request for now, which is expensive, because our chosen model doesn't have a
124109
# stateful/Assistant mode yet.
125110
message = [
126-
{"type": "text", "text": self.context + request_data},
127-
{
128-
"type": "image_url",
129-
"image_url": {"url": f"data:image/jpeg;base64,{base64_img}"},
130-
},
111+
{'type': 'text', 'text': self.context + request_data},
112+
{'type': 'image_url',
113+
'image_url': {
114+
'url': f'data:image/jpeg;base64,{base64_img}'
115+
}
116+
}
131117
]
132118

133119
return message
@@ -137,30 +123,26 @@ def send_message_to_llm(self, message) -> ChatCompletion:
137123
model=self.model,
138124
messages=[
139125
{
140-
"role": "user",
141-
"content": message,
126+
'role': 'user',
127+
'content': message,
142128
}
143129
],
144130
max_tokens=800,
145131
)
146132
return response
147133

148-
def convert_llm_response_to_json(
149-
self, llm_response: ChatCompletion
150-
) -> dict[str, Any]:
134+
def convert_llm_response_to_json(self, llm_response: ChatCompletion) -> dict[str, Any]:
151135
llm_response_data: str = llm_response.choices[0].message.content.strip()
152136

153137
# Our current LLM does not guarantee a JSON response, hence we manually extract the JSON part of the response.
154138
# Check for updates here - https://platform.openai.com/docs/guides/text-generation/json-mode
155-
start_index = llm_response_data.find("{")
156-
end_index = llm_response_data.rfind("}")
139+
start_index = llm_response_data.find('{')
140+
end_index = llm_response_data.rfind('}')
157141

158142
try:
159-
json_response = json.loads(
160-
llm_response_data[start_index : end_index + 1].strip()
161-
)
143+
json_response = json.loads(llm_response_data[start_index:end_index + 1].strip())
162144
except Exception as e:
163-
print(f"Error while parsing JSON response - {e}")
145+
print(f'Error while parsing JSON response - {e}')
164146
json_response = {}
165147

166148
return json_response

0 commit comments

Comments
 (0)