AmberSahdev
diff --git a/app/llm.py b/app/llm.py
Lines changed: 58 additions & 76 deletions
@@ -15,32 +15,32 @@ class LLM:
     """
     LLM Request
     {
-        "original_user_request": ...,
-        "step_num": ...,
-        "screenshot": ...
+    	"original_user_request": ...,
+    	"step_num": ...,
+    	"screenshot": ...
     }
 
     step_num is the count of times we've interacted with the LLM for this user request.
         If it's 0, we know it's a fresh user request.
-        If it's greater than 0, then we know we are already in the middle of a request.
-        Therefore, if the number is positive and from the screenshot it looks like request is complete, then return an
-            empty list in steps and a string in done. Don't keep looping the same request.
+    	If it's greater than 0, then we know we are already in the middle of a request.
+    	Therefore, if the number is positive and from the screenshot it looks like request is complete, then return an
+    	    empty list in steps and a string in done. Don't keep looping the same request.
 
     Expected LLM Response
     {
-        "steps": [
-                {
-                        "function": "...",
-                        "parameters": {
-                                "key1": "value1",
-                                ...
-                        },
-                        "human_readable_justification": "..."
-                },
-                {...},
-                ...
-        ],
-        "done": ...
+    	"steps": [
+    		{
+    			"function": "...",
+    			"parameters": {
+    				"key1": "value1",
+    				...
+    			},
+    			"human_readable_justification": "..."
+    		},
+    		{...},
+    		...
+    	],
+    	"done": ...
     }
 
     function is the function name to call in the executor.
@@ -63,71 +63,57 @@ class LLM:
 
     def __init__(self):
         settings_dict: dict[str, str] = Settings().get_dict()
-        if "api_key" in settings_dict.keys() and settings_dict["api_key"]:
-            os.environ["OPENAI_API_KEY"] = settings_dict["api_key"]
-        base_url = "https://api.openai.com/v1/"
-        if "base_url" in settings_dict.keys() and settings_dict["base_url"]:
-            base_url = settings_dict["base_url"]
-        if not base_url.endswith("/"):
-            base_url += "/"
-        path_to_context_file = (
-            Path(__file__).resolve().parent.joinpath("resources", "context.txt")
-        )
-        with open(path_to_context_file, "r") as file:
+
+        base_url = settings_dict.get('base_url', 'https://api.openai.com/v1/').rstrip('/') + '/'
+        api_key = settings_dict.get('api_key')
+        if api_key:
+            os.environ["OPENAI_API_KEY"] = api_key
+
+        path_to_context_file = Path(__file__).resolve().parent.joinpath('resources', 'context.txt')
+        with open(path_to_context_file, 'r') as file:
             self.context = file.read()
 
         self.context += f' Locally installed apps are {",".join(local_info.locally_installed_apps)}.'
-        self.context += f" OS is {local_info.operating_system}."
-        self.context += f" Primary screen size is {Screen().get_size()}.\n"
+        self.context += f' OS is {local_info.operating_system}.'
+        self.context += f' Primary screen size is {Screen().get_size()}.\n'
 
-        if (
-            "default_browser" in settings_dict.keys()
-            and settings_dict["default_browser"]
-        ):
+        if 'default_browser' in settings_dict.keys() and settings_dict['default_browser']:
             self.context += f'\nDefault browser is {settings_dict["default_browser"]}.'
 
-        if "custom_llm_instructions" in settings_dict:
-            self.context += (
-                f'\nCustom user-added info: {settings_dict["custom_llm_instructions"]}.'
-            )
+        if 'custom_llm_instructions' in settings_dict:
+            self.context += f'\nCustom user-added info: {settings_dict["custom_llm_instructions"]}.'
+
+        self.client = OpenAI()
 
+        self.model = settings_dict.get('model')
+        if not self.model:
+            self.model = 'gpt-4-vision-preview'
         self.client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url)
-        self.model = (
-            settings_dict["model"]
-            if "model" in settings_dict
-            else "gpt-4-vision-preview"
-        )
 
-    def get_instructions_for_objective(
-        self, original_user_request: str, step_num: int = 0
-    ) -> dict[str, Any]:
-        message: list[dict[str, Any]] = self.create_message_for_llm(
-            original_user_request, step_num
-        )
+    def get_instructions_for_objective(self, original_user_request: str, step_num: int = 0) -> dict[str, Any]:
+        message: list[dict[str, Any]] = self.create_message_for_llm(original_user_request, step_num)
         llm_response = self.send_message_to_llm(message)
-        json_instructions: dict[str, Any] = self.convert_llm_response_to_json(
-            llm_response
-        )
+        json_instructions: dict[str, Any] = self.convert_llm_response_to_json(llm_response)
 
         return json_instructions
 
-    def create_message_for_llm(
-        self, original_user_request, step_num
-    ) -> list[dict[str, Any]]:
+    def create_message_for_llm(self, original_user_request, step_num) -> list[dict[str, Any]]:
         base64_img: str = Screen().get_screenshot_in_base64()
 
-        request_data: str = json.dumps(
-            {"original_user_request": original_user_request, "step_num": step_num}
-        )
+        request_data: str = json.dumps({
+            'original_user_request': original_user_request,
+            'step_num': step_num
+        })
 
         # We have to add context every request for now which is expensive because our chosen model doesn't have a
         #   stateful/Assistant mode yet.
         message = [
-            {"type": "text", "text": self.context + request_data},
-            {
-                "type": "image_url",
-                "image_url": {"url": f"data:image/jpeg;base64,{base64_img}"},
-            },
+            {'type': 'text', 'text': self.context + request_data},
+            {'type': 'image_url',
+             'image_url': {
+                 'url': f'data:image/jpeg;base64,{base64_img}'
+             }
+             }
         ]
 
         return message
@@ -137,30 +123,26 @@ def send_message_to_llm(self, message) -> ChatCompletion:
             model=self.model,
             messages=[
                 {
-                    "role": "user",
-                    "content": message,
+                    'role': 'user',
+                    'content': message,
                 }
             ],
             max_tokens=800,
         )
         return response
 
-    def convert_llm_response_to_json(
-        self, llm_response: ChatCompletion
-    ) -> dict[str, Any]:
+    def convert_llm_response_to_json(self, llm_response: ChatCompletion) -> dict[str, Any]:
         llm_response_data: str = llm_response.choices[0].message.content.strip()
 
         # Our current LLM model does not guarantee a JSON response hence we manually parse the JSON part of the response
         # Check for updates here - https://platform.openai.com/docs/guides/text-generation/json-mode
-        start_index = llm_response_data.find("{")
-        end_index = llm_response_data.rfind("}")
+        start_index = llm_response_data.find('{')
+        end_index = llm_response_data.rfind('}')
 
         try:
-            json_response = json.loads(
-                llm_response_data[start_index : end_index + 1].strip()
-            )
+            json_response = json.loads(llm_response_data[start_index:end_index + 1].strip())
         except Exception as e:
-            print(f"Error while parsing JSON response - {e}")
+            print(f'Error while parsing JSON response - {e}')
             json_response = {}
 
         return json_response