 import litellm
 
 litellm.suppress_debug_info = True
+import subprocess
 import time
 import uuid
 
@@ -133,10 +134,14 @@ def run(self, messages):
                         precursor = "Imagine I have just shown you an image with this description: "
                         postcursor = ""
 
+                    image_description = self.vision_renderer(lmc=img_msg)
+
+                    # It would be nice to format this as a message to the user and display it like: "I see: image_description"
+
                     img_msg["content"] = (
-                        # precursor
-                        # + self.vision_renderer(lmc=img_msg) +
-                        "\n---\nThe image contains the following text exactly: '''\n"
+                        precursor
+                        + image_description
+                        + "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
                         + self.interpreter.computer.vision.ocr(lmc=img_msg)
                         + "\n'''"
                         + postcursor
@@ -195,9 +200,9 @@ def run(self, messages):
 """
 **We were unable to determine the context window of this model.** Defaulting to 3000.
 
-If your model can handle more, run `interpreter.llm.context_window = {token limit}`.
+If your model can handle more, run `self.context_window = {token limit}`.
 
-Also please set `interpreter.llm.max_tokens = {max tokens per response}`.
+Also please set `self.max_tokens = {max tokens per response}`.
 
 Continuing...
 """
@@ -259,6 +264,46 @@ def run(self, messages):
         else:
             yield from run_text_llm(self, params)
 
+    def load(self):
+        if self.model.startswith("ollama/"):
+            # WOAH we should also hit up ollama and set max_tokens and context_window based on the LLM. I think they let u do that
+
+            model_name = self.model.replace("ollama/", "")
+            try:
+                # List out all downloaded ollama models. Will fail if ollama isn't installed
+                result = subprocess.run(
+                    ["ollama", "list"], capture_output=True, text=True, check=True
+                )
+            except Exception as e:
+                print(str(e))
+                self.interpreter.display_message(
+                    f"> Ollama not found\n\nPlease download Ollama from [ollama.com](https://ollama.com/) to use `{model_name}`.\n"
+                )
+                exit()
+
+            lines = result.stdout.split("\n")
+            names = [
+                line.split()[0].replace(":latest", "")
+                for line in lines[1:]
+                if line.strip()
+            ]  # Extract names, trim out ":latest", skip header
+
+            if model_name not in names:
+                self.interpreter.display_message(f"\nDownloading {model_name}...\n")
+                subprocess.run(["ollama", "pull", model_name], check=True)
+
+            # Send a ping, which will actually load the model
+            print(f"\nLoading {model_name}...\n")
+
+            old_max_tokens = self.max_tokens
+            self.max_tokens = 1
+            self.interpreter.computer.ai.chat("ping")
+            self.max_tokens = old_max_tokens
+
+            # self.interpreter.display_message("\n*Model loaded.*\n")
+
+        # Validate LLM should be moved here!!
+
 
 def fixed_litellm_completions(**params):
     """
@@ -289,7 +334,7 @@ def fixed_litellm_completions(**params):
 
         if "api key" in str(first_error).lower() and "api_key" not in params:
             print(
-                "LiteLLM requires an API key. Please set a dummy API key to prevent this message. (e.g `interpreter --api_key x` or `interpreter.llm.api_key = 'x'`)"
+                "LiteLLM requires an API key. Please set a dummy API key to prevent this message. (e.g `interpreter --api_key x` or `self.api_key = 'x'`)"
             )
 
         # So, let's try one more time with a dummy API key:
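
A note on the TODO inside the new `load()` ("we should also hit up ollama and set max_tokens and context_window based on the LLM"): newer Ollama builds expose model metadata over the local REST API, so the context length can in principle be read programmatically. The sketch below is a hypothetical illustration, not part of this commit; it assumes an Ollama server on the default port, assumes the `/api/show` response includes a `model_info` map (older Ollama versions do not return one), and `ollama_context_length` is an invented helper name.

# Hypothetical helper (not in this commit): query a local Ollama server for a
# model's context length, which load() could then use to set context_window
# and max_tokens instead of relying on defaults.
import requests

def ollama_context_length(model_name, host="http://localhost:11434"):
    response = requests.post(f"{host}/api/show", json={"name": model_name}, timeout=10)
    response.raise_for_status()
    model_info = response.json().get("model_info", {})
    # Metadata keys are architecture-prefixed, e.g. "llama.context_length"
    for key, value in model_info.items():
        if key.endswith(".context_length"):
            return int(value)
    return None  # Older Ollama versions don't report model_info at all

If this returned a value, `load()` could set `self.context_window` from it and pick a `max_tokens` somewhere below it; that wiring is left out here since the commit only pings the model to load it.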