@@ -66,8 +66,9 @@ def __init__(
         self._get_ollama_model_id()

         # Setup the client and ensure that we have the model available.
+        self._base_url = base_url
         self._client = ollama.Client(base_url)
-        self._async_client = ollama.AsyncClient(base_url)
+
         if not self._check_ollama_server():
             err = f"could not create OllamaModelBackend: ollama server not running at {base_url}"
             FancyLogger.get_logger().error(err)
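For context on why the constructor stops holding an `AsyncClient`: the client's underlying HTTP session becomes bound to whichever event loop first drives it, so reusing one instance across separate `asyncio.run()` calls can hit an already-closed loop. Below is a minimal sketch of the before/after pattern, assuming a local Ollama server at `http://localhost:11434` and a placeholder model id; the helper `ask` is hypothetical and not part of this patch.

```python
import asyncio

import ollama


async def ask(client: ollama.AsyncClient) -> str:
    # Send one chat turn and return the reply text.
    resp = await client.chat(
        model="llama3.2",  # placeholder model id, not from the patch
        messages=[{"role": "user", "content": "hello"}],
    )
    return resp["message"]["content"]


# Old pattern: one AsyncClient created up front and shared across calls.
shared = ollama.AsyncClient("http://localhost:11434")
print(asyncio.run(ask(shared)))    # first event loop: fine
# print(asyncio.run(ask(shared)))  # a later, separate event loop may fail,
#                                  # since the client's session belongs to the
#                                  # first (now closed) loop

# Patched pattern: keep only the base URL and build a fresh client per call.
base_url = "http://localhost:11434"
print(asyncio.run(ask(ollama.AsyncClient(base_url))))
print(asyncio.run(ask(ollama.AsyncClient(base_url))))
```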
@@ -317,10 +318,13 @@ def generate_from_chat_context(
         add_tools_from_context_actions(tools, [action])
         FancyLogger.get_logger().info(f"Tools for call: {tools.keys()}")

+        # Ollama ties its async client to an event loop so we have to create it here.
+        async_client = ollama.AsyncClient(self._base_url)
+
         # Generate a chat response from ollama, using the chat messages. Can be either type since stream is passed as a model option.
         chat_response: Coroutine[
             Any, Any, AsyncIterator[ollama.ChatResponse] | ollama.ChatResponse
-        ] = self._async_client.chat(
+        ] = async_client.chat(
             model=self._get_ollama_model_id(),
             messages=conversation,
             tools=list(tools.values()),
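A sketch of how a per-call client can be driven for both streaming and non-streaming chat, roughly mirroring the call above. The function `run_chat`, the model id, and the URL are illustrative assumptions; the real backend keeps the coroutine and dispatches it elsewhere rather than awaiting it inline.

```python
import asyncio

import ollama


async def run_chat(base_url: str, conversation: list[dict], stream: bool) -> str:
    # Per-call client: created inside the coroutine, so it is always bound
    # to the event loop that actually runs the request.
    async_client = ollama.AsyncClient(base_url)
    response = await async_client.chat(
        model="llama3.2",  # placeholder model id, not from the patch
        messages=conversation,
        stream=stream,
    )
    if stream:
        # Streaming: response is an async iterator of partial ChatResponses.
        parts = []
        async for chunk in response:
            parts.append(chunk["message"]["content"])
        return "".join(parts)
    # Non-streaming: response is a single ChatResponse.
    return response["message"]["content"]


conversation = [{"role": "user", "content": "Say hi."}]
print(asyncio.run(run_chat("http://localhost:11434", conversation, stream=False)))
```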
@@ -382,10 +386,11 @@ async def get_response(coroutines):
             responses = await asyncio.gather(*coroutines, return_exceptions=True)
             return responses

+        async_client = ollama.AsyncClient(self._base_url)
         # Run async so that we can make use of Ollama's concurrency.
         coroutines = []
         for prompt in prompts:
-            co = self._async_client.generate(
+            co = async_client.generate(
                 model=self._get_ollama_model_id(),
                 prompt=prompt,
                 raw=True,
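A sketch of the batched-generation pattern this hunk supports: one `AsyncClient` built inside the loop that consumes it, one `generate` coroutine per prompt, fanned out with `asyncio.gather`. The function name, model id, and URL are illustrative, not from the patch.

```python
import asyncio

import ollama


async def generate_all(base_url: str, prompts: list[str]) -> list:
    # Client is created here, inside the running loop, not in __init__.
    async_client = ollama.AsyncClient(base_url)
    coroutines = [
        async_client.generate(
            model="llama3.2",  # placeholder model id, not from the patch
            prompt=prompt,
            raw=True,          # raw prompts, as in the patched call
        )
        for prompt in prompts
    ]
    # return_exceptions=True matches the gather in the patch: a failing prompt
    # yields an exception object instead of cancelling the whole batch.
    return await asyncio.gather(*coroutines, return_exceptions=True)


results = asyncio.run(generate_all("http://localhost:11434", ["1 + 1 =", "2 + 2 ="]))
for r in results:
    print(r if isinstance(r, Exception) else r["response"])
```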