33import json
44import requests
55import os
6+ from typing import Optional
67
78from ..template import OpenaiTemplate
89from ...requests import StreamSession , raise_for_status
@@ -59,42 +60,22 @@ async def create_async_generator(
5960 messages : Messages ,
6061 api_key : str = None ,
6162 base_url : str = None ,
62- proxy : str = None ,
6363 ** kwargs
6464 ) -> AsyncResult :
65- if base_url is None :
66- host = os .getenv ("OLLAMA_HOST" , "localhost" )
67- port = os .getenv ("OLLAMA_PORT" , "11434" )
68- base_url : str = f"http://{ host } :{ port } /v1"
65+ if not cls .models :
66+ cls .get_models (api_key = api_key , base_url = base_url )
6967 if model in cls .local_models :
70- async with StreamSession (headers = {"Authorization" : f"Bearer { api_key } " }, proxy = proxy ) as session :
71- async with session .post (f"{ base_url .replace ('/v1' , '' )} /api/chat" , json = {
72- "model" : model ,
73- "messages" : messages ,
74- }) as response :
75- await raise_for_status (response )
76- last_data = {}
77- async for chunk in response .iter_lines ():
78- data = json .loads (chunk )
79- last_data = data
80- thinking = data .get ("message" , {}).get ("thinking" , "" )
81- if thinking :
82- yield Reasoning (thinking )
83- content = data .get ("message" , {}).get ("content" , "" )
84- if content :
85- yield content
86- yield Usage (
87- prompt_tokens = last_data .get ("prompt_eval_count" , 0 ),
88- completion_tokens = last_data .get ("eval_count" , 0 ),
89- total_tokens = last_data .get ("prompt_eval_count" , 0 ) + last_data .get ("eval_count" , 0 ),
90- )
68+ if base_url is None :
69+ host = os .getenv ("OLLAMA_HOST" , "localhost" )
70+ port = os .getenv ("OLLAMA_PORT" , "11434" )
71+ base_url : str = f"http://{ host } :{ port } /v1"
9172 else :
92- async for chunk in super (). create_async_generator (
93- model ,
94- messages ,
95- api_key = api_key ,
96- base_url = cls . backup_url ,
97- proxy = proxy ,
98- ** kwargs
99- ):
100- yield chunk
73+ base_url = cls . backup_url
74+ async for chunk in super (). create_async_generator (
75+ model ,
76+ messages ,
77+ api_key = api_key ,
78+ base_url = cls . backup_url ,
79+ ** kwargs
80+ ):
81+ yield chunk
0 commit comments