1+ import datetime
12import json
23import os .path
34import random
67from typing import Any
78
89from flask import Flask , request
10+ from ovos_bus_client .session import SessionManager
911from ovos_persona import Persona
1012
1113
@@ -18,13 +20,16 @@ def get_app(persona_json):
1820
1921 persona = Persona (persona ["name" ], persona )
2022
23+ #######
@app.route("/status", methods=["GET"])
def status():
    """Describe the persona served by this app.

    Returns a dict with the persona name, the names of its loaded solver
    modules, and for each of those solvers the "model" value found in the
    persona config (None when the solver has no such entry).
    """
    solver_names = list(persona.solvers.loaded_modules.keys())

    model_by_solver = {}
    for solver_name in solver_names:
        solver_cfg = persona.config.get(solver_name, {})
        model_by_solver[solver_name] = solver_cfg.get("model")

    return {
        "persona": persona.name,
        "solvers": solver_names,
        "models": model_by_solver,
    }
2730
31+ ##############
32+ # OpenAI api compat
2833 @app .route ("/chat/completions" , methods = ["POST" ])
2934 def chat_completions ():
3035 data = request .get_json ()
@@ -97,4 +102,173 @@ def streaming():
97102
98103 return app .response_class (streaming (), mimetype = "text/event-stream" )
99104
105+ ############
106+ # Ollama api compat
@app.route("/api/chat", methods=["POST"])
def chat():
    """Ollama-style chat endpoint answered by the persona.

    Reads "messages" and "stream" from the JSON request body. The "model"
    and "tools" fields of the request are not used: the reply always comes
    from the persona and reports ``persona.name`` as the model.

    Returns:
        When "stream" is missing or falsy, a single dict with the complete
        assistant message and ``"done": True``. Otherwise a streamed
        response with one JSON object per line: one per chunk produced by
        ``persona.stream`` (``"done": False``), followed by a final object
        with empty content and ``"done": True``.
    """
    payload = request.json
    messages = payload.get("messages")
    # NOTE(review): Ollama treats an omitted "stream" as true, while a missing
    # value here selects the single-response path - confirm which is intended.
    stream = payload.get("stream")

    # One UTC clock read, so the trailing "Z" is truthful and the seconds and
    # microseconds come from the same instant.
    completion_timestamp = datetime.datetime.now(
        datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    sess = SessionManager().get()

    def build_chunk(content, done):
        # Ollama's optional metrics (context, total_duration, load_duration,
        # prompt_eval_count, prompt_eval_duration, eval_count, eval_duration)
        # are not reported.
        return {
            "model": persona.name,
            "created_at": completion_timestamp,
            "message": {
                "role": "assistant",
                "content": content,
            },
            "done": done,
        }

    if not stream:
        ans = persona.chat(messages, lang=sess.lang, units=sess.system_unit)
        return build_chunk(ans, True)

    def streaming():
        for ans in persona.stream(messages, lang=sess.lang, units=sess.system_unit):
            yield json.dumps(build_chunk(ans, False)) + "\n"
        # terminating object: empty content, done flag set
        yield json.dumps(build_chunk("", True)) + "\n"

    # The body is newline-delimited JSON, not a single JSON document.
    return app.response_class(streaming(), mimetype="application/x-ndjson")
181+
@app.route("/api/generate", methods=["POST"])
def generate():
    """Ollama-style text generation endpoint answered by the persona.

    Reads "prompt", "system" and "stream" from the JSON request body. The
    prompt becomes a single user message, preceded by a system message when
    "system" is non-empty. The "model", "suffix" and "template" fields of
    the request are not used.

    Returns:
        When "stream" is missing or falsy, a single dict with the complete
        answer and ``"done": True``. Otherwise a streamed response with one
        JSON object per line: one per chunk produced by ``persona.stream``
        (``"done": False``), followed by a final object with empty content
        and ``"done": True``.
    """
    payload = request.json
    prompt = payload.get("prompt")
    system = payload.get("system")
    # NOTE(review): Ollama treats an omitted "stream" as true, while a missing
    # value here selects the single-response path - confirm which is intended.
    stream = payload.get("stream")

    messages = [{"role": "user", "content": prompt}]
    if system:
        messages.insert(0, {"role": "system", "content": system})

    # One UTC clock read, so the trailing "Z" is truthful and the seconds and
    # microseconds come from the same instant.
    completion_timestamp = datetime.datetime.now(
        datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    sess = SessionManager().get()

    def build_chunk(content, done):
        # Ollama's optional metrics (context, total_duration, load_duration,
        # prompt_eval_count, prompt_eval_duration, eval_count, eval_duration)
        # are not reported.
        return {
            "model": persona.name,
            "created_at": completion_timestamp,
            # Ollama's generate schema carries the text in "response".
            "response": content,
            # Chat-shaped field kept so existing consumers of this endpoint
            # keep working.
            "message": {
                "role": "assistant",
                "content": content,
            },
            "done": done,
        }

    if not stream:
        ans = persona.chat(messages, lang=sess.lang, units=sess.system_unit)
        return build_chunk(ans, True)

    def streaming():
        for ans in persona.stream(messages, lang=sess.lang, units=sess.system_unit):
            yield json.dumps(build_chunk(ans, False)) + "\n"
        # terminating object: empty content, done flag set
        yield json.dumps(build_chunk("", True)) + "\n"

    # The body is newline-delimited JSON without SSE framing, so it is not
    # served as text/event-stream.
    return app.response_class(streaming(), mimetype="application/x-ndjson")
267+
@app.route("/api/tags", methods=["GET"])
def tags():
    """List available models in the Ollama-style shape.

    The persona is the only entry: its name is reported as "name" and the
    string form of the first item of ``persona.solvers.sort_order`` as
    "model".
    """
    entry = dict(
        name=persona.name,
        model=str(persona.solvers.sort_order[0]),
    )
    return {"models": [entry]}
273+
100274 return app
0 commit comments