|
1 | | -import datetime |
| 1 | +""" |
| 2 | +Main application entry point for the OVOS Persona Server. |
| 3 | +
|
| 4 | +This module initializes the FastAPI application, sets up CORS middleware, |
| 5 | +and includes the API routers for OpenAI-compatible chat completions and |
| 6 | +Ollama-compatible endpoints. The persona itself is loaded from a JSON |
| 7 | +definition file at application creation time. |
| 8 | +""" |
2 | 9 | import json |
3 | | -import os.path |
4 | | -import random |
5 | | -import string |
6 | | -import time |
7 | | -from typing import Any |
| 10 | +import os |
| 11 | +from typing import Optional |
8 | 12 |
|
9 | | -from flask import Flask, request |
10 | | -from ovos_bus_client.session import SessionManager |
| 13 | +from fastapi import FastAPI |
| 14 | +from fastapi.middleware.cors import CORSMiddleware |
11 | 15 | from ovos_persona import Persona |
12 | 16 |
|
| 17 | +import ovos_persona_server.persona |
13 | 18 |
|
14 | | -def get_app(persona_json): |
15 | | - app = Flask(__name__) |
16 | 19 |
|
17 | | - with open(persona_json) as f: |
18 | | - persona = json.load(f) |
19 | | - persona["name"] = persona.get("name") or os.path.basename(persona_json) |
20 | | - |
21 | | - persona = Persona(persona["name"], persona) |
22 | | - |
23 | | - ####### |
24 | | - @app.route("/status", methods=["GET"]) |
25 | | - def status(): |
26 | | - return {"persona": persona.name, |
27 | | - "solvers": list(persona.solvers.loaded_modules.keys()), |
28 | | - "models": {s: persona.config.get(s, {}).get("model") |
29 | | - for s in persona.solvers.loaded_modules.keys()}} |
30 | | - |
31 | | - ############## |
32 | | - # OpenAI api compat |
33 | | - @app.route("/chat/completions", methods=["POST"]) |
34 | | - def chat_completions(): |
35 | | - data = request.get_json() |
36 | | - stream = data.get("stream", False) |
37 | | - messages = data.get("messages") |
38 | | - |
39 | | - completion_id = "".join(random.choices(string.ascii_letters + string.digits, k=28)) |
40 | | - completion_timestamp = int(time.time()) |
41 | | - |
42 | | - if not stream: |
43 | | - return { |
44 | | - "id": f"chatcmpl-{completion_id}", |
45 | | - "object": "chat.completion", |
46 | | - "created": completion_timestamp, |
47 | | - "model": persona.name, |
48 | | - "choices": [ |
49 | | - { |
50 | | - "index": 0, |
51 | | - "message": { |
52 | | - "role": "assistant", |
53 | | - "content": persona.chat(messages), |
54 | | - }, |
55 | | - "finish_reason": "stop", |
56 | | - } |
57 | | - ], |
58 | | - "usage": { |
59 | | - "prompt_tokens": None, |
60 | | - "completion_tokens": None, |
61 | | - "total_tokens": None, |
62 | | - }, |
63 | | - } |
64 | | - |
65 | | - def streaming(): |
66 | | - for chunk in persona.stream(messages): |
67 | | - completion_data = { |
68 | | - "id": f"chatcmpl-{completion_id}", |
69 | | - "object": "chat.completion.chunk", |
70 | | - "created": completion_timestamp, |
71 | | - "model": persona.name, |
72 | | - "choices": [ |
73 | | - { |
74 | | - "index": 0, |
75 | | - "delta": { |
76 | | - "content": chunk, |
77 | | - }, |
78 | | - "finish_reason": None, |
79 | | - } |
80 | | - ], |
81 | | - } |
82 | | - |
83 | | - content = json.dumps(completion_data, separators=(",", ":")) |
84 | | - yield f"data: {content}\n\n" |
85 | | - time.sleep(0.1) |
86 | | - |
87 | | - end_completion_data: dict[str, Any] = { |
88 | | - "id": f"chatcmpl-{completion_id}", |
89 | | - "object": "chat.completion.chunk", |
90 | | - "created": completion_timestamp, |
91 | | - "model": persona.name, |
92 | | - "choices": [ |
93 | | - { |
94 | | - "index": 0, |
95 | | - "delta": {}, |
96 | | - "finish_reason": "stop", |
97 | | - } |
98 | | - ], |
99 | | - } |
100 | | - content = json.dumps(end_completion_data, separators=(",", ":")) |
101 | | - yield f"data: {content}\n\n" |
| 20 | +def create_persona_app(persona_path: str) -> FastAPI: |
| 21 | + """ |
| 22 | + Creates and configures the FastAPI application for the Persona Server. |
102 | 23 |
|
103 | | - return app.response_class(streaming(), mimetype="text/event-stream") |
| 24 | + Args: |
| 25 | +        persona_path (str): Path to the persona JSON file to load. The |
| 26 | +                            file's basename is used as the persona name |
| 27 | +                            when the JSON provides none. |
104 | 28 |
|
105 | | - ############ |
106 | | - # Ollama api compat |
107 | | - @app.route("/api/chat", methods=["POST"]) |
108 | | - def chat(): |
109 | | - model = request.json.get("model") |
110 | | - messages = request.json.get("messages") |
111 | | - tools = request.json.get("tools") |
112 | | - stream = request.json.get("stream") |
| 29 | + Returns: |
| 30 | + FastAPI: The configured FastAPI application instance. |
| 31 | + """ |
113 | 32 |
|
114 | | - # Format timestamp to the desired format |
115 | | - completion_timestamp = (datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S') |
116 | | - + f'.{int(time.time() * 1_000_000) % 1_000_000:06d}Z') |
117 | | - |
118 | | - sess = SessionManager().get() |
119 | | - |
120 | | - if not stream: |
121 | | - ans = persona.chat(messages, lang=sess.lang, units=sess.system_unit) |
122 | | - data = { |
123 | | - "model": persona.name, |
124 | | - "created_at": completion_timestamp, |
125 | | - "message": { |
126 | | - "role": "assistant", |
127 | | - "content": ans, |
128 | | - }, |
129 | | - "done": True |
130 | | - # "context": [1, 2, 3], |
131 | | - # "total_duration": 5043500667, |
132 | | - # "load_duration": 5025959, |
133 | | - # "prompt_eval_count": 26, |
134 | | - # "prompt_eval_duration": 325953000, |
135 | | - # "eval_count": 290, |
136 | | - # "eval_duration": 4709213000 |
137 | | - } |
138 | | - return data |
139 | | - |
140 | | - def streaming(): |
141 | | - for ans in persona.stream(messages, lang=sess.lang, units=sess.system_unit): |
142 | | - data = { |
143 | | - "model": persona.name, |
144 | | - "created_at": completion_timestamp, |
145 | | - "message": { |
146 | | - "role": "assistant", |
147 | | - "content": ans |
148 | | - }, |
149 | | - "done": False, |
150 | | - # "context": [1, 2, 3], |
151 | | - # "total_duration": 10706818083, |
152 | | - # "load_duration": 6338219291, |
153 | | - # "prompt_eval_count": 26, |
154 | | - # "prompt_eval_duration": 130079000, |
155 | | - # "eval_count": 259, |
156 | | - # "eval_duration": 4232710000 |
157 | | - } |
158 | | - content = json.dumps(data) |
159 | | - yield content + "\n" |
160 | | - |
161 | | - end_completion_data = { |
162 | | - "model": persona.name, |
163 | | - "created_at": completion_timestamp, |
164 | | - "message": { |
165 | | - "role": "assistant", |
166 | | - "content": "" |
167 | | - }, |
168 | | - "done": True, |
169 | | - # "context": [1, 2, 3], |
170 | | - # "total_duration": 10706818083, |
171 | | - # "load_duration": 6338219291, |
172 | | - # "prompt_eval_count": 26, |
173 | | - # "prompt_eval_duration": 130079000, |
174 | | - # "eval_count": 259, |
175 | | - # "eval_duration": 4232710000 |
176 | | - } |
177 | | - content = json.dumps(end_completion_data) |
178 | | - yield content + "\n" |
179 | | - |
180 | | - return app.response_class(streaming(), mimetype="application/json") |
181 | | - |
182 | | - @app.route("/api/generate", methods=["POST"]) |
183 | | - def generate(): |
184 | | - model = request.json.get("model") |
185 | | - prompt = request.json.get("prompt") |
186 | | - suffix = request.json.get("suffix") |
187 | | - system = request.json.get("system") |
188 | | - template = request.json.get("template") |
189 | | - stream = request.json.get("stream") |
190 | | - |
191 | | - sess = SessionManager().get() |
| 33 | + with open(persona_path) as f: |
| 34 | + persona = json.load(f) |
| 35 | + persona["name"] = persona.get("name") or os.path.basename(persona_path) |
192 | 36 |
|
193 | | - messages = [{ |
194 | | - "role": "user", |
195 | | - "content": prompt |
196 | | - }] |
197 | | - if system: |
198 | | - messages.insert(0, {"role": "system", "content": system}) |
| 37 | + # TODO - move to dependency injection |
| 38 | + ovos_persona_server.persona.default_persona = persona = Persona(persona["name"], persona) |
199 | 39 |
|
200 | | - # Format timestamp to the desired format |
201 | | - completion_timestamp = (datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S') |
202 | | - + f'.{int(time.time() * 1_000_000) % 1_000_000:06d}Z') |
| 40 | + from ovos_persona_server.version import VERSION_MAJOR, VERSION_ALPHA, VERSION_BUILD, VERSION_MINOR |
203 | 41 |
|
204 | | - sess = SessionManager().get() |
| 42 | + version_str = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}" |
| 43 | + if VERSION_ALPHA: |
| 44 | + version_str += f"a{VERSION_ALPHA}" |
205 | 45 |
|
206 | | - if not stream: |
207 | | - ans = persona.chat(messages, lang=sess.lang, units=sess.system_unit) |
208 | | - data = { |
209 | | - "model": persona.name, |
210 | | - "created_at": completion_timestamp, |
211 | | - "message": { |
212 | | - "role": "assistant", |
213 | | - "content": ans, |
214 | | - }, |
215 | | - "done": True |
216 | | - # "context": [1, 2, 3], |
217 | | - # "total_duration": 5043500667, |
218 | | - # "load_duration": 5025959, |
219 | | - # "prompt_eval_count": 26, |
220 | | - # "prompt_eval_duration": 325953000, |
221 | | - # "eval_count": 290, |
222 | | - # "eval_duration": 4709213000 |
223 | | - } |
224 | | - return data |
| 46 | + app = FastAPI(title="OVOS Persona Server", |
| 47 | + description="OpenAI/Ollama compatible API for OVOS Personas and Solvers", |
| 48 | + version=version_str) |
225 | 49 |
|
226 | | - def streaming(): |
227 | | - for ans in persona.stream(messages, lang=sess.lang, units=sess.system_unit): |
228 | | - data = { |
229 | | - "model": persona.name, |
230 | | - "created_at": completion_timestamp, |
231 | | - "message": { |
232 | | - "role": "assistant", |
233 | | - "content": ans |
234 | | - }, |
235 | | - "done": False, |
236 | | - # "context": [1, 2, 3], |
237 | | - # "total_duration": 10706818083, |
238 | | - # "load_duration": 6338219291, |
239 | | - # "prompt_eval_count": 26, |
240 | | - # "prompt_eval_duration": 130079000, |
241 | | - # "eval_count": 259, |
242 | | - # "eval_duration": 4232710000 |
243 | | - } |
244 | | - content = json.dumps(data) |
245 | | - yield content + "\n" |
| 50 | + app.add_middleware( |
| 51 | + CORSMiddleware, |
| 52 | + allow_origins=["*"], # Allows all origins |
| 53 | + allow_credentials=True, |
| 54 | + allow_methods=["*"], # Allows all methods (GET, POST, etc.) |
| 55 | + allow_headers=["*"], # Allows all headers |
| 56 | + ) |
246 | 57 |
|
247 | | - end_completion_data = { |
248 | | - "model": persona.name, |
249 | | - "created_at": completion_timestamp, |
250 | | - "message": { |
251 | | - "role": "assistant", |
252 | | - "content": "" |
253 | | - }, |
254 | | - "done": True, |
255 | | - # "context": [1, 2, 3], |
256 | | - # "total_duration": 10706818083, |
257 | | - # "load_duration": 6338219291, |
258 | | - # "prompt_eval_count": 26, |
259 | | - # "prompt_eval_duration": 130079000, |
260 | | - # "eval_count": 259, |
261 | | - # "eval_duration": 4232710000 |
262 | | - } |
263 | | - content = json.dumps(end_completion_data) |
264 | | - yield content + "\n" |
| 58 | + # Include routers for different API functionalities |
| 59 | + # imported here only after the Persona object is loaded |
| 60 | + from ovos_persona_server.chat import chat_router |
| 61 | + from ovos_persona_server.ollama import ollama_router |
265 | 62 |
|
266 | | - return app.response_class(streaming(), mimetype="text/event-stream") |
| 63 | + app.include_router(chat_router) |
| 64 | + app.include_router(ollama_router) |
267 | 65 |
|
268 | | - @app.route("/api/tags", methods=["GET"]) |
269 | | - def tags(): |
270 | | - return {"models": [ |
271 | | - {"name": persona.name, "model": str(persona.solvers.sort_order[0])} |
272 | | - ]} |
273 | 66 |
|
274 | 67 | return app |
0 commit comments