
Commit adbf38e

chatbot v10 has a Server class
1 parent ef432b7 commit adbf38e

3 files changed, +402 -2 lines changed


notebooks/tps/chatbot/.teacher/README-chatbot-corrige-nb.md

Lines changed: 35 additions & 1 deletion
@@ -472,7 +472,41 @@ rather than offering a "hard-wired" list of models as in the *starter co

in my implementation I chose to "cache" this result, so as not to ask the same server for this list several times (this list changes very, very little...); but that is optional; on the other hand it would be nice for the users to keep, when possible, the chosen model when switching servers...

## v10 (optional): a `Server` class

in this version, I suggest you **create an abstract `Server` class**,
which defines a common API for interacting with an AI server; then a
concrete class **`OllamaServer`** which inherits from `Server` and encapsulates
the logic for interacting with the ollama API - since for now our two servers
offer the same API

but this way, in the future (next step) we will be able to more simply
add the code to interact with other kinds of servers, which implement
a different API (for example `litellm`, which we have also deployed at
Inria)

this is why in this v10, I suggest you keep the functionality
unchanged, but create an `OllamaServer` class that inherits from
`Server` and implements the following methods:

```python
from typing import Iterator

class Server:
    """
    an abstract server class
    """
    def list_model_names(self) -> list[str]:
        pass
    def generate_blocking(self, prompt, model) -> list[str]:
        """
        non-streaming generation - returns a list of text chunks
        """
        pass
    def generate_streaming(self, prompt, model) -> Iterator[str]:
        """
        streaming generation - yields text chunks
        """
        pass
```
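
to make the benefit of this abstraction concrete, here is a minimal usage sketch - not part of the starter code, it just assumes a `SERVERS` dict that maps a nickname to a `Server` instance, like in the solution code; the calling code only goes through the `Server` API and never needs to know which concrete class it is actually talking to

```python
# minimal sketch, assuming SERVERS maps a nickname to a Server instance
def ask(servers: dict[str, Server], key: str, prompt: str, model: str, streaming: bool):
    server = servers[key]
    if streaming:
        # the streaming variant yields chunks as they arrive
        for chunk in server.generate_streaming(prompt, model):
            print(chunk, end="", flush=True)
    else:
        # the blocking variant returns the whole list of chunks at once
        print("".join(server.generate_blocking(prompt, model)))
```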

## lots of possible improvements

Lines changed: 316 additions & 0 deletions
@@ -0,0 +1,316 @@
"""
instead of using a hard-wired list of models,
we fetch the list of supported models at the server
at the api/tags endpoint using GET
"""

import json
from typing import Iterator

import requests
import flet as ft

# in this version we create servers as INSTANCES of CLASSES
# so we can encapsulate the logic to interact with them
#
# rationale is to be able to talk with servers that implement other APIs
# e.g. litellm that has also been deployed and on more servers

# we keep the idea of specifying our available servers as this dictionary
# but below we'll use this to create actual server INSTANCES

SERVER_SPECS = {
    # this one is fast because it has GPUs,
    # but it requires a login / password
    'GPU': {
        "name": "GPU fast",
        "url": "https://ollama-sam.inria.fr",
        "username": "Bob",
        "password": "hiccup",
    },
    # this one is slow because it has no GPUs,
    # but it does not require a login / password
    'CPU': {
        "name": "CPU slow",
        "url": "http://ollama.pl.sophia.inria.fr:8080",
    },
}

TITLE = "My first Chatbot 10"

class Server:
    """
    an abstract server class
    """
    def list_model_names(self) -> list[str]:
        pass
    def generate_blocking(self, prompt, model) -> list[str]:
        """
        non-streaming generation - returns a list of text chunks
        """
        pass
    def generate_streaming(self, prompt, model) -> Iterator[str]:
        """
        streaming generation - yields text chunks
        """
        pass


class OllamaServer(Server):
    """
    for servers that comply with ollama's API
    """
    def __init__(self, name, url, username=None, password=None):
        self.name = name
        self.url = url
        self.username = username
        self.password = password

    def _authenticate_extra_args(self) -> dict:
        auth_args = {}
        if self.username is not None:
            auth_args = {
                'auth': (self.username, self.password)
            }
        return auth_args

    def list_model_names(self):
        url = f"{self.url}/api/tags"
        auth_args = self._authenticate_extra_args()
        answer = requests.get(url, **auth_args)
        print(f"HTTP retcod on {url}:", answer.status_code)
        if not (200 <= answer.status_code < 300):
            print("not 2xx, aborting")
            return []
        raw = answer.json()
        return [model['name'] for model in raw['models']]

    def generate_blocking(self, prompt, model):
        url = f"{self.url}/api/generate"
        auth_args = self._authenticate_extra_args()
        payload = {'model': model, 'prompt': prompt}
        result = []

        answer = requests.post(url, json=payload, **auth_args)
        print(f"HTTP retcod on {url}:", answer.status_code)
        if not (200 <= answer.status_code < 300):
            print("not 2xx, aborting")
            return result
        for line in answer.text.split("\n"):
            # splitting artefacts can be ignored
            if not line:
                continue
            # there should be no exception, but just in case...
            try:
                data = json.loads(line)
                # the last JSON chunk contains statistics and is not a message
                if data['done']:
                    break
                result.append(data['response'])
            except Exception as e:
                print(f"Exception {type(e)=}, {e=}")
        return result

    def generate_streaming(self, prompt, model):
        url = f"{self.url}/api/generate"
        auth_args = self._authenticate_extra_args()
        payload = {'model': model, 'prompt': prompt}

        answer = requests.post(url, json=payload, stream=True, **auth_args)
        print(f"HTTP retcod on {url}:", answer.status_code)
        if not (200 <= answer.status_code < 300):
            print("not 2xx, aborting")
            return
        for line in answer.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line)
                if data['done']:
                    return
                yield data['response']
            except Exception as e:
                print(f"Exception {type(e)=}, {e=}")


SERVERS = {}
for key, spec in SERVER_SPECS.items():
    SERVERS[key] = OllamaServer(
        name=spec['name'],
        url=spec['url'],
        username=spec.get('username', None),
        password=spec.get('password', None),
    )

class History(ft.Column):
    """
    the history is a column of text messages
    where prompts and answers alternate
    """

    def __init__(self, app):
        super().__init__(
            [ft.TextField(
                label="Type a message...",
                on_submit=lambda event: app.send_request(event),
                fill_color="lightgrey",
            )],
            scroll=ft.ScrollMode.AUTO,
            auto_scroll=True,
            expand=True,
        )

    # insert material - prompt or answer - to allow for different styles
    def add_prompt(self, message):
        self._add_entry(message, "prompt")
    def add_answer(self, message):
        self._add_entry(message, "answer")
    def _add_entry(self, message, kind):
        display = ft.Text(value=message)
        display.color = "blue" if kind == "prompt" else "green"
        display.size = 20 if kind == "prompt" else 16
        display.italic = kind == "prompt"
        self.controls.insert(-1, display)

    # we always insert in the penultimate position
    # given that the last item in controls is the prompt TextField
    def add_chunk(self, chunk):
        self.controls[-2].value += chunk
    def current_prompt(self):
        return self.controls[-1].value

    def enable_prompt(self):
        self.controls[-1].disabled = False
    def disable_prompt(self):
        self.controls[-1].disabled = True

class ChatbotApp(ft.Column):

    def __init__(self):
        # we keep a cache of available models on each server
        self.model_names_per_server = {}

        header = ft.Text(value=TITLE, size=40)

        self.streaming = ft.Checkbox(label="streaming", value=True)
        # will be populated later
        self.model = ft.Dropdown(
            # options=[],
            width=300,
        )
        self.server = ft.Dropdown(
            options=[ft.dropdown.Option(server)
                     for server in SERVER_SPECS.keys()],
            value="GPU",
            width=100,
            on_change=lambda event: self.update_models(),
        )

        self.submit = ft.ElevatedButton("Send", on_click=self.send_request)

        self.history = History(self)

        row = ft.Row(
            [self.streaming, self.model, self.server, self.submit],
            alignment=ft.MainAxisAlignment.CENTER,
        )
        super().__init__(
            [header, row, self.history],
            horizontal_alignment=ft.CrossAxisAlignment.CENTER,
            expand=True,
        )

        # go fetch the relevant models for the selected server
        # as explained below, at this point we are not yet in the page
        # so we cannot call update() yet
        self.update_models(update=False)

    def fetch_models(self):
        # already fetched ?
        if self.server.value in self.model_names_per_server:
            return
        server_instance = SERVERS[self.server.value]
        model_names = server_instance.list_model_names()
        # for usability: sort the models alphabetically
        model_names.sort()
        self.model_names_per_server[self.server.value] = model_names

    def update_models(self, *, update=True):
        # preserve current setting as far as possible
        current_model = self.model.value
        self.fetch_models()
        available_models = self.model_names_per_server[self.server.value]
        # replace the current options with the new ones
        self.model.options = [
            ft.dropdown.Option(model) for model in available_models
        ]
        # preserve setting if possible, otherwise pick first one
        if current_model in available_models:
            self.model.value = current_model
        else:
            # xxx somehow the first model on GPU - all-minilm:22m-l6-v2-fp16
            # returns an error saying the model does not support generate
            # so, as a workaround, find the first model that does not start with all-
            self.model.value = next(
                model for model in available_models if not model.startswith("all-")
            )
        # a subtle point here: because we call update_models in the constructor,
        # and because at that time the app is not yet in the page,
        # we cannot call update() in that circumstance
        # BUT since this method is also bound to the 'change' event on the server widget,
        # in that circumstance we do need to update
        if update:
            self.update()

    def send_request(self, _event):
        # disable the button to prevent double submission
        self.submit.disabled = True
        self.history.disable_prompt()
        self.send_request_2(_event)
        self.submit.disabled = False
        self.history.enable_prompt()
        self.update()


    # send the prompt to the server and display the answer
    def send_request_2(self, _event):
        model = self.model.value
        prompt = self.history.current_prompt()
        server_instance = SERVERS[self.server.value]

        # record the question asked
        self.history.add_prompt(prompt)
        # create placeholder for the answer
        self.history.add_answer("")
        # update UI
        self.update()

        # send the request
        streaming = self.streaming.value

        print(f"Sending message to {server_instance.name}, {model=}, {streaming=}, {prompt=}")

        # streaming or non streaming
        if not streaming:
            # not streaming = blocking
            answers = server_instance.generate_blocking(prompt, model)
            for text in answers:
                self.history.add_chunk(text)
                self.update()
        else:
            # streaming version
            answers = server_instance.generate_streaming(prompt, model)
            for text in answers:
                self.history.add_chunk(text)
                self.update()


def main(page: ft.Page):
    page.title = TITLE

    chatbot = ChatbotApp()
    page.add(chatbot)


ft.app(target=main)
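
as hinted in the comment at the top of this file, the point of the `Server` / `OllamaServer` split is that a backend with a different API can later be added as just another subclass; the snippet below is a hypothetical sketch of that next step (an OpenAI-style endpoint, roughly what a litellm proxy exposes) - the class name, endpoint paths and payload shapes are assumptions for illustration, not part of this commit, and the streaming variant is left out

```python
# hypothetical sketch of a possible next step - NOT part of this commit:
# a Server subclass (reusing the Server base class above) for an
# OpenAI-compatible REST API; endpoint paths and payload shapes are assumptions
import requests

class OpenAICompatibleServer(Server):
    """
    for servers that expose an OpenAI-style API
    """
    def __init__(self, name, url, api_key=None):
        self.name = name
        self.url = url
        self.api_key = api_key

    def _headers(self) -> dict:
        # bearer-token authentication instead of basic auth
        return {'Authorization': f"Bearer {self.api_key}"} if self.api_key else {}

    def list_model_names(self):
        answer = requests.get(f"{self.url}/v1/models", headers=self._headers())
        if not (200 <= answer.status_code < 300):
            return []
        return [model['id'] for model in answer.json()['data']]

    def generate_blocking(self, prompt, model):
        payload = {
            'model': model,
            'messages': [{'role': 'user', 'content': prompt}],
        }
        answer = requests.post(f"{self.url}/v1/chat/completions",
                               json=payload, headers=self._headers())
        if not (200 <= answer.status_code < 300):
            return []
        # the whole answer comes back as a single message
        return [answer.json()['choices'][0]['message']['content']]
```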
