-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapplication.py
More file actions
83 lines (66 loc) · 2.68 KB
/
application.py
File metadata and controls
83 lines (66 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import requests
import json
import time
def example_client():
    """Demonstrate the LLM prediction server's HTTP API.

    Polls ``/health`` until the model reports ready, then walks through
    four examples: a GET prediction, a POST prediction, a batch POST
    prediction, and a ``/stats`` query. Results are printed to stdout.

    Assumes a server is listening on ``http://localhost:8000`` exposing
    ``/health``, ``/predict`` and ``/stats`` endpoints.
    """
    base_url = "http://localhost:8000"
    # Wait for server to be ready
    print("Waiting for the LLM prediction server to be ready...")
    server_ready = False
    while not server_ready:
        try:
            response = requests.get(f"{base_url}/health", timeout=1)
            data = response.json()
            if data["model_ready"]:
                server_ready = True
                print("Server is ready!")
            else:
                print("Model still loading, waiting...")
                time.sleep(2)
        except requests.exceptions.RequestException:
            print("Server not responding yet, waiting...")
            time.sleep(2)
    # Example 1: Simple prediction with GET
    text = "I would like to"
    print(f"\n1. Getting predictions for: '{text}'")
    # FIX: pass the text via params= so requests URL-encodes it; the
    # original f-string interpolation sent raw spaces in the query string.
    # A timeout is added so a wedged server cannot hang the client forever.
    response = requests.get(f"{base_url}/predict", params={"text": text}, timeout=10)
    response.raise_for_status()
    data = response.json()
    print("Predictions:")
    for pred in data["predictions"]:
        print(f" • {pred['word']} ({pred['probability']:.0%})")
    print(f"Model time: {data['metadata']['model_time_ms']:.1f}ms")
    # Example 2: Prediction with POST request
    text = "The quick brown fox"
    print(f"\n2. Getting predictions with POST for: '{text}'")
    response = requests.post(
        f"{base_url}/predict",
        json={"text": text, "top_k": 5},
        timeout=10,
    )
    response.raise_for_status()
    data = response.json()
    print("Predictions:")
    for pred in data["predictions"]:
        print(f" • {pred['word']} ({pred['probability']:.0%})")
    # Example 3: Batch predictions (server may reject list input; handled below)
    texts = ["Hello world", "Once upon a", "The weather is"]
    print("\n3. Getting batch predictions for multiple inputs")
    try:
        response = requests.post(
            f"{base_url}/predict",
            json={"text": texts, "top_k": 3},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
        # NOTE(review): assumes the batch response is a list parallel to
        # `texts` — confirm against the server's /predict implementation.
        for text, result in zip(texts, data):
            print(f"\nPredictions for '{text}':")
            for pred in result["predictions"]:
                print(f" • {pred['word']} ({pred['probability']:.0%})")
    except Exception as e:
        # Best-effort demo: batch mode may be unsupported; report and move on.
        print(f"Error with batch predictions: {e}")
    # Example 4: Get server stats
    print("\n4. Getting server statistics")
    response = requests.get(f"{base_url}/stats", timeout=10)
    response.raise_for_status()
    stats = response.json()
    print(f"Total requests: {stats['requests']}")
    print(f"Average prediction time: {stats['avg_prediction_time']*1000:.1f}ms")
    print(f"Server uptime: {stats['uptime_seconds']:.0f} seconds")
# Run the demo client only when executed as a script, not on import.
if __name__ == "__main__":
    example_client()