-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark-e2e.py
More file actions
36 lines (29 loc) · 1.16 KB
/
benchmark-e2e.py
File metadata and controls
36 lines (29 loc) · 1.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import time
import requests
from pydantic import BaseModel
class Query(BaseModel):
    """JSON request body for the query endpoint: a single text field."""

    # The question text; serialized under the "query" key of the payload.
    query: str
def send_query(query_text: str, *, max_attempts: int = 10,
               retry_delay: float = 0.5, timeout: float = 60.0):
    """POST ``query_text`` to the local query endpoint, retrying on failure.

    Args:
        query_text: The question to send; wrapped in a ``Query`` model and
            posted as the JSON body.
        max_attempts: Maximum number of POST attempts before giving up.
        retry_delay: Seconds to sleep between a failed attempt and the next.
        timeout: Per-request timeout in seconds passed to ``requests.post``,
            so a stalled server fails the attempt instead of hanging forever.

    Returns:
        A ``(response_json, total_duration)`` tuple, where ``total_duration``
        is the seconds elapsed from the first attempt until the successful
        response was received (including earlier failures and sleeps).

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
        RuntimeError: If every attempt fails; chained to the last request
            error.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # The request targets localhost but carries an explicit Host header.
    # NOTE(review): presumably a host-based router/proxy listens on :9000 --
    # confirm against the deployment.
    headers = {
        "Host": "www.preretrieval.leonliang.lu"
    }
    url = "http://localhost:9000/query"
    # The payload is identical for every attempt, so serialize it once.
    payload = Query(query=query_text).dict()

    last_error = None
    # perf_counter is monotonic, so measurements are immune to wall-clock
    # adjustments (NTP, DST) that can skew time.time() deltas.
    start_total = time.perf_counter()
    for attempt in range(1, max_attempts + 1):
        start = time.perf_counter()
        try:
            response = requests.post(
                url, json=payload, headers=headers, timeout=timeout
            )
            response.raise_for_status()
            # Stop the clock before JSON parsing, as the original did.
            finished = time.perf_counter()
            body = response.json()
        except requests.exceptions.RequestException as e:
            last_error = e
            print(f"[Attempt {attempt}] Request failed: {e}")
            # No point sleeping after the final attempt; it would only
            # inflate the reported total.
            if attempt < max_attempts:
                time.sleep(retry_delay)
        else:
            duration = finished - start
            total_duration = finished - start_total
            print(f"Request succeeded on attempt {attempt} in {duration:.3f}s, total elapsed {total_duration:.3f}s")
            return body, total_duration

    total_duration = time.perf_counter() - start_total
    raise RuntimeError(
        f"Query failed after {max_attempts} retries. Total elapsed: {total_duration:.3f}s"
    ) from last_error
# Script entry point: send one sample query and report the result and timing.
# Guarded so that importing this module does not fire a network request.
if __name__ == "__main__":
    response, duration = send_query("What's the default batch size?")
    print("Response:", response)
    print(f"Duration: {duration:.3f} seconds")