|
27 | 27 |
|
28 | 28 |
|
29 | 29 | DEFAULT_HTTP_TIMEOUT = 12 if "LLAMA_SANITIZE" not in os.environ else 30 |
30 | | -REQUEST_RETRIES = int(os.environ.get('LLAMA_SERVER_TEST_REQUEST_RETRIES', '1')) |
31 | 30 |
|
32 | 31 | class ServerResponse: |
33 | 32 | headers: dict |
@@ -195,6 +194,8 @@ def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None: |
195 | 194 | self.process = subprocess.Popen( |
196 | 195 | [str(arg) for arg in [server_path, *server_args]], |
197 | 196 | creationflags=flags, |
| 197 | + # stdout=subprocess.DEVNULL, |
| 198 | + # stderr=subprocess.DEVNULL, |
198 | 199 | stdout=sys.stdout, |
199 | 200 | stderr=sys.stdout, |
200 | 201 | env={**os.environ, "LLAMA_CACHE": "tmp"} if "LLAMA_CACHE" not in os.environ else None, |
@@ -240,30 +241,27 @@ def make_request( |
240 | 241 | timeout: float | None = None, |
241 | 242 | ) -> ServerResponse: |
242 | 243 | url = f"http://{self.server_host}:{self.server_port}{path}" |
243 | | - for remaining_attempts in range(REQUEST_RETRIES, 0, -1): |
244 | | - # print(f"#\ncurl {url} -d '{json.dumps(data, indent=2)}'\n") |
245 | | - parse_body = False |
246 | | - if method == "GET": |
247 | | - response = requests.get(url, headers=headers, timeout=timeout) |
248 | | - parse_body = True |
249 | | - elif method == "POST": |
250 | | - response = requests.post(url, headers=headers, json=data, timeout=timeout) |
251 | | - parse_body = True |
252 | | - elif method == "OPTIONS": |
253 | | - response = requests.options(url, headers=headers, timeout=timeout) |
254 | | - else: |
255 | | - raise ValueError(f"Unimplemented method: {method}") |
256 | | - |
257 | | - if (response is None or response.status_code != 200) and remaining_attempts > 0: |
258 | | - continue |
259 | | - result = ServerResponse() |
260 | | - result.headers = dict(response.headers) |
261 | | - result.status_code = response.status_code |
262 | | - result.body = response.json() if parse_body else None |
263 | | - # print("Response from server", json.dumps(result.body, indent=2)) |
264 | | - return result |
265 | | - |
266 | | - raise RuntimeError(f"Failed to make request to {url} after {retries} attempts") |
| 244 | + # print(f"#\ncurl {url} -d '{json.dumps(data, indent=2)}'\n") |
| 245 | + parse_body = False |
| 246 | + if method == "GET": |
| 247 | + response = requests.get(url, headers=headers, timeout=timeout) |
| 248 | + parse_body = True |
| 249 | + elif method == "POST": |
| 250 | + response = requests.post(url, headers=headers, json=data, timeout=timeout) |
| 251 | + parse_body = True |
| 252 | + elif method == "OPTIONS": |
| 253 | + response = requests.options(url, headers=headers, timeout=timeout) |
| 254 | + else: |
| 255 | + raise ValueError(f"Unimplemented method: {method}") |
| 256 | + |
| 257 | + if (response is None or response.status_code != 200) and remaining_attempts > 0: |
| 258 | + continue |
| 259 | + result = ServerResponse() |
| 260 | + result.headers = dict(response.headers) |
| 261 | + result.status_code = response.status_code |
| 262 | + result.body = response.json() if parse_body else None |
| 263 | + # print("Response from server", json.dumps(result.body, indent=2)) |
| 264 | + return result |
267 | 265 |
|
268 | 266 |
|
269 | 267 | def make_stream_request( |
|
0 commit comments