-
Notifications
You must be signed in to change notification settings - Fork 323
Test connection pool concurrency #2605
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
antoniosarosi
wants to merge
5
commits into
canary
Choose a base branch
from
antonio/concurrency
base: canary
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 1 commit
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| // Used to test connection pool concurrency | ||
|
|
||
| const http = require("http"); | ||
| const { URL } = require("url"); | ||
|
|
||
// Get host and port.
// CLI flags win over environment variables; the fallback is 127.0.0.1:8001.
// Note: getArg is declared below — safe because function declarations hoist.
const HOST = getArg("--host") || process.env.HOST || "127.0.0.1";
const PORT = Number(getArg("--port") || process.env.PORT || 8001);

// Latency in milliseconds.
// Artificial per-request delay for /v1/chat/completions so that client-side
// connection-pool concurrency can be observed from total wall-clock time.
const LATENCY = Number(getArg("--latency") || process.env.LATENCY || 50);
|
|
||
// Look up the value that follows a CLI flag in process.argv.
// Returns undefined when the flag is not present.
function getArg(flag) {
  const argv = process.argv;
  const position = argv.indexOf(flag);
  if (position === -1) {
    return undefined;
  }
  return argv[position + 1];
}
|
|
||
// Resolve after the given number of milliseconds (awaitable setTimeout).
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
||
| // Respond with JSON. | ||
| function json(res, status, bodyObj) { | ||
| const body = JSON.stringify(bodyObj); | ||
| res.writeHead(status, { | ||
| "Content-Type": "application/json", | ||
| "Content-Length": Buffer.byteLength(body), | ||
| "Cache-Control": "no-store", | ||
| "Connection": "keep-alive", | ||
| // CORS (harmless if you curl) | ||
| "Access-Control-Allow-Origin": "*", | ||
| "Access-Control-Allow-Headers": "Content-Type, Authorization", | ||
| }); | ||
| res.end(body); | ||
| } | ||
|
|
||
// Route a single HTTP request.
//
// Endpoints:
//   GET  /health              -> { ok: true }
//   POST /v1/chat/completions -> mock OpenAI chat completion after LATENCY ms
//   anything else             -> 404 JSON error
async function handleRequest(req, res) {
  const url = new URL(req.url, `http://${req.headers.host}`);

  // Health
  if (req.method === "GET" && url.pathname === "/health") {
    return json(res, 200, { ok: true });
  }

  // Chat Completions
  if (req.method === "POST" && url.pathname === "/v1/chat/completions") {
    let body = "";

    // Fix: without an "error" listener, a client aborting mid-body emits an
    // unhandled 'error' event and crashes the whole process. The outer
    // try/catch in createServer cannot catch errors raised asynchronously
    // inside these listeners.
    req.on("error", () => {
      if (!res.headersSent) {
        json(res, 400, { error: { message: "Request stream error" } });
      }
    });

    req.on("data", chunk => body += chunk);

    req.on("end", async () => {
      // We don't actually need the request payload for this test.
      // But parse if present to avoid client errors.
      try {
        if (body && body.length) {
          JSON.parse(body);
        }
      } catch {
        return json(res, 400, { error: { message: "Invalid JSON" } });
      }

      // Simulate latency for concurrency testing
      await sleep(LATENCY);

      const now = Math.floor(Date.now() / 1000);

      // Minimal OpenAI-compatible chat.completion payload.
      return json(res, 200, {
        id: `cmpl-${now}-${Math.random().toString(36).slice(2, 8)}`,
        object: "chat.completion",
        created: now,
        model: "concurrency-test",
        choices: [
          {
            index: 0,
            message: { role: "assistant", content: "AGI" },
            finish_reason: "stop",
          },
        ],
        usage: { prompt_tokens: 0, completion_tokens: 1, total_tokens: 1 },
      });
    });

    // Response is produced asynchronously by the "end" listener above.
    return;
  }

  // Not found
  json(res, 404, { error: { message: "Not found" } });
}
|
|
||
// Strip CR/LF from user-controlled values before they reach the log line.
// Fixes the CodeQL "log injection" finding: a crafted req.url containing
// \r\n could otherwise forge additional log entries.
function sanitizeForLog(value) {
  return String(value).replace(/\r|\n/g, "");
}

const server = http.createServer(async (req, res) => {
  console.log(`${sanitizeForLog(req.method)} ${sanitizeForLog(req.url)}`);

  try {
    await handleRequest(req, res);
  } catch (e) {
    // Any synchronous routing failure becomes a JSON 500 for the client.
    json(res, 500, { error: { message: e?.message || "Internal error" } });
  }
});
|
|
||
// Bind to HOST:PORT and announce readiness on stdout; a parent process can
// watch for this line to learn that the server is accepting connections.
server.listen(PORT, HOST, () => {
  process.stdout.write(`listening http://${HOST}:${PORT}\n`);
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,201 @@ | ||
| import asyncio | ||
| import contextlib | ||
| import os | ||
| import pathlib | ||
| import socket | ||
| import time | ||
| import shutil | ||
|
|
||
| from baml_py import ClientRegistry | ||
| import pytest | ||
| from baml_client import b | ||
|
|
||
|
|
||
def find_free_port():
    """Ask the OS for an ephemeral TCP port on localhost and return it.

    The probe socket is closed before returning, so the port is free — but
    not reserved — at the moment the caller uses it.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("127.0.0.1", 0))
        return probe.getsockname()[1]
    finally:
        probe.close()
|
|
||
|
|
||
async def wait_for_port(host: str, port: int, timeout_s: float = 15.0):
    """Poll until a TCP connection to ``(host, port)`` succeeds.

    Retries every 50 ms; raises ``RuntimeError`` if the port is still closed
    once ``timeout_s`` has elapsed.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout_s
    while True:
        try:
            _reader, writer = await asyncio.open_connection(host, port)
        except Exception:
            if loop.time() > deadline:
                raise RuntimeError(f"Port {host}:{port} did not open in time")
            await asyncio.sleep(0.05)
            continue
        # Connected: tidy up the probe connection and report success.
        writer.close()
        try:
            await writer.wait_closed()
        except Exception:
            pass
        return
|
|
||
|
|
||
async def try_http_health(host: str, port: int, timeout_s: float = 2.0):
    """Repeatedly issue a raw ``GET /health`` until a 200 arrives or time runs out.

    Returns True once an HTTP 200 response is seen, False after ``timeout_s``.
    Connection failures never raise; they simply trigger another retry.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout_s
    probe = (
        "GET /health HTTP/1.1\r\n"
        f"Host: {host}:{port}\r\n"
        "Connection: close\r\n"
        "\r\n"
    ).encode("ascii")

    while True:
        try:
            reader, writer = await asyncio.open_connection(host, port)
            writer.write(probe)
            await writer.drain()
            # "Connection: close" means the server ends the stream, so read
            # to EOF to capture the whole response.
            response = await reader.read(-1)
            writer.close()
            try:
                await writer.wait_closed()
            except Exception:
                pass
            if b" 200 " in response:
                return True
        except Exception:
            pass

        if loop.time() > deadline:
            return False
        await asyncio.sleep(0.05)
|
|
||
|
|
||
async def terminate_process(proc: asyncio.subprocess.Process):
    """Stop a child process: SIGTERM first, SIGKILL if it survives 3 seconds.

    A process that has already vanished from the OS (``ProcessLookupError``)
    is left alone.
    """
    still_running = proc.returncode is None
    if still_running:
        try:
            proc.terminate()
        except ProcessLookupError:
            # Already reaped by the OS; nothing left to wait for.
            return
    try:
        await asyncio.wait_for(proc.wait(), timeout=3)
    except asyncio.TimeoutError:
        # Graceful shutdown did not finish in time; force it.
        try:
            proc.kill()
        except ProcessLookupError:
            pass
|
|
||
|
|
||
async def pump_stdout(proc: asyncio.subprocess.Process, buf: list[str]):
    """Drain the child's stdout into ``buf`` line by line until EOF.

    Undecodable bytes fall back to ``repr`` of the raw line; any other
    failure silently stops the pump (logging here is strictly best-effort).
    """
    stream = proc.stdout
    if stream is None:
        return
    try:
        async for raw_line in stream:
            try:
                buf.append(raw_line.decode("utf-8", errors="ignore"))
            except Exception:
                buf.append(repr(raw_line))
    except Exception:
        pass
|
|
||
|
|
||
@contextlib.asynccontextmanager
async def start_openai_generic_server(latency: int):
    """Launch the mock OpenAI-compatible Node server and yield its base URL.

    Spawns ``common/concurrent_server.js`` with the given per-request latency
    (milliseconds), waits until it accepts TCP connections and answers
    ``GET /health``, then yields ``http://127.0.0.1:<port>/v1``. On exit the
    child is terminated and its captured output is printed for diagnosis.

    Raises:
        FileNotFoundError: if the server script is missing.
        RuntimeError: if ``node`` is not on PATH, or the server fails to
            become healthy in time (the child's output is included in the
            error message).
    """
    server_js_path = pathlib.Path(__file__).parent.parent.parent / "common" / "concurrent_server.js"
    if not server_js_path.exists():
        raise FileNotFoundError(f"Server script not found: {server_js_path}")

    node_bin = shutil.which("node") or shutil.which("nodejs")
    if not node_bin:
        raise RuntimeError("Cannot find 'node' or 'nodejs' on PATH")

    host = "127.0.0.1"
    port = find_free_port()

    cmd = [node_bin, str(server_js_path), "--host", host, "--port", str(port), "--latency", str(latency)]
    env = os.environ.copy()

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,  # interleave stderr with stdout
        cwd=os.getcwd(),
        env=env,
    )

    # Collect the child's output in the background for post-mortem logging.
    log_buf: list[str] = []
    pump_task = asyncio.create_task(pump_stdout(proc, log_buf))

    async def collect_logs(timeout: float) -> str:
        # Give the pump a moment to drain remaining output, then join it.
        # (Consolidates the previously duplicated wait-then-join blocks.)
        try:
            await asyncio.wait_for(pump_task, timeout=timeout)
        except Exception:
            pass
        return "".join(log_buf)

    base_url = f"http://{host}:{port}"

    try:
        await wait_for_port(host, port, timeout_s=15.0)
        # Fix: the health-check result used to be ignored, so a server that
        # accepted connections but never answered /health was treated as up.
        if not await try_http_health(host, port, timeout_s=2.0):
            raise RuntimeError("Health check failed")
    except Exception as e:
        await terminate_process(proc)
        logs = await collect_logs(0.3)
        raise RuntimeError(f"Failed to start Node server: {e}\n--- server output ---\n{logs}")

    try:
        yield f"{base_url}/v1"
    finally:
        await terminate_process(proc)
        logs = await collect_logs(0.5)
        print(f"--- Concurrency Test Server Output ---\n{logs}")
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_connection_pool_concurrency():
    """Regression test for connection-pool request batching.

    Fires many requests at a mock server with a fixed per-request latency.
    If the pool runs them concurrently, the whole batch finishes in roughly
    one latency period; if requests serialize into batches (the behavior
    described in the bug report below), the elapsed time blows past the
    budget and the assertion fails.

    https://github.com/BoundaryML/baml/issues/2594
    """
    # How many requests to make.
    num_requests = 20

    # How long the server takes to process one request.
    latency_ms = 500

    # Allow some extra time per request (scheduling overhead).
    allowed_deviation_ms = 3 * num_requests

    # Total budget: one latency period plus the scheduling allowance.
    expected_duration_ms = latency_ms + allowed_deviation_ms

    async with start_openai_generic_server(latency_ms) as base_url:
        registry = ClientRegistry()
        registry.add_llm_client(
            "ConcurrencyTestClient",
            "openai-generic",
            {
                "model": "concurrency-test",
                "base_url": base_url,
            },
        )
        registry.set_primary("ConcurrencyTestClient")

        pending = [
            b.TestOpenAI("test", {"client_registry": registry})
            for _ in range(num_requests)
        ]

        timeout_s = max(5.0, (expected_duration_ms / 1000.0) + 2.0)
        start_time = time.perf_counter()
        results = await asyncio.wait_for(asyncio.gather(*pending), timeout=timeout_s)
        duration_ms = (time.perf_counter() - start_time) * 1000.0

        assert len(results) == num_requests
        assert duration_ms <= expected_duration_ms, (
            f"Expected duration <= {expected_duration_ms} ms but got {duration_ms:.2f} ms; "
            "requests may not be running concurrently."
        )
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Check warning
Code scanning / CodeQL
Log injection Medium
Copilot Autofix
AI 24 days ago
To prevent log injection, any user-controlled values included in the log string (such as `req.url` and potentially `req.method`) should have line breaks (`\r`, `\n`) stripped or replaced. The best and simplest mitigation is to process each such value with `String.prototype.replace(/\r|\n/g, "")` before logging. Pass `req.method` and `req.url` through a sanitizing function (e.g. `sanitizeForLog(str)`) that takes a string and strips newlines and carriage returns, then use it for both `req.method` and `req.url` in your log statement. Apply these changes only to this file/snippet.