Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/twinkle/server/gateway/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from twinkle.server.telemetry.tracing import inject_context
from twinkle.utils.logger import get_logger
from twinkle_client.http.headers import H_MULTIPLEX, H_MULTIPLEX_LEGACY, H_REQUEST_ID
from twinkle_client.http.headers import H_MULTIPLEX, H_MULTIPLEX_LEGACY, H_REQUEST_ID, H_REQUEST_ID_LEGACY

logger = get_logger()

Expand Down Expand Up @@ -72,7 +72,7 @@ def _prepare_headers(self, request_headers) -> dict[str, str]:
headers = dict(request_headers)
headers.pop('host', None)
headers.pop('content-length', None)
request_id = request_headers.get(H_REQUEST_ID)
request_id = request_headers.get(H_REQUEST_ID) or request_headers.get(H_REQUEST_ID_LEGACY)
if request_id is not None and not request_headers.get(H_MULTIPLEX_LEGACY):
headers[H_MULTIPLEX_LEGACY] = request_id
if request_id is not None:
Expand Down
2 changes: 1 addition & 1 deletion src/twinkle/server/utils/ray_serve_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _patched_setup_request_context_and_handle(
logger.debug(f'[Ray Serve Patch] Matched multiplexed_model_id: {multiplexed_model_id}')

# Original logic for other headers (unchanged)
if decoded_key == 'x-request-id':
if decoded_key in ('x-request-id', 'x-ray-serve-request-id'):
request_context_info['request_id'] = value.decode()

import ray.serve.context as serve_context
Expand Down
3 changes: 2 additions & 1 deletion src/twinkle_client/http/headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

# -- request id --
H_REQUEST_ID = 'x-request-id'
H_REQUEST_ID_LEGACY = 'X-Ray-Serve-Request-Id'
Comment thread
Yunnglin marked this conversation as resolved.

# -- multiplexed model id (sticky routing) --
H_MULTIPLEX = 'serve_multiplexed_model_id'
Expand All @@ -22,7 +23,7 @@
H_AUTH = 'Authorization'
H_AUTH_TWINKLE = 'Twinkle-Authorization'

_ROUTING_HEADERS = (H_REQUEST_ID, H_MULTIPLEX, H_MULTIPLEX_LEGACY)
_ROUTING_HEADERS = (H_REQUEST_ID, H_REQUEST_ID_LEGACY, H_MULTIPLEX, H_MULTIPLEX_LEGACY)
Comment thread
Yunnglin marked this conversation as resolved.
_AUTH_HEADERS = (H_AUTH, H_AUTH_TWINKLE)


Expand Down
Loading