diff --git a/clients/python/llmengine/data_types/model_endpoints.py b/clients/python/llmengine/data_types/model_endpoints.py
index 2e0877732..d6f23aa19 100644
--- a/clients/python/llmengine/data_types/model_endpoints.py
+++ b/clients/python/llmengine/data_types/model_endpoints.py
@@ -64,6 +64,21 @@ class CreateLLMEndpointRequest(VLLMEndpointAdditionalArgs, BaseModel):
         default=None,
         description="A Jinja template to use for this endpoint. If not provided, will use the chat template from the checkpoint",
     )
+    # Route configuration for multiple endpoints support
+    routes: Optional[List[str]] = Field(
+        default=None,
+        description="List of additional routes to forward to the user's service. "
+        "These routes will be added alongside the default /predict route. "
+        "Requires the passthrough forwarder type.",
+    )
+    extra_routes: Optional[List[str]] = Field(
+        default=None, description="Legacy field for additional routes. Use 'routes' instead."
+    )
+    forwarder_type: Optional[str] = Field(
+        default=None,
+        description="Type of forwarder to use. Set to 'passthrough' to enable "
+        "multiple route forwarding to your FastAPI service.",
+    )
 
 
 class CreateLLMEndpointResponse(BaseModel):
diff --git a/examples/multi_route_client_example.py b/examples/multi_route_client_example.py
new file mode 100644
index 000000000..56caa5c4b
--- /dev/null
+++ b/examples/multi_route_client_example.py
@@ -0,0 +1,216 @@
+#!/usr/bin/env python3
+"""
+Example demonstrating how to deploy a multi-route FastAPI server using Launch.
+
+This example shows how to use the new route configuration parameters to deploy
+a FastAPI server with multiple endpoints that can be accessed through their
+natural paths rather than being restricted to just /predict.
+"""
+
+import time
+
+import requests
+from llmengine import Model
+from llmengine.data_types.core import ModelEndpointType
+from llmengine.data_types.model_endpoints import CreateLLMEndpointRequest
+
+
+def create_multi_route_endpoint():
+    """
+    Create a model endpoint with multiple routes using the new passthrough forwarder.
+    """
+
+    # Define the routes we want to expose from our FastAPI server
+    custom_routes = [
+        "/v1/chat/completions",  # OpenAI-compatible chat endpoint
+        "/v1/completions",  # OpenAI-compatible completions endpoint
+        "/analyze",  # Custom analysis endpoint
+        "/custom/endpoint",  # Custom GET endpoint
+        "/batch/process",  # Batch processing endpoint
+    ]
+
+    print("Creating model endpoint with multiple routes...")
+    print(f"Routes to be exposed: {custom_routes}")
+
+    # Create the endpoint with multi-route support
+    response = Model.create(
+        name="multi-route-fastapi-example",
+        model="llama-2-7b",  # Only used for bundle creation; the custom server handles the logic
+        inference_framework_image_tag="latest",
+        # Hardware configuration
+        cpus=4,
+        memory="8Gi",
+        storage="20Gi",
+        gpus=1,
+        gpu_type="nvidia-ampere-a10",
+        # Scaling configuration
+        min_workers=1,
+        max_workers=3,
+        per_worker=10,
+        endpoint_type=ModelEndpointType.STREAMING,
+        # NEW: Multi-route configuration
+        routes=custom_routes,  # List of routes to forward
+        forwarder_type="passthrough",  # Enable passthrough forwarding
+        # Other settings
+        public_inference=False,
+        labels={"example": "multi-route", "type": "fastapi"},
+    )
+
+    print(f"Endpoint created! Task ID: {response.endpoint_creation_task_id}")
+    return response.endpoint_creation_task_id
+
+
+def test_multi_route_endpoint(endpoint_name: str, base_url: str):
+    """
+    Test the multi-route endpoint by making requests to different routes.
+    """
+    print(f"\nTesting multi-route endpoint: {endpoint_name}")
+    print(f"Base URL: {base_url}")
+
+    # Test cases for different routes
+    test_cases = [
+        {
+            "name": "Traditional Predict",
+            "method": "POST",
+            "url": f"{base_url}/predict",
+            "data": {"text": "Hello world", "model": "custom"},
+        },
+        {
+            "name": "OpenAI Chat Completions",
+            "method": "POST",
+            "url": f"{base_url}/v1/chat/completions",
+            "data": {
+                "messages": [{"role": "user", "content": "Hello, how are you?"}],
+                "model": "gpt-3.5-turbo",
+                "max_tokens": 50,
+            },
+        },
+        {
+            "name": "OpenAI Completions",
+            "method": "POST",
+            "url": f"{base_url}/v1/completions",
+            "data": {
+                "prompt": "The future of AI is",
+                "model": "text-davinci-003",
+                "max_tokens": 50,
+            },
+        },
+        {
+            "name": "Custom Analysis",
+            "method": "POST",
+            "url": f"{base_url}/analyze",
+            "data": {"text": "This is a good example of multi-route functionality"},
+        },
+        {
+            "name": "Custom GET Endpoint",
+            "method": "GET",
+            "url": f"{base_url}/custom/endpoint",
+            "data": None,
+        },
+        {
+            "name": "Batch Processing",
+            "method": "POST",
+            "url": f"{base_url}/batch/process",
+            "data": {"texts": ["First text", "Second text", "Third text"]},
+        },
+    ]
+
+    # Execute test cases
+    for test_case in test_cases:
+        print(f"\n--- Testing {test_case['name']} ---")
+        print(f"URL: {test_case['url']}")
+
+        try:
+            if test_case["method"] == "GET":
+                response = requests.get(test_case["url"])
+            else:
+                response = requests.post(test_case["url"], json=test_case["data"])
+
+            print(f"Status: {response.status_code}")
+            if response.status_code == 200:
+                result = response.json()
+                print(f"Response: {result}")
+            else:
+                print(f"Error: {response.text}")
+
+        except requests.exceptions.RequestException as e:
+            print(f"Request failed: {e}")
+
+
+def main():
+    """
+    Main example workflow.
+    """
+
+    print("=" * 60)
+    print("Launch Multi-Route FastAPI Server Example")
+    print("=" * 60)
+
+    print(
+        """\
+This example demonstrates the new multi-route passthrough functionality in Launch.
+
+Instead of being limited to a single /predict endpoint, you can now:
+1. Specify multiple routes to be forwarded to your FastAPI server
+2. Use the passthrough forwarder type to enable full HTTP method support
+3. Access your endpoints through their natural paths
+
+Key benefits:
+- No more single endpoint limitation
+- Full FastAPI server compatibility
+- Support for GET, POST, PUT, DELETE, PATCH, HEAD, OPTIONS
+- OpenAI-compatible endpoints alongside custom routes
+- Easy migration of existing FastAPI applications
+"""
+    )
+
+    # Step 1: Create the multi-route endpoint
+    task_id = create_multi_route_endpoint()
+
+    print(f"\nEndpoint creation initiated with task ID: {task_id}")
+    print("Waiting for endpoint to be ready...")
+
+    # In a real scenario, you would poll the endpoint status
+    # For this example, we'll simulate waiting
+    print("⏳ Endpoint is being deployed...")
+    print("⏳ This may take several minutes...")
+
+    # Step 2: Once ready, test the endpoints
+    # Note: In practice, you'd get the actual endpoint URL from the Launch API
+    endpoint_name = "multi-route-fastapi-example"
+    base_url = f"https://your-launch-domain.com/v1/endpoints/{endpoint_name}"
+
+    print(f"\n✅ Endpoint ready! You can now test it at: {base_url}")
+    print("\nExample test calls you can make:")
+
+    # Show example curl commands
+    curl_examples = [
+        {
+            "name": "Traditional predict",
+            "cmd": f'curl -X POST {base_url}/predict -H "Content-Type: application/json" -d \'{{"text": "Hello world", "model": "custom"}}\'',
+        },
+        {
+            "name": "OpenAI chat",
+            "cmd": f'curl -X POST {base_url}/v1/chat/completions -H "Content-Type: application/json" -d \'{{"messages": [{{"role": "user", "content": "Hello!"}}], "model": "gpt-3.5-turbo"}}\'',
+        },
+        {
+            "name": "Custom analysis",
+            "cmd": f'curl -X POST {base_url}/analyze -H "Content-Type: application/json" -d \'{{"text": "This is amazing!"}}\'',
+        },
+        {"name": "Custom GET endpoint", "cmd": f"curl -X GET {base_url}/custom/endpoint"},
+    ]
+
+    for example in curl_examples:
+        print(f"\n{example['name']}:")
+        print(f"  {example['cmd']}")
+
+    print("\n" + "=" * 60)
+    print("Multi-Route Support Successfully Configured!")
+    print("=" * 60)
+
+    # Uncomment the following line to run actual tests if you have a deployed endpoint
+    # test_multi_route_endpoint(endpoint_name, base_url)
+
+
+if __name__ == "__main__":
+    main()
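The client example above only simulates the wait for deployment. A minimal readiness-polling sketch is shown below; it assumes the llmengine client exposes Model.get for an endpoint and that the returned object carries a status attribute that eventually reads READY, and the wait_until_ready helper itself is illustrative rather than part of this change.

import time

from llmengine import Model


def wait_until_ready(endpoint_name: str, timeout_s: int = 1800, poll_s: int = 30) -> None:
    """Poll the (assumed) Model.get status until the endpoint reports READY."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        endpoint = Model.get(endpoint_name)  # assumed client call
        status = str(getattr(endpoint, "status", "UNKNOWN"))
        print(f"{endpoint_name} status: {status}")
        if "READY" in status.upper():
            return
        time.sleep(poll_s)
    raise TimeoutError(f"{endpoint_name} was not ready after {timeout_s} seconds")

With a helper like this, main() could call wait_until_ready(endpoint_name) before running test_multi_route_endpoint().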
+""" + +from typing import Any, Dict, List, Optional + +import uvicorn +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel + +# FastAPI server with multiple routes +app = FastAPI(title="Multi-Route Example Server", version="1.0.0") + + +# Data models +class PredictRequest(BaseModel): + text: str + model: Optional[str] = "default" + + +class PredictResponse(BaseModel): + result: str + model: str + route: str + + +class HealthResponse(BaseModel): + status: str + routes: List[str] + + +class ChatMessage(BaseModel): + role: str + content: str + + +class ChatRequest(BaseModel): + messages: List[ChatMessage] + model: Optional[str] = "gpt-3.5-turbo" + max_tokens: Optional[int] = 100 + + +class ChatResponse(BaseModel): + choices: List[Dict[str, Any]] + model: str + usage: Dict[str, int] + + +class CompletionRequest(BaseModel): + prompt: str + model: Optional[str] = "text-davinci-003" + max_tokens: Optional[int] = 100 + + +class CompletionResponse(BaseModel): + choices: List[Dict[str, str]] + model: str + usage: Dict[str, int] + + +# Health check endpoint (required by Launch) +@app.get("/health", response_model=HealthResponse) +@app.get("/readyz", response_model=HealthResponse) +def health_check(): + """Health check endpoint required by Launch forwarder.""" + return HealthResponse( + status="healthy", + routes=[ + "/predict", + "/v1/chat/completions", + "/v1/completions", + "/analyze", + "/custom/endpoint", + ], + ) + + +# Traditional predict endpoint +@app.post("/predict", response_model=PredictResponse) +def predict(request: PredictRequest): + """Traditional ML prediction endpoint.""" + return PredictResponse( + result=f"Processed text: {request.text}", model=request.model, route="/predict" + ) + + +# OpenAI-compatible chat completions endpoint +@app.post("/v1/chat/completions", response_model=ChatResponse) +def chat_completions(request: ChatRequest): + """OpenAI-compatible chat completions endpoint.""" + # Simple echo implementation for example + last_message = ( + request.messages[-1] if request.messages else ChatMessage(role="user", content="") + ) + + return ChatResponse( + choices=[ + { + "message": {"role": "assistant", "content": f"Echo: {last_message.content}"}, + "finish_reason": "stop", + "index": 0, + } + ], + model=request.model, + usage={ + "prompt_tokens": len(last_message.content.split()), + "completion_tokens": len(last_message.content.split()) + 1, + "total_tokens": len(last_message.content.split()) * 2 + 1, + }, + ) + + +# OpenAI-compatible completions endpoint +@app.post("/v1/completions", response_model=CompletionResponse) +def completions(request: CompletionRequest): + """OpenAI-compatible completions endpoint.""" + return CompletionResponse( + choices=[ + {"text": f" -> Completion for: {request.prompt}", "finish_reason": "stop", "index": 0} + ], + model=request.model, + usage={ + "prompt_tokens": len(request.prompt.split()), + "completion_tokens": 10, + "total_tokens": len(request.prompt.split()) + 10, + }, + ) + + +# Custom analysis endpoint +@app.post("/analyze") +def analyze_text(data: Dict[str, Any]): + """Custom text analysis endpoint.""" + text = data.get("text", "") + if not text: + raise HTTPException(status_code=400, detail="Text field is required") + + return { + "analysis": { + "word_count": len(text.split()), + "char_count": len(text), + "sentiment": "positive" if "good" in text.lower() else "neutral", + }, + "text": text, + "route": "/analyze", + } + + +# Another custom endpoint +@app.get("/custom/endpoint") +def custom_endpoint(): + """A 
custom GET endpoint to demonstrate method flexibility.""" + return { + "message": "This is a custom endpoint accessible via passthrough routing", + "methods_supported": ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"], + "route": "/custom/endpoint", + } + + +# Batch processing endpoint +@app.post("/batch/process") +def batch_process(data: Dict[str, List[str]]): + """Batch processing endpoint for multiple texts.""" + texts = data.get("texts", []) + return { + "results": [f"Processed: {text}" for text in texts], + "count": len(texts), + "route": "/batch/process", + } + + +if __name__ == "__main__": + # Run the server + uvicorn.run(app, host="0.0.0.0", port=5005) diff --git a/model-engine/model_engine_server/common/dtos/model_endpoints.py b/model-engine/model_engine_server/common/dtos/model_endpoints.py index 36a7c7f68..18d0aa66f 100644 --- a/model-engine/model_engine_server/common/dtos/model_endpoints.py +++ b/model-engine/model_engine_server/common/dtos/model_endpoints.py @@ -73,6 +73,21 @@ class CreateModelEndpointV1Request(BaseModel): default_callback_url: Optional[HttpUrlStr] = None default_callback_auth: Optional[CallbackAuth] = None public_inference: Optional[bool] = Field(default=False) + # Route configuration for multiple endpoints support + routes: Optional[List[str]] = Field( + default=None, + description="List of additional routes to forward to the user's service. " + "These routes will be added alongside the default /predict route. " + "Requires passthrough forwarder type.", + ) + extra_routes: Optional[List[str]] = Field( + default=None, description="Legacy field for additional routes. Use 'routes' instead." + ) + forwarder_type: Optional[str] = Field( + default=None, + description="Type of forwarder to use. Set to 'passthrough' to enable " + "multiple route forwarding to your FastAPI service.", + ) class CreateModelEndpointV1Response(BaseModel): diff --git a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py index 352b7a060..155a027d3 100644 --- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py +++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py @@ -392,6 +392,10 @@ async def execute( chat_template_override: Optional[str], nodes_per_worker: int, additional_args: Optional[Dict[str, Any]] = None, + # Route configuration for multiple endpoints support + routes: Optional[List[str]] = None, + extra_routes: Optional[List[str]] = None, + forwarder_type: Optional[str] = None, ) -> ModelBundle: multinode = nodes_per_worker > 1 if source != LLMSource.HUGGING_FACE: @@ -459,6 +463,9 @@ async def execute( checkpoint_path, chat_template_override, additional_args=additional_vllm_args, + routes=routes, + extra_routes=extra_routes, + forwarder_type=forwarder_type, ) else: bundle_id = await self.create_vllm_bundle( @@ -471,6 +478,9 @@ async def execute( checkpoint_path, chat_template_override, additional_args=additional_vllm_args, + routes=routes, + extra_routes=extra_routes, + forwarder_type=forwarder_type, ) case LLMInferenceFramework.SGLANG: # pragma: no cover if not hmi_config.sglang_repository: @@ -991,6 +1001,9 @@ async def create_vllm_bundle( checkpoint_path: Optional[str], chat_template_override: Optional[str], additional_args: Optional[VLLMEndpointAdditionalArgs] = None, + routes: Optional[List[str]] = None, + extra_routes: Optional[List[str]] = None, + forwarder_type: Optional[str] = None, ): 
         command = self._create_vllm_bundle_command(
             model_name,
@@ -1005,6 +1018,20 @@ async def create_vllm_bundle(
             additional_args=additional_args,
         )
 
+        # Determine which routes to use - user-provided or defaults
+        final_routes = []
+        final_extra_routes = []
+        final_forwarder_type = forwarder_type
+
+        if routes is not None:
+            final_routes = routes
+        else:
+            # Default to OpenAI compatibility routes for VLLM
+            final_routes = [OPENAI_CHAT_COMPLETION_PATH, OPENAI_COMPLETION_PATH]
+
+        if extra_routes is not None:
+            final_extra_routes = extra_routes
+
         create_model_bundle_v2_request = CreateModelBundleV2Request(
             name=endpoint_unique_name,
             schema_location="TBA",
@@ -1019,10 +1046,9 @@ async def create_vllm_bundle(
                 healthcheck_route="/health",
                 predict_route="/predict",
                 streaming_predict_route="/stream",
-                routes=[
-                    OPENAI_CHAT_COMPLETION_PATH,
-                    OPENAI_COMPLETION_PATH,
-                ],
+                routes=final_routes,
+                extra_routes=final_extra_routes,
+                forwarder_type=final_forwarder_type,
                 env={},
             ),
             metadata={},
@@ -1051,6 +1077,9 @@ async def create_vllm_multinode_bundle(
         checkpoint_path: Optional[str],
         chat_template_override: Optional[str],
         additional_args: Optional[VLLMEndpointAdditionalArgs] = None,
+        routes: Optional[List[str]] = None,
+        extra_routes: Optional[List[str]] = None,
+        forwarder_type: Optional[str] = None,
     ):
         leader_command = self._create_vllm_bundle_command(
             model_name,
@@ -1087,6 +1116,20 @@
             "RAY_CLUSTER_SIZE": "$(K8S_LWS_CLUSTER_SIZE)",
         }
 
+        # Determine which routes to use - user-provided or defaults
+        final_routes = []
+        final_extra_routes = []
+        final_forwarder_type = forwarder_type
+
+        if routes is not None:
+            final_routes = routes
+        else:
+            # Default to OpenAI compatibility routes for VLLM
+            final_routes = [OPENAI_CHAT_COMPLETION_PATH, OPENAI_COMPLETION_PATH]
+
+        if extra_routes is not None:
+            final_extra_routes = extra_routes
+
         create_model_bundle_v2_request = CreateModelBundleV2Request(
             name=endpoint_unique_name,
             schema_location="TBA",
@@ -1101,7 +1144,9 @@ async def create_vllm_multinode_bundle(
                 healthcheck_route="/health",
                 predict_route="/predict",
                 streaming_predict_route="/stream",
-                routes=[OPENAI_CHAT_COMPLETION_PATH, OPENAI_COMPLETION_PATH],
+                routes=final_routes,
+                extra_routes=final_extra_routes,
+                forwarder_type=final_forwarder_type,
                 env=common_vllm_envs,
                 worker_command=worker_command,
                 worker_env=common_vllm_envs,
@@ -1343,6 +1388,10 @@ async def execute(
             chat_template_override=request.chat_template_override,
             nodes_per_worker=request.nodes_per_worker,
             additional_args=request.model_dump(exclude_none=True),
+            # Pass route configuration to bundle creation
+            routes=request.routes,
+            extra_routes=request.extra_routes,
+            forwarder_type=request.forwarder_type,
         )
         validate_resource_requests(
             bundle=bundle,
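For callers that hit the model-engine gateway directly instead of the Python client, the new CreateModelEndpointV1Request fields translate into a request payload along the lines of the hedged sketch below; the gateway URL, the basic-auth style, and the elided bundle and resource fields are assumptions for illustration, not part of this diff.

import requests

payload = {
    "name": "multi-route-fastapi-example",
    # Bundle and resource fields required by CreateModelEndpointV1Request are omitted for brevity.
    "routes": ["/v1/chat/completions", "/v1/completions", "/analyze", "/custom/endpoint"],
    "extra_routes": [],  # legacy field; prefer "routes"
    "forwarder_type": "passthrough",
}

response = requests.post(
    "https://your-launch-domain.com/v1/model-endpoints",  # assumed gateway path
    json=payload,
    auth=("YOUR_API_KEY", ""),  # assumed auth scheme; adjust to your deployment
)
response.raise_for_status()
print(response.json())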