diff --git a/tools/mock-vllm/app.py b/tools/mock-vllm/app.py index e4d02d15..18e021fe 100644 --- a/tools/mock-vllm/app.py +++ b/tools/mock-vllm/app.py @@ -2,6 +2,7 @@ import time from typing import List, Optional +import uvicorn from fastapi import FastAPI from pydantic import BaseModel @@ -79,3 +80,7 @@ def estimate_tokens(text: str) -> int: # Some SDKs look for token_usage; keep it as an alias for convenience. "token_usage": usage, } + + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000)