Skip to content

Commit 5f42066

Browse files
LeTriet17 and WayneLe17 authored
Fix/deepseek openai server (#1071)
* Pin dependencies to specific versions for reproducibility:
  - Update fastapi to 0.115.8
  - Update sse_starlette to 2.2.1
  - Update pydantic to 2.10.6
  - Update uvicorn to 0.34.0
  - Update torch to 2.6.0
* Update the GPU type used when building llama-cpp to match the one used for inference

---------

Co-authored-by: WayneLe17 <wayne.le@rennlabs.com>
1 parent 3170e93 commit 5f42066

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

misc/deepseek_openai_server.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -113,22 +113,22 @@
113113
# Install all Python dependencies at once
114114
.pip_install(
115115
[
116-
"fastapi",
117-
"sse_starlette",
118-
"pydantic",
119-
"uvicorn[standard]",
120-
"python-multipart",
121-
"starlette-context",
122-
"pydantic-settings",
123-
"ninja",
124-
"packaging",
116+
"fastapi==0.115.8",
117+
"sse_starlette==2.2.1",
118+
"pydantic==2.10.6",
119+
"uvicorn[standard]==0.34.0",
120+
"python-multipart==0.0.20",
121+
"starlette-context==0.3.6",
122+
"pydantic-settings==2.7.1",
123+
"ninja==1.11.1.3",
124+
"packaging==24.2",
125125
"wheel",
126-
"torch",
127-
]
126+
"torch==2.6.0",
127+
],
128128
)
129129
.run_commands(
130130
'CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python',
131-
gpu=modal.gpu.A10G(count=1),
131+
gpu=modal.gpu.L40S(count=1),
132132
)
133133
.entrypoint([]) # remove NVIDIA base container entrypoint
134134
)
@@ -231,7 +231,7 @@ def serve():
231231
ModelSettings(
232232
model=model_path, # Replace with your model path
233233
n_gpu_layers=-1, # Use all GPU layers
234-
n_ctx=8096 * 4,
234+
n_ctx=8096,
235235
n_batch=512,
236236
n_threads=12,
237237
verbose=True,

0 commit comments

Comments
 (0)