File tree: 3 files changed, +715 -0 lines changed
vllm_patched/model_executor/models
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ RUN pip install -r requirements.txt
17 17   # RUN pip install .
18 18   ENV ACCELERATOR=CUDA
19 19   # Copy our application source code
   20 + COPY vllm_patched /usr/local/lib/python3.12/dist-packages/vllm
20 21   COPY ./homl_server ./homl_server
21 22   WORKDIR /app/homl_server
22 23   # The base image exposes port 8000, so we don't need to do it again.
Original file line number | Diff line number | Diff line change
36 36   MODEL_LOAD_TIMEOUT = int(os.environ.get("HOML_MODEL_LOAD_TIMEOUT", 180))  # seconds
37 37   # # This is the time after which a model will be unloaded if it is idle
38 38   MODEL_UNLOAD_IDLE_TIME = int(os.environ.get("HOML_MODEL_UNLOAD_IDLE_TIME", 600))  # 10 minutes default
   39 + module_info_cache = os.path.join(MODEL_HOME, "module_info_cache")
39 40
40 41   os.makedirs(os.path.join(MODEL_HOME, "home"), exist_ok=True)
41 42   os.makedirs(MODEL_LIB, exist_ok=True)
42 43   os.makedirs(TORCH_CACHE, exist_ok=True)
   44 + os.makedirs(module_info_cache, exist_ok=True)
43 45   os.environ["TORCHINDUCTOR_CACHE_DIR"] = TORCH_CACHE
   46 + os.environ["VLLM_LAZY_LOAD_MODULE_INFO_CACHE"] = module_info_cache
44 47   # Ensure cache and lib directories exist
45 48
46 49
You can’t perform that action at this time.
0 commit comments