Skip to content

[Usage]: How should I use the CPU to deploy QWEN3 VL 30B-A3B? #27912

@maxgameone

Description

@maxgameone

Your current environment

The output of `python collect_env.py`

(APIServer pid=1033476) Traceback (most recent call last):
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/bin/vllm", line 33, in <module>
(APIServer pid=1033476) sys.exit(load_entry_point('vllm==0.11.1rc6.dev33+g3a5de7d2d.cpu', 'console_scripts', 'vllm')())
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/entrypoints/cli/main.py", line 73, in main
(APIServer pid=1033476) args.dispatch_function(args)
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/entrypoints/cli/serve.py", line 59, in cmd
(APIServer pid=1033476) uvloop.run(run_server(args))
(APIServer pid=1033476) File "/home/maxgameone/.local/lib/python3.12/site-packages/uvloop/__init__.py", line 109, in run
(APIServer pid=1033476) return __asyncio.run(
(APIServer pid=1033476) ^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/asyncio/runners.py", line 194, in run
(APIServer pid=1033476) return runner.run(main)
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/asyncio/runners.py", line 118, in run
(APIServer pid=1033476) return self._loop.run_until_complete(task)
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=1033476) File "/home/maxgameone/.local/lib/python3.12/site-packages/uvloop/__init__.py", line 61, in wrapper
(APIServer pid=1033476) return await main
(APIServer pid=1033476) ^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 1910, in run_server
(APIServer pid=1033476) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 1926, in run_server_worker
(APIServer pid=1033476) async with build_async_engine_client(
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/contextlib.py", line 210, in __aenter__
(APIServer pid=1033476) return await anext(self.gen)
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 185, in build_async_engine_client
(APIServer pid=1033476) async with build_async_engine_client_from_engine_args(
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/contextlib.py", line 210, in __aenter__
(APIServer pid=1033476) return await anext(self.gen)
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 232, in build_async_engine_client_from_engine_args
(APIServer pid=1033476) async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/utils/func_utils.py", line 116, in inner
(APIServer pid=1033476) return fn(*args, **kwargs)
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/v1/engine/async_llm.py", line 218, in from_vllm_config
(APIServer pid=1033476) return cls(
(APIServer pid=1033476) ^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/v1/engine/async_llm.py", line 140, in __init__
(APIServer pid=1033476) self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core_client.py", line 121, in make_async_mp_client
(APIServer pid=1033476) return AsyncMPClient(*client_args)
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core_client.py", line 808, in __init__
(APIServer pid=1033476) super().__init__(
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core_client.py", line 469, in init
(APIServer pid=1033476) with launch_core_engines(vllm_config, executor_class, log_stats) as (
(APIServer pid=1033476) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/contextlib.py", line 144, in __exit__
(APIServer pid=1033476) next(self.gen)
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/v1/engine/utils.py", line 898, in launch_core_engines
(APIServer pid=1033476) wait_for_engine_startup(
(APIServer pid=1033476) File "/home/maxgameone/anaconda3/lib/python3.12/site-packages/vllm-0.11.1rc6.dev33+g3a5de7d2d.cpu-py3.12-linux-x86_64.egg/vllm/v1/engine/utils.py", line 955, in wait_for_engine_startup
(APIServer pid=1033476) raise RuntimeError(
(APIServer pid=1033476) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
(base) maxgameone@zsi-cloud:/home/data/visionlink$ /home/maxgameone/anaconda3/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown

How would you like to use vllm

I want to run inference of a [specific model](put link here). I don't know how to integrate it with vllm.

Before submitting a new issue...

  • Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.

Metadata

Metadata

Assignees

No one assigned

    Labels

    usage — How to use vllm

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions