
Commit cfed4b9

[v0.9.1][Build][Ray] Fix protobuf version in Dockerfile (#2028) (#2306)
### What this PR does / why we need it?
Fix the protobuf version in the Dockerfile to resolve `AttributeError: 'str' object has no attribute 'DESCRIPTOR' when packaging message to dict` raised by protobuf.

The version specification will be removed after ray-project/ray#54910 is merged.

Backport of #2028.

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with the newly added test.

---------

Signed-off-by: MengqingCao <[email protected]>
1 parent 13fc844 commit cfed4b9

File tree

5 files changed: +45, -7 lines


Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -53,7 +53,7 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     python3 -m pip cache purge
 
 # Install modelscope (for fast download) and ray (for multinode)
-RUN python3 -m pip install modelscope ray && \
+RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
     python3 -m pip cache purge
 
 CMD ["/bin/bash"]
```
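For context, the `DESCRIPTOR` error in the commit message comes from protobuf's `json_format` helpers, which expect a protobuf `Message` instance; when anything else (e.g. a plain string) reaches them, the attribute lookup fails. A minimal standalone sketch of the failure mode (using `Struct` for illustration; this is not the actual ray call path, which wraps the error with "when packaging message to dict"):

```python
# Sketch: google.protobuf.json_format.MessageToDict reads the DESCRIPTOR
# attribute of its argument, so anything that is not a protobuf Message
# (here, a plain str) triggers the AttributeError seen in the commit.
from google.protobuf.json_format import MessageToDict
from google.protobuf.struct_pb2 import Struct

msg = Struct()
msg.update({"backend": "ray", "pinned": True})
print(MessageToDict(msg))  # works: Struct is a real Message with a DESCRIPTOR

MessageToDict("not a message")  # AttributeError: 'str' object has no attribute 'DESCRIPTOR'
```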

Dockerfile.openEuler

Lines changed: 1 addition & 1 deletion
```diff
@@ -50,7 +50,7 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     python3 -m pip cache purge
 
 # Install modelscope (for fast download) and ray (for multinode)
-RUN python3 -m pip install modelscope ray && \
+RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
    python3 -m pip cache purge
 
 CMD ["/bin/bash"]
```

docs/source/faqs.md

Lines changed: 21 additions & 4 deletions
````diff
@@ -158,12 +158,29 @@ for output in outputs:
 2. Set the following environment variables:
 
 ```bash
-export LCCL_DETERMINISTIC = 1
-export HCCL_DETERMINISTIC = 1
-export ATB_MATMUL_SHUFFLE_K_ENABLE = 0
-export ATB_LLM_LCOC_ENABLE = 0
+export LCCL_DETERMINISTIC=1
+export HCCL_DETERMINISTIC=true
+export ATB_MATMUL_SHUFFLE_K_ENABLE=0
+export ATB_LLM_LCOC_ENABLE=0
 ```
 
 ### 19. How to fix the error "ImportError: Please install vllm[audio] for audio support" for the Qwen2.5-Omni model?
 The `Qwen2.5-Omni` model requires the `librosa` package; install the `qwen-omni-utils` package (`pip install qwen-omni-utils`) to ensure all dependencies are met.
 This package pulls in `librosa` and its related dependencies, resolving the `ImportError: No module named 'librosa'` issue so that audio processing works correctly.
+
+### 20. Failed to run with the `ray` distributed backend?
+You may hit the following errors when running with the ray backend in distributed scenarios:
+
+```
+TypeError: can't convert npu:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
+```
+
+```
+AttributeError: 'str' object has no attribute 'DESCRIPTOR' when packaging message to dict
+```
+
+This has been solved in `ray>=2.47.1`, so it can be resolved as follows:
+
+```
+python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0'
+```
````
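After upgrading, users can confirm they are past the fixed thresholds at runtime. A quick sanity-check sketch (assumes the `packaging` library is available, which is common in pip environments but not guaranteed):

```python
# Sketch: assert the running ray/protobuf versions satisfy the FAQ's fix.
import ray
import google.protobuf
from packaging.version import Version  # assumption: packaging is installed

assert Version(ray.__version__) >= Version("2.47.1"), "upgrade ray"
assert Version(google.protobuf.__version__) > Version("3.20.0"), "upgrade protobuf"
```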

requirements-dev.txt

Lines changed: 2 additions & 0 deletions
```diff
@@ -11,3 +11,5 @@ xgrammar
 zmq
 types-psutil
 networkx
+ray>=2.47.1
+protobuf>3.20.0
```
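Note the two specifier styles: `>=` keeps the stated bound, while `>` excludes it, so `protobuf>3.20.0` rejects 3.20.0 itself. A small sketch of how these specifiers are interpreted (assumes the `packaging` library):

```python
# Sketch: the inclusive vs. exclusive bounds of the two new pins.
from packaging.requirements import Requirement

ray_req = Requirement("ray>=2.47.1")
pb_req = Requirement("protobuf>3.20.0")

print(ray_req.specifier.contains("2.47.1"))  # True: '>=' includes the bound
print(pb_req.specifier.contains("3.20.0"))   # False: strict '>' excludes it
print(pb_req.specifier.contains("3.20.1"))   # True
```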

tests/multicard/test_offline_inference_distributed.py

Lines changed: 20 additions & 1 deletion
```diff
@@ -32,6 +32,8 @@
 
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 
+DIST_EXECUTOR_BACKEND = ["mp", "ray"]
+
 
 def test_models_distributed_QwQ():
     example_prompts = [
@@ -63,6 +65,23 @@ def test_models_distributed_DeepSeek():
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
+@pytest.mark.skipif(os.environ["VLLM_USE_V1"] == "1")
+@pytest.mark.parametrize("distributed_executor_backend", DIST_EXECUTOR_BACKEND)
+def test_v0_pp(distributed_executor_backend):
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    dtype = "half"
+    max_tokens = 5
+    with VllmRunner(
+            "Qwen/Qwen3-0.6B-Base",
+            dtype=dtype,
+            pipeline_parallel_size=2,
+            distributed_executor_backend=distributed_executor_backend,
+    ) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)
+
+
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
 def test_models_distributed_topk() -> None:
     example_prompts = [
@@ -227,4 +246,4 @@ def test_models_distributed_Qwen3_with_flashcomm_v2():
         dtype="auto",
         tensor_parallel_size=2,
     ) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
+        vllm_model.generate_greedy(example_prompts, max_tokens)
```
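One note on the new test: `pytest.mark.skipif` expects a `reason=` string when given a boolean condition, and `os.environ["VLLM_USE_V1"]` raises `KeyError` when the variable is unset. A more defensive form of the marker might look like this (a sketch under those assumptions, not part of the commit; the test name and reason text are illustrative):

```python
import os
import pytest

# Sketch: boolean skipif conditions need an explicit reason=, and
# os.environ.get avoids a KeyError when VLLM_USE_V1 is not set.
@pytest.mark.skipif(os.environ.get("VLLM_USE_V1") == "1",
                    reason="test_v0_pp targets the V0 engine code path")
def test_v0_pp_marker_demo():
    assert True
```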
