Skip to content

Commit 423bdca

Browse files
princepridehsliuustc0106
authored and committed
[CI]: Bagel E2E Smoked Test (#1074)
Signed-off-by: princepride <wangzhipeng628@gmail.com> Co-authored-by: Hongsheng Liu <liuhongsheng4@huawei.com>
1 parent 220cd59 commit 423bdca

File tree

7 files changed

+518
-6
lines changed

7 files changed

+518
-6
lines changed

.buildkite/pipeline.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,3 +305,38 @@ steps:
305305
hostPath:
306306
path: /mnt/hf-cache
307307
type: DirectoryOrCreate
308+
309+
- label: "Bagel Text2Img Model Test with H100"
310+
timeout_in_minutes: 30
311+
depends_on: image-build
312+
commands:
313+
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
314+
- pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
315+
agents:
316+
queue: "mithril-h100-pool"
317+
plugins:
318+
- kubernetes:
319+
podSpec:
320+
containers:
321+
- image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
322+
resources:
323+
limits:
324+
nvidia.com/gpu: 1
325+
volumeMounts:
326+
- name: devshm
327+
mountPath: /dev/shm
328+
- name: hf-cache
329+
mountPath: /root/.cache/huggingface
330+
env:
331+
- name: HF_HOME
332+
value: /root/.cache/huggingface
333+
nodeSelector:
334+
node.kubernetes.io/instance-type: gpu-h100-sxm
335+
volumes:
336+
- name: devshm
337+
emptyDir:
338+
medium: Memory
339+
- name: hf-cache
340+
hostPath:
341+
path: /mnt/hf-cache
342+
type: DirectoryOrCreate

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ dev = [
5959
"psutil>=7.2.0",
6060
"soundfile>=0.13.1",
6161
"imageio[ffmpeg]>=0.6.0",
62-
"opencv-python>=4.12.0.88"
62+
"opencv-python>=4.12.0.88",
63+
"mooncake-transfer-engine==0.3.8.post1"
6364
]
6465

6566
docs = [
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# stage config for running BAGEL with Mooncake connector for CI e2e tests.
2+
# This config is optimized for single GPU tests with Mooncake inter-stage communication.
3+
4+
stage_args:
5+
- stage_id: 0
6+
stage_type: llm
7+
runtime:
8+
devices: "0"
9+
max_batch_size: 1
10+
engine_args:
11+
model_stage: thinker
12+
model_arch: BagelForConditionalGeneration
13+
worker_type: ar
14+
scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler
15+
gpu_memory_utilization: 0.60
16+
enforce_eager: true
17+
trust_remote_code: true
18+
engine_output_type: text
19+
distributed_executor_backend: mp
20+
enable_prefix_caching: false
21+
max_num_batched_tokens: 32768
22+
tensor_parallel_size: 1
23+
omni_kv_config:
24+
need_send_cache: true
25+
kv_transfer_criteria:
26+
type: prefill_finished
27+
final_output: true
28+
final_output_type: text
29+
is_comprehension: true
30+
default_sampling_params:
31+
temperature: 0.4
32+
top_p: 0.9
33+
top_k: 1
34+
max_tokens: 2048
35+
seed: 52
36+
detokenize: true
37+
repetition_penalty: 1.05
38+
output_connectors:
39+
to_stage_1: mooncake_connector
40+
- stage_id: 1
41+
stage_type: diffusion
42+
runtime:
43+
devices: "0"
44+
max_batch_size: 1
45+
engine_args:
46+
model_stage: dit
47+
gpu_memory_utilization: 0.4
48+
enforce_eager: true
49+
trust_remote_code: true
50+
engine_output_type: image
51+
distributed_executor_backend: mp
52+
enable_prefix_caching: false
53+
max_num_batched_tokens: 32768
54+
tensor_parallel_size: 1
55+
omni_kv_config:
56+
need_recv_cache: true
57+
engine_input_source: [0]
58+
final_output: true
59+
final_output_type: image
60+
is_comprehension: false
61+
default_sampling_params:
62+
seed: 52
63+
input_connectors:
64+
from_stage_0: mooncake_connector
65+
66+
# Top-level runtime config with Mooncake connector
67+
runtime:
68+
enabled: true
69+
defaults:
70+
window_size: -1
71+
max_inflight: 1
72+
connectors:
73+
mooncake_connector:
74+
name: MooncakeConnector
75+
extra:
76+
host: "${MOONCAKE_HOST}"
77+
metadata_server: "http://${MOONCAKE_HOST}:${MOONCAKE_HTTP_PORT}/metadata"
78+
master: "${MOONCAKE_HOST}:${MOONCAKE_RPC_PORT}"
79+
segment: 64000000
80+
localbuf: 64000000
81+
proto: tcp
82+
edges:
83+
- from: 0
84+
to: 1
85+
window_size: -1
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# stage config for running BAGEL with SharedMemory connector for CI e2e tests.
2+
# This config is optimized for single GPU tests with SharedMemory inter-stage communication.
3+
4+
stage_args:
5+
- stage_id: 0
6+
stage_type: llm
7+
runtime:
8+
devices: "0"
9+
max_batch_size: 1
10+
engine_args:
11+
model_stage: thinker
12+
model_arch: BagelForConditionalGeneration
13+
worker_type: ar
14+
scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler
15+
gpu_memory_utilization: 0.65
16+
enforce_eager: true
17+
trust_remote_code: true
18+
engine_output_type: text
19+
distributed_executor_backend: "mp"
20+
enable_prefix_caching: false
21+
max_num_batched_tokens: 32768
22+
tensor_parallel_size: 1
23+
omni_kv_config:
24+
need_send_cache: true
25+
kv_transfer_criteria:
26+
type: prefill_finished  # or special token generated
27+
final_output: true
28+
final_output_type: text
29+
is_comprehension: true
30+
default_sampling_params:
31+
temperature: 0.4
32+
top_p: 0.9
33+
top_k: 1
34+
max_tokens: 2048
35+
seed: 52
36+
detokenize: true
37+
repetition_penalty: 1.05
38+
39+
- stage_id: 1
40+
stage_type: diffusion
41+
runtime:
42+
devices: "0"
43+
max_batch_size: 1
44+
engine_args:
45+
model_stage: dit
46+
gpu_memory_utilization: 0.4
47+
enforce_eager: true
48+
trust_remote_code: true
49+
engine_output_type: image
50+
distributed_executor_backend: "mp"
51+
enable_prefix_caching: false
52+
max_num_batched_tokens: 32768
53+
tensor_parallel_size: 1
54+
omni_kv_config:
55+
need_recv_cache: true
56+
engine_input_source: [0]
57+
58+
final_output: true
59+
final_output_type: image
60+
is_comprehension: false
61+
default_sampling_params:
62+
seed: 52
63+
64+
# Runtime edges
65+
runtime:
66+
enabled: true
67+
defaults:
68+
window_size: -1
69+
max_inflight: 1
70+
71+
# Distributed connectors configuration (optional)
72+
# More connectors will be supported in the future.
73+
connectors:
74+
shared_memory_connector:
75+
name: SharedMemoryConnector
76+
extra:
77+
shm_threshold_bytes: 65536 # 64KB threshold
78+
79+
80+
edges:
81+
- from: 0
82+
to: 1
83+
window_size: -1

0 commit comments

Comments
 (0)