File tree Expand file tree Collapse file tree 4 files changed +12
-4
lines changed
applications/ColossalChat/coati/distributed
colossalai/shardformer/modeling Expand file tree Collapse file tree 4 files changed +12
-4
lines changed Original file line number Diff line number Diff line change 21
21
container :
22
22
image : image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
23
23
options : --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data --shm-size=10.24gb
24
- timeout-minutes : 60
24
+ timeout-minutes : 180
25
25
defaults :
26
26
run :
27
27
shell : bash
34
34
pip install --no-cache-dir -v -e .
35
35
36
36
- name : Install ChatGPT
37
+ env :
38
+ CFLAGS : " -O1"
39
+ CXXFLAGS : " -O1"
40
+ MAX_JOBS : 4
37
41
run : |
38
42
pip install flash-attn --no-build-isolation
39
43
cd applications/ColossalChat
Original file line number Diff line number Diff line change 21
21
container :
22
22
image : image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
23
23
options : --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data
24
- timeout-minutes : 30
24
+ timeout-minutes : 180
25
25
defaults :
26
26
run :
27
27
shell : bash
30
30
uses : actions/checkout@v2
31
31
32
32
- name : Install ChatGPT
33
+ env :
34
+ CFLAGS : " -O1"
35
+ CXXFLAGS : " -O1"
36
+ MAX_JOBS : 4
33
37
run : |
34
38
pip install flash-attn --no-build-isolation
35
39
cd applications/ColossalChat
Original file line number Diff line number Diff line change @@ -530,4 +530,4 @@ def state_dict(self):
530
530
model = self .policy_model .unwrap ()
531
531
state_dict = model .state_dict ()
532
532
state_dict ["consumer_global_step" ] = torch .tensor ([self .global_step ], device = self .device )
533
- return state_dict
533
+ return state_dict
Original file line number Diff line number Diff line change @@ -273,7 +273,7 @@ def qwen3_for_causal_lm_forward(
273
273
hidden_states : Optional [torch .FloatTensor ] = None ,
274
274
stage_index : Optional [List [int ]] = None ,
275
275
shard_config : ShardConfig = None ,
276
- ** kwargs
276
+ ** kwargs ,
277
277
):
278
278
r"""
279
279
Args:
You can’t perform that action at this time.
0 commit comments