
Commit 4a7665d

Fix benchmark bug (#7002)
* fix bug
* fix wint4 convert

1 parent f560733 · commit 4a7665d

File tree: 4 files changed (+33, -12 lines)


llm/benchmark.sh

Lines changed: 16 additions & 2 deletions
@@ -1,3 +1,17 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 export PYTHONPATH=$(dirname $(pwd)):$PYTHONPATH
 
 export FLAGS_control_flow_use_new_executor=1
@@ -6,10 +20,10 @@ export FLAGS_allocator_strategy=naive_best_fit
 export FLAGS_fraction_of_gpu_memory_to_use=0.92
 
 python predictor.py \
-    --model_name_or_path ./llama-13b-inference_model_fp16 \
+    --model_name_or_path ./llama7b-inference_model_fp16 \
     --dtype float16 \
     --src_length 300 \
-    --max_length 400 \
+    --max_length 100 \
     --output_file "infer.json" \
     --mode "static" \
     --batch_size 1 \
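
With src_length 300 and the new max_length of 100, the total sequence budget is 400 positions, which matches the resized position-id buffer in llm/utils.py below (max_length + src_length rather than max_length alone). A small sanity sketch, assuming that sizing:

    # Assumed sizing (see the utils.py diff): buffer = prompt slots + decode slots.
    src_length, max_length = 300, 100
    total_positions = src_length + max_length  # 400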

llm/predictor.py

Lines changed: 3 additions & 3 deletions
@@ -352,6 +352,7 @@ def _preprocess(self, source):
         inputs = dybatch_preprocess(
             self.tokenizer,
             source,
+            self.config.src_length,
             self.config.max_length,
             self.architectures,
             top_p=self.config.top_p,
@@ -369,6 +370,7 @@ def _preprocess(self, source):
         inputs = dybatch_preprocess(
             self.tokenizer,
             source,
+            self.config.src_length,
             self.config.max_length,
             self.architectures,
             top_p=self.config.top_p,
@@ -431,6 +433,7 @@ def _preprocess(self, source):
         inputs = dybatch_preprocess(
             self.tokenizer,
             source,
+            self.config.src_length,
             self.config.max_length,
             self.architectures,
             top_p=self.config.top_p,
@@ -790,9 +793,6 @@ def benchmark(predictor, predictor_args, model_args):
     test_texts = "hello world, how are you?"
     benchmark_texts = [test_texts + "<pad>" * predictor_args.src_length for _ in range(predictor_args.batch_size)]
 
-    benchmark_texts = [
-        "<pad>" * (predictor_args.src_length // 2 - 3) + "My name is " for _ in range(predictor_args.batch_size)
-    ]
     batch_benchmark_texts = batchfy_text(benchmark_texts, predictor_args.batch_size)
     print("***********Start Benchmark**********")
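
Two things change in this file: dybatch_preprocess gains src_length as a new positional argument at every call site, and the second benchmark_texts assignment is deleted. That assignment silently overwrote the full-length prompt built just above it, so the benchmark was timing prefill on roughly half-length inputs. Illustrative numbers, assuming each "<pad>" counts as one token:

    # Prompt padding before and after the fix (illustrative, src_length = 300):
    src_length = 300
    old_pads = src_length // 2 - 3  # 147 pads before "My name is " (deleted override)
    new_pads = src_length           # 300 pads appended to the greeting (kept line)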

llm/utils.py

Lines changed: 6 additions & 3 deletions
@@ -385,6 +385,7 @@ def pad_batch_data(insts, pad_id=0, return_seq_len=False, pad_style="right"):
 def dybatch_preprocess(
     tokenizer,
     texts: list[str],
+    src_length: int,
     max_length: int,
     architectures: str,
     top_p: float,
@@ -398,7 +399,7 @@ def dybatch_preprocess(
     position_ids = []
 
     for text in texts:
-        tokens = tokenizer(text, return_tensors="np", padding=True)
+        tokens = tokenizer(text, return_tensors="np", padding=True, max_length=src_length)
         input_ids.append(tokens["input_ids"][0])
         position_ids.append(tokens["position_ids"][0])

@@ -423,6 +424,7 @@ def dybatch_preprocess(
             text,
             return_tensors="np",
             padding=False,
+            max_length=src_length,
             return_attention_mask=False,
             return_token_type_ids=False,
         )
@@ -434,7 +436,7 @@ def dybatch_preprocess(
     bs = inputs["input_ids"].shape[0]
     max_len = max(map(len, input_ids))
 
-    position_ids = paddle.zeros(shape=[bs, max_length], dtype="int64")
+    position_ids = paddle.zeros(shape=[bs, max_length + src_length], dtype="int64")
 
     for i in range(bs):
         position_ids[i, pre_caches_length : pre_caches_length + seq_len[i]] = paddle.arange(seq_len[i])
@@ -490,7 +492,8 @@ def dybatch_preprocess(
             [
                 1
                 if not benchmark
-                else max_length,  # Note(Zhengzekang): When in benchmark mode, we need to set a fixed decode length.
+                else max_length
+                - pre_caches_length,  # Note(Zhengzekang): When in benchmark mode, we need to set a fixed decode length.
             ]
             * bs
         )
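
Taken together, these changes cap every prompt at src_length when tokenizing and size the position-id buffer for the prompt plus the decoded continuation. A minimal NumPy stand-in for the new sizing logic (illustrative values; the real code uses paddle.zeros):

    import numpy as np

    bs, src_length, max_length, pre_caches_length = 2, 300, 100, 0
    seq_len = [120, 300]  # per-sample prompt lengths, already capped at src_length

    # The buffer must hold src_length prompt positions plus max_length decode positions.
    position_ids = np.zeros((bs, max_length + src_length), dtype="int64")
    for i in range(bs):
        position_ids[i, pre_caches_length : pre_caches_length + seq_len[i]] = np.arange(seq_len[i])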

paddlenlp/experimental/transformers/fused_transformer_layers.py

Lines changed: 8 additions & 4 deletions
@@ -212,9 +212,13 @@ def __init__(
         self.quant_bits = quant_bits
         self.use_weight_only = False
         self.weight_dtype = self._dtype
+        self.create_params_type = self._dtype
 
         if self.quant_bits != -1:
             self.use_weight_only = True
+            self.create_params_type = (
+                "int8"  # For weight-only int4, params are stored as int8 and one dimension is halved.
+            )
             self.weight_dtype = "int" + str(self.quant_bits)
 
         self.ln_scales, self.ln_biases = [], []
@@ -292,7 +296,7 @@ def _add_parameter(param):
         qkv_weight = self.create_parameter(
             shape=qkv_weight_shape,
             attr=qkv_weight_attr,
-            dtype=self.weight_dtype,
+            dtype=self.create_params_type,
             is_bias=False,
         )

@@ -321,7 +325,7 @@ def _add_parameter(param):
         linear_weight = self.create_parameter(
             shape=linear_weight_shape,
             attr=linear_weight_attr,
-            dtype=self.weight_dtype,
+            dtype=self.create_params_type,
             is_bias=False,
         )

@@ -371,7 +375,7 @@ def _add_parameter(param):
         ffn1_weight = self.create_parameter(
             shape=ffn1_weight_shape,
             attr=ffn1_weight_attr,
-            dtype=self.weight_dtype,
+            dtype=self.create_params_type,
             is_bias=False,
         )

@@ -401,7 +405,7 @@ def _add_parameter(param):
         ffn2_weight = self.create_parameter(
             shape=ffn2_weight_shape,
             attr=ffn2_weight_attr,
-            dtype=self.weight_dtype,
+            dtype=self.create_params_type,
             is_bias=False,
         )
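
This is the "fix wint4 convert" half of the commit: weight-only layers now create parameters with a storage dtype (create_params_type = "int8") separate from the logical weight dtype ("int4" or "int8"), since two int4 values pack into one int8 byte and one tensor dimension is halved. A hedged NumPy illustration of such packing (the exact layout is whatever PaddlePaddle's weight-only kernels expect):

    import numpy as np

    # Pack pairs of int4 values (range [-8, 7]) into single int8 bytes.
    w4 = np.array([[1, -2, 3, -4]], dtype=np.int8)
    lo = w4[:, 0::2] & 0x0F            # low nibbles
    hi = w4[:, 1::2] & 0x0F            # high nibbles
    packed = (lo | (hi << 4)).astype(np.int8)
    print(packed.shape, packed.dtype)  # (1, 2) int8: last dimension halved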
