Skip to content

Commit 40f39fd

Browse files
authored
Restrict size_per_head in GPT to use FT. (#1441)
* Restrict size_per_head to use FT. * Fix GPT Chinese detokenizing for FasterGPT samples.
1 parent 6a3f295 commit 40f39fd

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

examples/language_model/gpt/faster_gpt/infer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def do_predict(args):
124124
paddle.fluid.core._cuda_synchronize(place)
125125
logger.info("Average test time for decoding is %f ms" % (
126126
(time.time() - start) / 50 * 1000))
127-
output_sequence = out_seq.numpy()
127+
output_sequence = out_seq.numpy().tolist()
128128
for i in range(args.batch_size):
129129
print("========== Sample-%d ==========" % i)
130130
print(tokenizer.convert_ids_to_string(output_sequence[i]))

paddlenlp/ops/faster_transformer/sample/gpt_sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def do_predict(args):
130130
paddle.device.cuda.synchronize(place)
131131
logger.info("Average test time for decoding is %f ms" % (
132132
(time.time() - start) / 50 * 1000))
133-
output_sequence = out_seq.numpy()
133+
output_sequence = out_seq.numpy().tolist()
134134
for i in range(args.batch_size):
135135
print("========== Sample-%d ==========" % i)
136136
print(tokenizer.convert_ids_to_string(output_sequence[i]))

paddlenlp/transformers/gpt/modeling.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,13 @@ def prepare_faster_entry(self, kwargs):
11271127
raise AttributeError(
11281128
"'beam_search' is not supported yet in the faster version of GPT"
11291129
)
1130+
# Currently, FasterTransformer only support restricted size_per_head.
1131+
size_per_head = self.gpt.config["hidden_size"] // self.gpt.config[
1132+
"num_attention_heads"]
1133+
if size_per_head not in [32, 64, 128]:
1134+
raise AttributeError(
1135+
"'size_per_head = %d' is not supported yet in the faster version of GPT"
1136+
% size_per_head)
11301137
self._faster_entry = FasterGPT(
11311138
self, use_fp16_decoding=use_fp16_decoding).forward
11321139
return self._faster_entry

0 commit comments

Comments
 (0)