Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion inference/huggingface/text-generation/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@
# Benchmarking switch: off unless the flag is passed on the command line.
parser.add_argument("--test_performance", action='store_true', help="enable latency, bandwidth, and throughput testing")
# Distributed settings: fall back to the LOCAL_RANK / WORLD_SIZE environment
# variables (or 0 / 1 when those are unset) if not given explicitly.
parser.add_argument("--local_rank", type=int, default=int(os.getenv("LOCAL_RANK", "0")), help="local rank")
parser.add_argument("--world_size", type=int, default=int(os.getenv("WORLD_SIZE", "1")), help="world_size")
# Registered exactly once: argparse's default conflict handler raises
# ArgumentError if the same option string is added a second time.
parser.add_argument("--test_hybrid_engine", action='store_true', help="enable hybrid engine testing")
# Optional integer; defaults to 0 when the flag is omitted.
parser.add_argument("--quantize_groups", type=int, required=False, default=0, help="number of weight quantization groups to use")
1 change: 1 addition & 0 deletions inference/huggingface/text-generation/inference-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
replace_with_kernel_inject=args.use_kernel,
max_tokens=args.max_tokens,
save_mp_checkpoint_path=args.save_mp_checkpoint_path,
quantize_groups=args.quantize_groups,
**ds_kwargs
)

Expand Down