Skip to content

Commit 86bcc39

Browse files
committed
Use nemotron post training dataset for calibration
Signed-off-by: Chenjie Luo <[email protected]>
1 parent 5b02483 commit 86bcc39

File tree

4 files changed: +37 additions, −5 deletions

examples/llm_ptq/hf_ptq.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,8 @@ def main(args):
297297
)
298298
else:
299299
if args.dataset is None:
300-
args.dataset = ["cnn_dailymail"]
301-
warnings.warn("No dataset specified. Defaulting to cnn_dailymail.")
300+
args.dataset = ["nemotron-post-training-dataset-v2"]
301+
warnings.warn("No dataset specified. Defaulting to nemotron-post-training-dataset-v2.")
302302
tokenizer = get_tokenizer(args.pyt_ckpt_path, trust_remote_code=args.trust_remote_code)
303303

304304
default_padding_side = tokenizer.padding_side
@@ -349,6 +349,7 @@ def main(args):
349349
tokenizer=tokenizer,
350350
batch_size=args.batch_size,
351351
num_samples=args.calib_size,
352+
max_sample_length=args.calib_seq,
352353
device=device,
353354
)
354355
model = mts.sparsify(
@@ -390,6 +391,7 @@ def main(args):
390391

391392
args.batch_size = get_max_batch_size(
392393
model,
394+
max_sample_length=args.calib_seq,
393395
sample_memory_usage_ratio=sample_memory_usage_ratio if not run_auto_quant else 1.0,
394396
sample_input_single_batch=sample_input_single_batch,
395397
enable_grad=run_auto_quant,
@@ -680,6 +682,12 @@ def output_decode(generated_ids, input_shape):
680682
type=str,
681683
default="512",
682684
)
685+
parser.add_argument(
686+
"--calib_seq",
687+
help="Maximum sequence length for calibration.",
688+
type=int,
689+
default=512,
690+
)
683691
parser.add_argument("--export_path", default="exported_model")
684692
parser.add_argument(
685693
"--dataset",

examples/llm_ptq/scripts/huggingface_example.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ if [ -n "$GPU_MAX_MEM_PERCENTAGE" ]; then
113113
PTQ_ARGS+=" --gpu_max_mem_percentage=$GPU_MAX_MEM_PERCENTAGE "
114114
fi
115115

116+
if [ -n "$CALIB_SEQ" ]; then
117+
PTQ_ARGS+=" --calib_seq=$CALIB_SEQ "
118+
fi
119+
116120
if ! $VERBOSE; then
117121
PTQ_ARGS+=" --no-verbose "
118122
fi

examples/llm_ptq/scripts/parser.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ parse_options() {
3636
USE_SEQ_DEVICE_MAP=false
3737

3838
# Parse command-line options
39-
ARGS=$(getopt -o "" -l "model:,quant:,kv_cache_quant:,tp:,pp:,sparsity:,awq_block_size:,calib:,calib_batch_size:,auto_quantize_bits:,output:,batch:,tasks:,lm_eval_tasks:,lm_eval_limit:,simple_eval_tasks:,trust_remote_code,use_seq_device_map,gpu_max_mem_percentage:,kv_cache_free_gpu_memory_fraction:,low_memory_mode,no-verbose,calib_dataset:" -n "$0" -- "$@")
39+
ARGS=$(getopt -o "" -l "model:,quant:,kv_cache_quant:,tp:,pp:,sparsity:,awq_block_size:,calib:,calib_batch_size:,auto_quantize_bits:,output:,batch:,tasks:,lm_eval_tasks:,lm_eval_limit:,simple_eval_tasks:,trust_remote_code,use_seq_device_map,gpu_max_mem_percentage:,kv_cache_free_gpu_memory_fraction:,low_memory_mode,no-verbose,calib_dataset:,calib_seq:" -n "$0" -- "$@")
4040

4141
eval set -- "$ARGS"
4242
while true; do
@@ -64,19 +64,24 @@ parse_options() {
6464
--no-verbose ) VERBOSE=false; shift;;
6565
--low_memory_mode ) LOW_MEMORY_MODE=true; shift;;
6666
--calib_dataset ) CALIB_DATASET="$2"; shift 2;;
67+
--calib_seq ) CALIB_SEQ="$2"; shift 2;;
6768
-- ) shift; break ;;
6869
* ) break ;;
6970
esac
7071
done
7172

7273
DEFAULT_CALIB_SIZE=512
74+
DEFAULT_CALIB_SEQ=512
7375
DEFAULT_CALIB_BATCH_SIZE=0
7476
DEFAULT_BUILD_MAX_OUTPUT_LEN=1024
7577
DEFAULT_BUILD_MAX_BATCH_SIZE=2
7678

7779
if [ -z "$CALIB_SIZE" ]; then
7880
CALIB_SIZE=$DEFAULT_CALIB_SIZE
7981
fi
82+
if [ -z "$CALIB_SEQ" ]; then
83+
CALIB_SEQ=$DEFAULT_CALIB_SEQ
84+
fi
8085
if [ -z "$CALIB_BATCH_SIZE" ]; then
8186
CALIB_BATCH_SIZE=$DEFAULT_CALIB_BATCH_SIZE
8287
fi
@@ -144,5 +149,6 @@ parse_options() {
144149
echo "kv_cache_free_gpu_memory_fraction: $KV_CACHE_FREE_GPU_MEMORY_FRACTION"
145150
echo "low_memory_mode: $LOW_MEMORY_MODE"
146151
echo "calib_dataset: $CALIB_DATASET"
152+
echo "calib_seq: $CALIB_SEQ"
147153
echo "================="
148154
}

modelopt/torch/utils/dataset_utils.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,20 @@
5252
+ "\n"
5353
+ sample["output"],
5454
},
55+
"nemotron-post-training-dataset-v2": {
56+
"config": {
57+
"path": "nvidia/Nemotron-Post-Training-Dataset-v2",
58+
"split": ["stem", "chat", "math", "code"],
59+
},
60+
"preprocess": lambda sample: "\n".join(turn["content"] for turn in sample["messages"]),
61+
},
62+
"nemotron-post-training-dataset-v1": {
63+
"config": {
64+
"path": "nvidia/Nemotron-Post-Training-Dataset-v1",
65+
"split": ["stem", "chat", "math", "code", "tool_calling"],
66+
},
67+
"preprocess": lambda sample: "\n".join(turn["content"] for turn in sample["messages"]),
68+
},
5569
"magpie": {
5670
"config": {
5771
"path": "Magpie-Align/Magpie-Pro-MT-300K-v0.1",
@@ -321,10 +335,10 @@ def _get_free_gpu_mem():
321335
return 1
322336
elif target_data_batch < 4:
323337
return 2
324-
elif target_data_batch < 64:
338+
elif target_data_batch < 512:
325339
return target_data_batch // 4 * 4
326340
else:
327-
return 64
341+
return 512
328342

329343

330344
def _process_batch(batch_data, infer_method, max_working_batch_size=None):

Comments (0)