[dpp] Add a return_key option to the samplers in [utils] so they can also return sample_key; it defaults to False but is enabled for dpp

dliu-ibm · dliu-ibm · commit 4ad711c64f45 · 2025-09-24T12:41:52.000-04:00
Signed-off-by: dliu-ibm <dliu@ibm.com>
diff --git a/aiu_fms_testing_utils/testing/validation.py b/aiu_fms_testing_utils/testing/validation.py
@@ -440,9 +440,9 @@ def get_validation_info_path(
     if aftu_version is None:
         aftu_version = version_tuple
 
-    enforce_sizes = kwargs.get("enforce_sizes", None)
+    sample_key = kwargs.get("sample_key", None)
 
-    validation_file_name = f"{get_default_validation_prefix(model_variant, max_new_tokens, batch_size, seq_length, dtype, attn_type, '.'.join([str(_) for _ in aftu_version[:3]])), enforce_sizes}.{device_type}_validation_info.{seed}.out"
+    validation_file_name = f"{get_default_validation_prefix(model_variant, max_new_tokens, batch_size, seq_length, dtype, attn_type, '.'.join([str(_) for _ in aftu_version[:3]])), sample_key}.{device_type}_validation_info.{seed}.out"
     full_path = os.path.join(validation_info_dir, validation_file_name)
     return full_path
 
diff --git a/aiu_fms_testing_utils/utils/__init__.py b/aiu_fms_testing_utils/utils/__init__.py
@@ -11,6 +11,7 @@
 
 from aiu_fms_testing_utils.utils.aiu_setup import dprint, rank, world_size
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
+from aiu_fms_testing_utils.testing.validation import format_kwargs_to_string
 
 from fms.utils.generation import pad_input_ids
 import torch
@@ -479,6 +480,7 @@ def sample_rag_factoid_requests(
     enforce_sizes: List[int] = [],
     truncation: bool = False,
     pad_multiple: int = 64,
+    return_key: bool = False,
 ) -> List[Tuple[str, int]]:
     if not os.path.exists(dataset_path):
         print("error dataset does not exist")
@@ -489,7 +491,7 @@ def sample_rag_factoid_requests(
         for line in f:
             dataset.append(line)
 
-    return __sample_requests(
+    sample_request = __sample_requests(
         dataset,
         num_requests,
         tokenizer,
@@ -503,6 +505,24 @@ def sample_rag_factoid_requests(
         _cached_dataset_key=dataset_path,
     )
 
+    sample_key: str = format_kwargs_to_string(
+        dataset="rag_factoid",
+        num_requests=num_requests,
+        tokenizer=tokenizer.name_or_path.replace("/", "--"),
+        prompt_length_min=prompt_length_min,
+        prompt_length_max=prompt_length_max,
+        seed=seed,
+        enforce_heterogeneous=enforce_heterogeneous,
+        enforce_sizes=enforce_sizes,
+        truncate=truncation,
+        pad_multiple=pad_multiple,
+    )
+
+    if return_key:
+        return sample_request, sample_key
+    else:
+        return sample_request
+
 
 def sample_sharegpt_requests(
     dataset_path: str,
@@ -515,6 +535,7 @@ def sample_sharegpt_requests(
     enforce_sizes: List[int] | None = None,
     truncation: bool = False,
     pad_multiple: int = 64,
+    return_key: bool = False,
 ) -> List[Tuple[str, int]]:
     if not os.path.exists(dataset_path):
         print("downloading share-gpt dataset as it does not exist")
@@ -540,7 +561,7 @@ def sample_sharegpt_requests(
     dataset = [data for data in dataset if len(data["conversations"]) >= 2]
     dataset: List[str] = [data["conversations"][0]["value"] for data in dataset]
 
-    return __sample_requests(
+    sample_request = __sample_requests(
         dataset,
         num_requests,
         tokenizer,
@@ -554,6 +575,24 @@ def sample_sharegpt_requests(
         _cached_dataset_key=dataset_path,
     )
 
+    sample_key: str = format_kwargs_to_string(
+        dataset="sharegpt",
+        num_requests=num_requests,
+        tokenizer=tokenizer.name_or_path.replace("/", "--"),
+        prompt_length_min=prompt_length_min,
+        prompt_length_max=prompt_length_max,
+        seed=seed,
+        enforce_heterogeneous=enforce_heterogeneous,
+        enforce_sizes=enforce_sizes,
+        truncate=truncation,
+        pad_multiple=pad_multiple,
+    )
+
+    if return_key:
+        return sample_request, sample_key
+    else:
+        return sample_request
+
 
 def sample_squad_v2_qa_requests(
     dataset_path: str,
diff --git a/scripts/drive_paged_programs.py b/scripts/drive_paged_programs.py
@@ -245,7 +245,7 @@ def __custom_line_sampler(*args, **kwargs):
 
 def __prepare_inputs(batch_size, seq_length, tokenizer, enforce_sizes=[], seed=0):
     start = time.time()
-    prompts_and_sizes = sampler(
+    prompts_and_sizes, sample_key = sampler(
         DATASET_PATH,
         batch_size,
         tokenizer,
@@ -254,6 +254,7 @@ def __prepare_inputs(batch_size, seq_length, tokenizer, enforce_sizes=[], seed=0
         seed,
         enforce_sizes=enforce_sizes,
         truncation=allow_truncation,
+        return_key=True,
     )
     end = time.time()
     if local_rank == 0:
@@ -274,7 +275,7 @@ def __prepare_inputs(batch_size, seq_length, tokenizer, enforce_sizes=[], seed=0
 
     input_ids, extra_kwargs = pad_input_ids(prompt_list, min_pad_length=seq_length)
     extra_kwargs["mask"] = extra_kwargs["mask"].to(torch.float16)
-    return input_ids, extra_kwargs
+    return input_ids, extra_kwargs, sample_key
 
 
 def __maybe_prepare_fp8_weights(model_in, is_fp8):
@@ -367,13 +368,14 @@ def __load_validation_info(
 
 # warmup with any input so compiler produces criteria json
 # TODO: Swap this with __prepare_inputs once fix for shape_id is available
-# input_ids, extra_kwargs = __prepare_inputs(2, max_tkv, tokenizer)
+# input_ids, extra_kwargs, sample_key = __prepare_inputs(2, max_tkv, tokenizer)
 prompt_list = [torch.arange(0, 64, dtype=torch.int64)]
 # matching vllm warmup to pad to 2 on fp8, and no pad for fp16
 if is_fp8:
     prompt_list = prompt_list * 2
 input_ids, extra_kwargs = pad_input_ids(prompt_list, min_pad_length=64)
 extra_kwargs["mask"] = extra_kwargs["mask"].to(torch.float16)
+
 extra_kwargs["attn_name"] = ATTN_NAME
 if (
     "granite-3.3-8b-instruct" in model_variant
@@ -572,8 +574,8 @@ def __metric_calculator(r: torch.Tensor, t: torch.Tensor):
             itertools.islice(itertools.cycle(possible_seq_lengths), valid_prompt[0] - 1)
         )
 
-    input_ids, extra_kwargs = __prepare_inputs(
-        valid_prompt[0], valid_prompt[1], tokenizer, enforce_sizes=enforce_sizes
+    input_ids, extra_kwargs, sample_key = __prepare_inputs(
+        valid_prompt[0], valid_prompt[1], tokenizer, enforce_sizes=[valid_prompt[1]]
     )
     extra_kwargs["attn_name"] = ATTN_NAME
     if (
@@ -622,7 +624,7 @@ def __metric_calculator(r: torch.Tensor, t: torch.Tensor):
                         0,
                         ATTN_NAME,
                         dtype=CPU_DTYPE,
-                        enforce_sizes=[valid_prompt[1]],
+                        sample_key=sample_key,
                     )
                 )