Commit f92be76

Removed DetachedHFEagleModel, and misc tweaks
Signed-off-by: Benjamin Chislett <[email protected]>
1 parent b02a3b4 commit f92be76

11 files changed: +57, -647 lines
Lines changed: 3 additions & 0 deletions
@@ -1 +1,4 @@
 Daring-Anteater
+input_conversations
+synthetic_conversations
+ckpts

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Example usage of the script to compute the hidden states for a conversation dataset.
+# This script computes hidden states using a Hugging Face model and saves them to
+# the specified output directory. It does so in a data-parallel manner across 8 GPUs, by splitting
+# the input file into 8 parts and running 8 processes in parallel, one on each GPU.
+
+# Note: depending on the write throughput of the destination disk, this is not guaranteed
+# to yield a speed improvement compared to running the model-parallel version. Consider
+# benchmarking on a smaller dataset before launching a large run.
+
+INPUT_FILE=synthetic_conversations/daring-anteater.jsonl
+OUTPUT_DIR=/mnt/md0/eagle-hidden-states/llama1b/daring_anteater/
+
+split -n l/8 --numeric-suffixes=0 -d --additional-suffix=.jsonl $INPUT_FILE /tmp/part-
+
+for i in $(seq 0 7)
+do
+CUDA_VISIBLE_DEVICES=$i python3 collect_hidden_states/compute_hidden_states_hf.py --model meta-llama/Llama-3.2-1B-Instruct --input-file /tmp/part-0${i}.jsonl --output-dir $OUTPUT_DIR &
+done
+wait
+
+rm /tmp/part-*.jsonl
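
For context, the per-GPU worker launched above (collect_hidden_states/compute_hidden_states_hf.py) is not shown in this section of the diff. Below is a minimal sketch of what such a worker is assumed to do; the helper name, the JSONL field name, the dtype choices, and the output file layout are illustrative assumptions rather than the real script's behaviour.

import json
from pathlib import Path

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def dump_hidden_states(model_name: str, input_file: str, output_dir: str) -> None:
    """Sketch: run each conversation through the model and save its hidden states."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to("cuda")
    model.eval()
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    with open(input_file) as f:
        for idx, line in enumerate(f):
            sample = json.loads(line)
            # Assumes each JSONL record carries a chat-template-compatible "conversations" list.
            input_ids = tokenizer.apply_chat_template(
                sample["conversations"], return_tensors="pt"
            ).to(model.device)
            with torch.no_grad():
                outputs = model(input_ids, output_hidden_states=True)
            # Keep the final layer's hidden states together with the exact token ids used,
            # one file per conversation.
            torch.save(
                {
                    "input_ids": input_ids.squeeze(0).cpu(),
                    "hidden_states": outputs.hidden_states[-1].squeeze(0).cpu(),
                },
                out_dir / f"sample_{idx:08d}.pt",
            )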

examples/speculative_decoding/eagle_utils.py

Lines changed: 5 additions & 6 deletions
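
The first hunk below relaxes the offline-data check: instead of requiring the cached input_ids to be exactly equal to the freshly preprocessed ones, it only requires matching shapes, and the cached input_ids then override the preprocessed ones in the returned sample. A toy illustration of the relaxed check (a standalone sketch, not code from this file):

import torch

preprocessed_ids = torch.tensor([1, 15, 42, 7])  # from re-tokenizing the prompt
offline_ids = torch.tensor([1, 15, 43, 7])       # stored alongside the cached hidden states

print(torch.equal(preprocessed_ids, offline_ids))    # False -> the old check would raise
print(preprocessed_ids.shape == offline_ids.shape)   # True  -> the new check passes,
                                                     # and offline_ids become the sample's input_ids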
@@ -219,12 +219,13 @@ def __getitem__(self, i) -> dict[str, torch.Tensor]:
         offline_data["aux_hidden_states"] = offline_data["aux_hidden_states"][:max_length, :]
 
         # Make sure the input_ids have the same shape
-        if not torch.equal(preprocessed_base["input_ids"], offline_data["input_ids"]):
+        if preprocessed_base["input_ids"].shape != offline_data["input_ids"].shape:
             msg = f"""Input IDs from offline data do not match the preprocessed input IDs
             for offline data sample at {offline_file_path}."""
             raise ValueError(msg)
 
         ret = {**preprocessed_base} # Shallow copy so we don't accidentally modify the cache
+        ret["input_ids"] = offline_data["input_ids"]
         ret["kwargs"] = {
             "base_model_outputs": {
                 "base_model_hidden_states": offline_data["hidden_states"],
@@ -370,11 +371,9 @@ def __call__(self, features: list[dict[str, Any]]) -> dict[str, Any]:
 
         batch = {
             **base_batch,
-            "kwargs": {
-                "base_model_outputs": {
-                    "base_model_hidden_states": batch_hidden_states,
-                    "aux_hidden_states": batch_aux_hidden_states,
-                }
+            "base_model_outputs": {
+                "base_model_hidden_states": batch_hidden_states,
+                "aux_hidden_states": batch_aux_hidden_states,
             },
         }
 
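
The hunk above drops the intermediate "kwargs" wrapper in the collator, so the cached tensors sit at the top level of the collated batch. A runnable schematic of the resulting structure, using dummy tensors (the contents of base_batch and all shapes are illustrative assumptions):

import torch

# Dummy stand-ins for the collated tensors; shapes are illustrative only.
base_batch = {"input_ids": torch.zeros(4, 128, dtype=torch.long)}
batch_hidden_states = torch.zeros(4, 128, 2048)
batch_aux_hidden_states = torch.zeros(4, 128, 3 * 2048)

batch = {
    **base_batch,
    "base_model_outputs": {
        "base_model_hidden_states": batch_hidden_states,
        "aux_hidden_states": batch_aux_hidden_states,
    },
}
assert "kwargs" not in batch  # the extra nesting level is gone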

examples/speculative_decoding/gen_synthetic_conversations/__init__.py

Lines changed: 0 additions & 16 deletions
This file was deleted.
