diff --git a/README.md b/README.md index 44af6a8a5..8c28f9faa 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@

-

Your go-to toolkit for lightning-fast, flexible LLM evaluation, from Hugging Face's Leaderboard and Evals Team.

@@ -93,6 +92,14 @@ lighteval accelerate \ "leaderboard|truthfulqa:mc|0|0" ``` +### 🔒 Offline Usage +If you are working in a restricted or offline network environment (e.g., behind a firewall or in an air-gapped setting), +lighteval supports loading datasets from a local path given by the `LOCAL_DATASET_PATH` environment variable: +```shell +export LOCAL_DATASET_PATH=/path/to/your/local/dataset +``` +Make sure the local directory contains the dataset in a structure compatible with the Hugging Face datasets library. + ## 🙏 Acknowledgements Lighteval started as an extension of the fantastic [Eleuther AI diff --git a/src/lighteval/tasks/extended/tiny_benchmarks/main.py b/src/lighteval/tasks/extended/tiny_benchmarks/main.py index d195bc89b..290236674 100644 --- a/src/lighteval/tasks/extended/tiny_benchmarks/main.py +++ b/src/lighteval/tasks/extended/tiny_benchmarks/main.py @@ -92,6 +92,10 @@ def __init__(self, task: str): def download(self): # Likely to crash in // processes if we don't include the pkl path_dld = os.path.join(pathlib.Path(__file__).parent.resolve(), "tinyBenchmarks.pkl") + local_dataset_path = os.getenv("LOCAL_DATASET_PATH", None) + if local_dataset_path is not None: + # If LOCAL_DATASET_PATH is set, use it as the tinyBenchmarks path + path_dld = os.path.join(local_dataset_path, "tinyBenchmarks.pkl") # Downloading files if not os.path.isfile(path_dld): url = "https://raw.githubusercontent.com/felipemaiapolo/tinyBenchmarks/main/tinyBenchmarks/tinyBenchmarks.pkl" diff --git a/src/lighteval/utils/utils.py b/src/lighteval/utils/utils.py index 28e0ac4a4..ed9826157 100644 --- a/src/lighteval/utils/utils.py +++ b/src/lighteval/utils/utils.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import os from dataclasses import asdict, is_dataclass from typing import Callable, TypeVar, Union @@ -210,6 +211,11 @@ def download_dataset_worker( Worker function to download a dataset from the HuggingFace Hub. Used for parallel dataset loading. """ + local_dataset_path = os.getenv("LOCAL_DATASET_PATH", None) + if local_dataset_path is not None: + # If LOCAL_DATASET_PATH is set, use it as the dataset path + dataset_path = local_dataset_path + dataset = load_dataset( path=dataset_path, name=dataset_config_name,