Skip to content

Commit 538771b

Browse files
committed
Refactor dataloaders - part 1
Signed-off-by: Rashid Kaleem <230885705+arekay-nv@users.noreply.github.com>
1 parent 0411574 commit 538771b

File tree

9 files changed

+476
-669
lines changed

9 files changed

+476
-669
lines changed

src/inference_endpoint/commands/benchmark.py

Lines changed: 0 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -375,35 +375,6 @@ def _get_dataset_path(args: argparse.Namespace, config: BenchmarkConfig) -> Path
375375
return dataset_path
376376

377377

378-
def _get_dataset_format(config: BenchmarkConfig, dataset_path: Path) -> str:
379-
"""Get or infer dataset format.
380-
381-
CURRENT LIMITATION: Only supports single dataset.
382-
383-
Args:
384-
config: BenchmarkConfig
385-
dataset_path: Path to dataset file
386-
387-
Returns:
388-
Dataset format string (e.g., "pkl", "hf")
389-
390-
TODO: Multi-dataset support
391-
When implemented, this should:
392-
1. Return dict[Path, str] mapping dataset paths to formats
393-
2. Validate format compatibility across datasets
394-
"""
395-
# Try to get format from config
396-
# TODO: Multi-dataset - currently just uses single dataset format
397-
single_dataset = config.get_single_dataset()
398-
if single_dataset and single_dataset.format:
399-
return single_dataset.format
400-
401-
# Infer from file extension
402-
format_str = DataLoaderFactory.infer_format(dataset_path)
403-
logger.info(f"Inferred dataset format: {format_str}")
404-
return format_str
405-
406-
407378
def _run_benchmark(
408379
config: BenchmarkConfig,
409380
collect_responses: bool,
@@ -485,13 +456,8 @@ def _run_benchmark(
485456

486457
# Get dataset - from CLI or from config
487458
# TODO: Dataset Logic is not yet fully implemented
488-
# dataset_path = _get_dataset_path(args, config)
489459
dataset_path = config.datasets[0].path
490460

491-
# Load dataset using factory
492-
dataset_format = _get_dataset_format(config, dataset_path)
493-
logger.info(f"Loading: {dataset_path} (format: {dataset_format})")
494-
495461
# Determine if streaming should be enabled based on config
496462
streaming_mode = config.model_params.streaming
497463

@@ -517,7 +483,6 @@ def _run_benchmark(
517483

518484
dataloader = DataLoaderFactory.create_loader(
519485
dataset_path,
520-
format=dataset_format,
521486
key_maps=key_maps,
522487
metadata={
523488
"model": model_name,
@@ -534,9 +499,6 @@ def _run_benchmark(
534499
except FileNotFoundError as e:
535500
logger.error(f"Dataset file not found: {dataset_path}")
536501
raise InputValidationError(f"Dataset file not found: {dataset_path}") from e
537-
except NotImplementedError as e:
538-
logger.error(f"Dataset format not supported: {dataset_format}")
539-
raise SetupError(str(e)) from e
540502
except Exception as e:
541503
logger.error("Dataset load failed")
542504
raise SetupError(f"Failed to load dataset: {e}") from e

src/inference_endpoint/dataset_manager/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,10 @@
2121

2222
from .dataloader import (
2323
DataLoader,
24-
HFDataLoader,
25-
PickleReader,
2624
)
2725
from .factory import DataLoaderFactory
2826

2927
__all__ = [
3028
"DataLoader",
3129
"DataLoaderFactory",
32-
"HFDataLoader",
33-
"PickleReader",
3430
]

0 commit comments

Comments
 (0)