Commit fa70b39: Save LM-Eval results locally
1 parent: 0a2e7f0

File tree: 5 files changed (+219, -234 lines)


run-inline.yaml

Lines changed: 0 additions & 2 deletions

```diff
@@ -18,6 +18,4 @@ providers:
     config:
       base_url: ${env.BASE_URL:=http://localhost:8321/v1}
       use_k8s: ${env.USE_K8S:=false}
-  # server:
-  #   port: ${env.PORT:=8321}
 external_providers_dir: ./providers.d
```
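The run config above relies on Llama Stack's `${env.VAR:=default}` placeholder syntax. As a minimal illustration only (the real substitution is performed by Llama Stack itself, not by this provider), the pattern resolves roughly like this:

```python
import os
import re

# Sketch of ${env.NAME:=default} resolution; illustrative, not Llama Stack's code.
_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+):=(?P<default>[^}]*)\}")

def resolve_env(value: str) -> str:
    """Replace each ${env.NAME:=default} with os.environ[NAME], else the default."""
    return _ENV_PATTERN.sub(
        lambda m: os.environ.get(m.group("name"), m.group("default")), value
    )

print(resolve_env("${env.BASE_URL:=http://localhost:8321/v1}"))
# -> "http://localhost:8321/v1" unless BASE_URL is set in the environment
```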

src/llama_stack_provider_lmeval/config.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any
 
 from llama_stack.apis.eval import BenchmarkConfig, EvalCandidate
@@ -11,7 +12,6 @@
 
 from .errors import LMEvalConfigError
 
-
 @json_schema_type
 @dataclass
 class TLSConfig:
@@ -125,12 +125,16 @@ class LMEvalEvalProviderConfig:
     metadata: dict[str, Any] | None = None
     # TLS configuration - structured approach
     tls: TLSConfig | None = None
+    base_dir: Path = Path(__file__).parent
+    results_dir: Path = base_dir / "results"
+
 
     def __post_init__(self):
         """Validate the configuration"""
         if not isinstance(self.use_k8s, bool):
             raise LMEvalConfigError("use_k8s must be a boolean")
 
+
 __all__ = [
     "TLSConfig",
     "LMEvalBenchmarkConfig",
```

src/llama_stack_provider_lmeval/inline/__init__.py

Lines changed: 9 additions & 9 deletions

```diff
@@ -1,23 +1,23 @@
 import logging
-from typing import Optional
 
 from llama_stack.apis.datatypes import Api
 from llama_stack.providers.datatypes import ProviderSpec
 
-from llama_stack_provider_lmeval.config import LMEvalBenchmarkConfig
+from llama_stack_provider_lmeval.config import LMEvalEvalProviderConfig
+
 from .lmeval import LMEvalInline
 
 logger = logging.getLogger(__name__)
 
 async def get_provider_impl(
-    config: LMEvalBenchmarkConfig,
-    deps: Optional[dict[Api, ProviderSpec]] = None,
+    config: LMEvalEvalProviderConfig,
+    deps: dict[Api, ProviderSpec] | None = None,
 ) -> LMEvalInline:
     """Get an inline Eval implementation from the configuration.
 
     Args:
-        config: LMEvalInlineBenchmarkConfig
-        deps: Optional[dict[Api, ProviderSpec]] = None
+        config: LMEvalEvalProviderConfig
+        deps: Optional[dict[Api, Any]] = None - can be ProviderSpec or API instances
 
     Returns:
         Configured LMEval Inline implementation
@@ -35,12 +35,12 @@ async def get_provider_impl(
         for arg in config.model_args:
             if arg.get("name") == "base_url":
                 base_url = arg.get("value")
-                logger.debug(f"Using base_url from config: {base_url}")
+                logger.debug("Using base_url from config: %s", base_url)
                 break
 
-        return LMEvalInline(config=config)
+        return LMEvalInline(config=config, deps=deps)
     except Exception as e:
-        raise Exception(f"Failed to create LMEval implementation: {str(e)}") from e
+        raise RuntimeError(f"Failed to create LMEval implementation: {str(e)}") from e
 
 __all__ = [
     "get_provider_impl",
```
