Skip to content

Commit 510fe09

Browse files
feat: add OCI exports configuration to evaluation jobs (#90)
* feat: add OCI exports configuration to evaluation jobs

  Add EvaluationExports, EvaluationExportsOCI, OCIConnectionConfig, and OCICoordinates models to support OCI artifact persistence in evaluation job submissions. Export new models from the public API surface.

  Co-Authored-By: Claude <noreply@anthropic.com>

* fix: linting from ruff
* chore: update readme with minimal info
* chore: remove examples in pydantic hint

---------

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 9b5a913 commit 510fe09

File tree

6 files changed

+241
-33
lines changed

6 files changed

+241
-33
lines changed

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,16 @@ from evalhub.adapter import (
383383
**EvalHub Service User:**
384384
```python
385385
# Interacting with EvalHub REST API
386-
from evalhub.client import EvalHubClient
387-
from evalhub.models.api import ModelConfig, JobSubmissionRequest, BenchmarkConfig
386+
from evalhub import (
387+
EvalHubClient,
388+
BenchmarkConfig,
389+
EvaluationExports,
390+
EvaluationExportsOCI,
391+
JobSubmissionRequest,
392+
ModelConfig,
393+
OCIConnectionConfig,
394+
OCICoordinates,
395+
)
388396
```
389397

390398
## Complete Example

src/evalhub/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
BenchmarkInfo,
3030
CollectionRef,
3131
ErrorResponse,
32+
EvaluationExports,
33+
EvaluationExportsOCI,
3234
EvaluationJob,
3335
EvaluationResponse,
3436
EvaluationResult,
@@ -40,6 +42,8 @@
4042
JobStatus,
4143
JobSubmissionRequest,
4244
ModelConfig,
45+
OCIConnectionConfig,
46+
OCICoordinates,
4347
)
4448

4549
__version__ = "0.1.4"
@@ -52,6 +56,8 @@
5256
"BenchmarkInfo",
5357
"CollectionRef",
5458
"ErrorResponse",
59+
"EvaluationExports",
60+
"EvaluationExportsOCI",
5561
"EvaluationJob",
5662
"EvaluationResponse",
5763
"EvaluationResult",
@@ -63,6 +69,8 @@
6369
"JobStatus",
6470
"JobSubmissionRequest",
6571
"ModelConfig",
72+
"OCIConnectionConfig",
73+
"OCICoordinates",
6674
]
6775

6876
# Conditional imports based on available extras

src/evalhub/models/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
CollectionRef,
1515
ErrorInfo,
1616
ErrorResponse,
17+
EvaluationExports,
18+
EvaluationExportsOCI,
1719
EvaluationJob,
1820
EvaluationJobResource,
1921
EvaluationJobResults,
@@ -31,6 +33,8 @@
3133
JobStatus,
3234
JobSubmissionRequest,
3335
ModelConfig,
36+
OCIConnectionConfig,
37+
OCICoordinates,
3438
PassCriteria,
3539
PrimaryScore,
3640
Provider,
@@ -41,8 +45,12 @@
4145
__all__ = [
4246
# Job & Evaluation models
4347
"JobStatus",
48+
"EvaluationExports",
49+
"EvaluationExportsOCI",
4450
"EvaluationStatus",
4551
"ModelConfig",
52+
"OCIConnectionConfig",
53+
"OCICoordinates",
4654
"EvaluationResult",
4755
"EvaluationJob",
4856
"EvaluationJobResource",

src/evalhub/models/api.py

Lines changed: 51 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,49 @@ class ExperimentConfig(BaseModel):
281281
)
282282

283283

284+
class OCICoordinates(BaseModel):
285+
"""OCI artifact coordinates for persistence."""
286+
287+
oci_host: str = Field(..., description="OCI registry host (e.g., 'quay.io')")
288+
oci_repository: str = Field(
289+
..., description="OCI repository (e.g., 'my-org/my-repo')"
290+
)
291+
oci_tag: str | None = Field(default=None, description="OCI tag (e.g., 'eval-123')")
292+
oci_subject: str | None = Field(
293+
default=None,
294+
description="Optional OCI subject identifier (in same registry and repo)",
295+
)
296+
annotations: dict[str, str] = Field(
297+
default_factory=dict, description="Custom annotations"
298+
)
299+
300+
301+
class OCIConnectionConfig(BaseModel):
302+
"""K8s connection configuration for OCI registry authentication."""
303+
304+
connection: str = Field(
305+
...,
306+
description="Name of a K8s Secret (type kubernetes.io/dockerconfigjson) for OCI registry auth",
307+
)
308+
309+
310+
class EvaluationExportsOCI(BaseModel):
311+
"""OCI export configuration for an evaluation job."""
312+
313+
coordinates: OCICoordinates = Field(..., description="OCI artifact coordinates")
314+
k8s: OCIConnectionConfig | None = Field(
315+
default=None, description="K8s connection for OCI registry auth"
316+
)
317+
318+
319+
class EvaluationExports(BaseModel):
320+
"""Optional exports configuration for an evaluation job."""
321+
322+
oci: EvaluationExportsOCI | None = Field(
323+
default=None, description="OCI export configuration"
324+
)
325+
326+
284327
class JobSubmissionRequest(BaseModel):
285328
"""Request to submit an evaluation job.
286329
@@ -303,6 +346,10 @@ class JobSubmissionRequest(BaseModel):
303346
default=None,
304347
description="MLFlow experiment configuration. When provided, the evaluation job will be tracked in MLFlow.",
305348
)
349+
exports: EvaluationExports | None = Field(
350+
default=None,
351+
description="Optional exports configuration (e.g., OCI artifact persistence)",
352+
)
306353

307354
@model_validator(mode="after")
308355
def check_benchmarks_or_collection(self) -> "JobSubmissionRequest":
@@ -344,6 +391,10 @@ class EvaluationJob(BaseModel):
344391
default=None,
345392
description="MLFlow experiment configuration",
346393
)
394+
exports: EvaluationExports | None = Field(
395+
default=None,
396+
description="Optional exports configuration",
397+
)
347398

348399
# Convenience properties to access nested fields
349400
@property
@@ -398,37 +449,6 @@ class EvaluationResponse(BaseModel):
398449
duration_seconds: float = Field(..., description="Total evaluation time")
399450

400451

401-
class OCICoordinates(BaseModel):
402-
"""OCI artifact coordinates for persistence."""
403-
404-
oci_host: str = Field(
405-
..., description="OCI registry host (e.g., 'quay.io')", examples=["quay.io"]
406-
)
407-
oci_repository: str = Field(
408-
...,
409-
description="OCI repository (e.g., 'my-org/my-repo')",
410-
examples=["my-org/my-repo"],
411-
)
412-
oci_tag: str | None = Field(
413-
default=None, description="OCI tag (e.g., 'eval-123')", examples=["eval-123"]
414-
)
415-
oci_subject: str | None = Field(
416-
default=None,
417-
description="Optional OCI subject identifier (in same registry and repo)",
418-
examples=["quay.io/my-org/my-repo:model"],
419-
)
420-
annotations: dict[str, str] = Field(
421-
default_factory=dict,
422-
description="Custom annotations",
423-
examples=[
424-
{
425-
"model": "quay.io/my-org/my-repo:model",
426-
"some": "value",
427-
}
428-
],
429-
)
430-
431-
432452
class EvaluationJobFilesLocation(BaseModel):
433453
"""Files location for persisting as OCI artifacts for an evaluation job."""
434454

tests/unit/test_evalhub_client.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,16 @@
2828
BaseSyncClient,
2929
)
3030
from evalhub.models.api import (
31+
BenchmarkConfig,
3132
CollectionRef,
33+
EvaluationExports,
34+
EvaluationExportsOCI,
3235
EvaluationJob,
3336
JobStatus,
37+
JobSubmissionRequest,
3438
ModelConfig,
39+
OCIConnectionConfig,
40+
OCICoordinates,
3541
)
3642

3743
# Environment variable to enable real server testing
@@ -323,6 +329,65 @@ def test_sync_client_submit_job_with_collection(self) -> None:
323329

324330
client.close()
325331

332+
@pytest.mark.skipif(
333+
EVALHUB_TEST_BASE_URL is not None,
334+
reason="Skipping in real server mode - would create actual jobs",
335+
)
336+
def test_sync_client_submit_job_with_exports_oci(self) -> None:
337+
"""Test that SyncEvalHubClient can submit jobs with OCI exports configuration."""
338+
client = SyncEvalHubClient()
339+
mock_job_data = {
340+
"resource": {
341+
"id": "job_oci_1",
342+
"tenant": "default",
343+
"created_at": "2024-01-01T12:00:00Z",
344+
"updated_at": "2024-01-01T12:00:00Z",
345+
},
346+
"name": "oci-export-eval",
347+
"description": "Evaluate with OCI exports",
348+
"tags": [],
349+
"status": {"state": JobStatus.PENDING.value},
350+
"model": {"url": "http://localhost:8000/v1", "name": "test-model"},
351+
"benchmarks": [{"id": "mmlu", "provider_id": "lm_eval", "parameters": {}}],
352+
"exports": {
353+
"oci": {
354+
"coordinates": {
355+
"oci_host": "quay.io",
356+
"oci_repository": "my-org/my-repo",
357+
"oci_tag": "eval-123",
358+
},
359+
"k8s": {"connection": "my-pull-secret"},
360+
}
361+
},
362+
}
363+
mock_response = Mock()
364+
mock_response.json.return_value = mock_job_data
365+
366+
with patch.object(client, "_request", return_value=mock_response):
367+
request = JobSubmissionRequest(
368+
name="oci-export-eval",
369+
description="Evaluate with OCI exports",
370+
model=ModelConfig(url="http://localhost:8000/v1", name="test-model"),
371+
benchmarks=[
372+
BenchmarkConfig(id="mmlu", provider_id="lm_eval", parameters={})
373+
],
374+
exports=EvaluationExports(
375+
oci=EvaluationExportsOCI(
376+
coordinates=OCICoordinates(
377+
oci_host="quay.io",
378+
oci_repository="my-org/my-repo",
379+
oci_tag="eval-123",
380+
),
381+
k8s=OCIConnectionConfig(connection="my-pull-secret"),
382+
),
383+
),
384+
)
385+
job = client.jobs.submit(request)
386+
assert isinstance(job, EvaluationJob)
387+
assert job.name == "oci-export-eval"
388+
389+
client.close()
390+
326391
def test_sync_client_context_manager(self) -> None:
327392
"""Test SyncEvalHubClient as context manager."""
328393
with SyncEvalHubClient() as client:

tests/unit/test_models_api.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
CollectionRef,
1313
ErrorInfo,
1414
ErrorResponse,
15+
EvaluationExports,
16+
EvaluationExportsOCI,
1517
EvaluationJob,
1618
EvaluationResponse,
1719
EvaluationResult,
@@ -22,6 +24,8 @@
2224
JobStatus,
2325
JobSubmissionRequest,
2426
ModelConfig,
27+
OCIConnectionConfig,
28+
OCICoordinates,
2529
ProviderList,
2630
)
2731
from pydantic import ValidationError
@@ -318,6 +322,101 @@ def test_submission_excludes_none_on_dump(self) -> None:
318322
assert "collection" in dumped
319323
assert dumped["collection"]["id"] == "healthcare_v1"
320324

325+
def test_submission_with_exports_oci(self) -> None:
326+
"""Test JobSubmissionRequest with full OCI exports configuration."""
327+
request = JobSubmissionRequest(
328+
name="test-eval",
329+
model=ModelConfig(url="http://localhost:8000/v1", name="test-model"),
330+
benchmarks=[
331+
BenchmarkConfig(id="mmlu", provider_id="lm_eval", parameters={})
332+
],
333+
exports=EvaluationExports(
334+
oci=EvaluationExportsOCI(
335+
coordinates=OCICoordinates(
336+
oci_host="quay.io",
337+
oci_repository="my-org/my-repo",
338+
oci_tag="eval-123",
339+
oci_subject="quay.io/my-org/my-repo:model",
340+
annotations={"model": "llama2"},
341+
),
342+
k8s=OCIConnectionConfig(connection="my-pull-secret"),
343+
),
344+
),
345+
)
346+
assert request.exports is not None
347+
assert request.exports.oci is not None
348+
assert request.exports.oci.coordinates.oci_host == "quay.io"
349+
assert request.exports.oci.coordinates.oci_repository == "my-org/my-repo"
350+
assert request.exports.oci.coordinates.oci_tag == "eval-123"
351+
assert request.exports.oci.k8s is not None
352+
assert request.exports.oci.k8s.connection == "my-pull-secret"
353+
354+
def test_submission_with_exports_oci_minimal(self) -> None:
355+
"""Test JobSubmissionRequest with minimal OCI exports (required fields only)."""
356+
request = JobSubmissionRequest(
357+
name="test-eval",
358+
model=ModelConfig(url="http://localhost:8000/v1", name="test-model"),
359+
benchmarks=[
360+
BenchmarkConfig(id="mmlu", provider_id="lm_eval", parameters={})
361+
],
362+
exports=EvaluationExports(
363+
oci=EvaluationExportsOCI(
364+
coordinates=OCICoordinates(
365+
oci_host="quay.io",
366+
oci_repository="my-org/my-repo",
367+
),
368+
),
369+
),
370+
)
371+
assert request.exports is not None
372+
assert request.exports.oci is not None
373+
assert request.exports.oci.coordinates.oci_tag is None
374+
assert request.exports.oci.k8s is None
375+
376+
def test_submission_exports_excluded_when_none_on_dump(self) -> None:
377+
"""Test that exports is excluded from dump when not set."""
378+
request = JobSubmissionRequest(
379+
name="test-eval",
380+
model=ModelConfig(url="http://localhost:8000/v1", name="test-model"),
381+
benchmarks=[
382+
BenchmarkConfig(id="mmlu", provider_id="lm_eval", parameters={})
383+
],
384+
)
385+
dumped = request.model_dump(exclude_none=True)
386+
assert "exports" not in dumped
387+
388+
def test_submission_exports_oci_dump_matches_server_schema(self) -> None:
389+
"""Test that serialized exports matches the server's expected JSON structure."""
390+
request = JobSubmissionRequest(
391+
name="test-eval",
392+
model=ModelConfig(url="http://localhost:8000/v1", name="test-model"),
393+
benchmarks=[
394+
BenchmarkConfig(id="mmlu", provider_id="lm_eval", parameters={})
395+
],
396+
exports=EvaluationExports(
397+
oci=EvaluationExportsOCI(
398+
coordinates=OCICoordinates(
399+
oci_host="quay.io",
400+
oci_repository="my-org/my-repo",
401+
oci_tag="eval-123",
402+
),
403+
k8s=OCIConnectionConfig(connection="my-pull-secret"),
404+
),
405+
),
406+
)
407+
dumped = request.model_dump(exclude_none=True)
408+
assert dumped["exports"] == {
409+
"oci": {
410+
"coordinates": {
411+
"oci_host": "quay.io",
412+
"oci_repository": "my-org/my-repo",
413+
"oci_tag": "eval-123",
414+
"annotations": {},
415+
},
416+
"k8s": {"connection": "my-pull-secret"},
417+
}
418+
}
419+
321420

322421
class TestExperimentConfig:
323422
"""Test cases for ExperimentConfig and ExperimentTag models."""

0 commit comments

Comments (0)