Skip to content

Commit b6f76f4

Browse files
Merge pull request #754 from kubeflow/main
[pull] main from kubeflow:main
2 parents 6dce839 + 06fc02d commit b6f76f4

29 files changed

+689
-241
lines changed

.github/workflows/constraints.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
pip==23.3.2
2-
nox==2023.4.22
3-
nox-poetry==1.0.3
4-
poetry==1.8.3
5-
poetry-plugin-export==1.8.0
6-
virtualenv==20.24.6
1+
pip==25.3
2+
nox==2025.11.12
3+
nox-poetry==1.2.0
4+
poetry==2.0.1
5+
poetry-plugin-export==1.9.0
6+
virtualenv==20.35.4

.github/workflows/python-tests.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ jobs:
5353
run: |
5454
pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt poetry
5555
poetry --version
56+
pipx inject poetry poetry-plugin-export
5657
- name: Install Nox
5758
run: |
5859
pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt nox
@@ -169,6 +170,7 @@ jobs:
169170
run: |
170171
pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt poetry
171172
poetry --version
173+
pipx inject poetry poetry-plugin-export
172174
- name: Install Nox
173175
run: |
174176
pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt nox
@@ -288,6 +290,7 @@ jobs:
288290
run: |
289291
pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt poetry
290292
poetry --version
293+
pipx inject poetry poetry-plugin-export
291294
- name: Install Nox
292295
run: |
293296
pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt nox

catalog/internal/catalog/db_catalog_filterquery_test.go

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -721,8 +721,9 @@ func TestArtifactFilteringCapability(t *testing.T) {
721721
".name = $",
722722
".double_value <= $",
723723
},
724-
expectedArgs: []any{"ttft_mean", float64(90), "tpot_mean", float64(50)},
725-
description: "Should handle multiple artifact property filters with separate JOINs",
724+
// Args order: JOIN property names first, then WHERE value conditions
725+
expectedArgs: []any{"ttft_mean", "tpot_mean", float64(90), float64(50)},
726+
description: "Should handle multiple artifact property filters in a single EXISTS with multiple property JOINs on the SAME artifact",
726727
},
727728
{
728729
name: "Artifact property with LIKE",
@@ -835,20 +836,19 @@ func TestArtifactFilteringCapability(t *testing.T) {
835836
expectedFragment, generatedSQL)
836837
}
837838

838-
// Verify arguments if specified
839+
// Verify arguments if specified - ORDER MATTERS for SQL placeholder mapping
839840
if len(tt.expectedArgs) > 0 {
840-
// Check that all expected args are present (order may vary due to JOINs)
841-
// For numeric values, check value equality regardless of type (int vs float64)
842-
for _, expectedArg := range tt.expectedArgs {
843-
found := false
844-
for _, actualArg := range queryArgs {
845-
if actualArg == expectedArg {
846-
found = true
847-
break
848-
}
849-
}
850-
assert.True(t, found, "Expected argument %v not found in actual args: %v",
851-
expectedArg, queryArgs)
841+
// Args must be in exact order to match SQL placeholders ($1, $2, etc.)
842+
// This is critical for combined artifact filters where JOIN args come before WHERE args
843+
require.Equal(t, len(tt.expectedArgs), len(queryArgs)-1, // -1 for type_id
844+
"Argument count mismatch (excluding type_id)")
845+
846+
// Compare args starting from index 1 (skip type_id at index 0)
847+
for i, expectedArg := range tt.expectedArgs {
848+
actualArg := queryArgs[i+1] // +1 to skip type_id
849+
assert.Equal(t, expectedArg, actualArg,
850+
"Argument at position %d should be %v but was %v. Full args: %v",
851+
i+1, expectedArg, actualArg, queryArgs)
852852
}
853853
}
854854

@@ -1120,6 +1120,22 @@ func TestArtifactFilteringEdgeCases(t *testing.T) {
11201120
},
11211121
description: "Should handle both exact and case-insensitive matching on artifact properties (ILIKE uses UPPER for cross-DB compatibility)",
11221122
},
1123+
{
1124+
name: "Bug fix: multiple artifact filters must match SAME artifact",
1125+
filterQuery: `artifacts.hardware_type LIKE "H200" AND artifacts.ttft_p95 < 50`,
1126+
expectedSQL: []string{
1127+
"EXISTS",
1128+
`"Attribution"`,
1129+
`"Artifact"`,
1130+
// Both property JOINs should reference the same artifact (art_X)
1131+
"artprop_",
1132+
".artifact_id = art_",
1133+
// Both conditions should be in the WHERE clause
1134+
".string_value LIKE $",
1135+
".double_value < $",
1136+
},
1137+
description: "Multiple artifact property filters with AND should generate a SINGLE EXISTS with multiple property JOINs ensuring BOTH conditions match the SAME artifact (not different artifacts)",
1138+
},
11231139
{
11241140
name: "Integer literal queries both int_value and double_value",
11251141
filterQuery: `artifacts.count = 100`,

catalog/internal/catalog/performance_metrics.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ import (
2121
// metadataJSON represents the minimal structure needed from metadata.json files
2222
// Only the ID field is needed to look up existing models
2323
type metadataJSON struct {
24-
ID string `json:"id"` // Maps to model name for lookup
24+
ID string `json:"id"` // Maps to model name for lookup
25+
OverallAccuracy *float64 `json:"overall_accuracy"` // Overall accuracy score for the model
2526
}
2627

2728
// parseMetadataJSON parses JSON data into metadataJSON struct, extracting only the ID field
@@ -298,12 +299,12 @@ func processModelDirectory(dirPath string, modelRepo dbmodels.CatalogModelReposi
298299
glog.V(2).Infof("Found existing model %s with ID %d, processing metrics", metadata.ID, modelID)
299300

300301
// Use batch processing for all artifacts
301-
return processModelArtifactsBatch(dirPath, modelID, metadata.ID, metricsArtifactRepo, metricsArtifactTypeID)
302+
return processModelArtifactsBatch(dirPath, modelID, metadata.ID, metadata.OverallAccuracy, metricsArtifactRepo, metricsArtifactTypeID)
302303
}
303304

304305
// processModelArtifactsBatch processes all metric artifacts for a model in batch
305306
// This reduces DB overhead by parsing, checking, and inserting in optimized phases
306-
func processModelArtifactsBatch(dirPath string, modelID int32, modelName string, metricsArtifactRepo dbmodels.CatalogMetricsArtifactRepository, metricsArtifactTypeID int32) (int, error) {
307+
func processModelArtifactsBatch(dirPath string, modelID int32, modelName string, overallAccuracy *float64, metricsArtifactRepo dbmodels.CatalogMetricsArtifactRepository, metricsArtifactTypeID int32) (int, error) {
307308
// Parse all metrics files
308309
var evaluationRecords []evaluationRecord
309310
var performanceRecords []performanceRecord
@@ -359,7 +360,7 @@ func processModelArtifactsBatch(dirPath string, modelID int32, modelName string,
359360
if len(evaluationRecords) > 0 {
360361
externalID := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
361362
if !existingArtifactsMap[externalID] {
362-
artifact := createAccuracyMetricsArtifact(evaluationRecords, modelID, metricsArtifactTypeID, nil, nil)
363+
artifact := createAccuracyMetricsArtifact(evaluationRecords, modelID, metricsArtifactTypeID, overallAccuracy, nil, nil)
363364
artifactsToInsert = append(artifactsToInsert, artifact)
364365
} else {
365366
glog.V(2).Infof("Accuracy metrics artifact already exists, skipping")
@@ -463,7 +464,7 @@ func parsePerformanceFile(filePath string) ([]performanceRecord, error) {
463464
}
464465

465466
// createAccuracyMetricsArtifact creates a single metrics artifact from all evaluation records
466-
func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32, typeID int32, existingID *int32, existingCreateTime *int64) *dbmodels.CatalogMetricsArtifactImpl {
467+
func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32, typeID int32, overallAccuracy *float64, existingID *int32, existingCreateTime *int64) *dbmodels.CatalogMetricsArtifactImpl {
467468
artifactName := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
468469
externalID := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
469470

@@ -506,6 +507,14 @@ func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32
506507
}
507508
}
508509

510+
// Add overall_average custom property from metadata.json overall_accuracy field
511+
if overallAccuracy != nil {
512+
customProperties = append(customProperties, models.Properties{
513+
Name: "overall_average",
514+
DoubleValue: overallAccuracy,
515+
})
516+
}
517+
509518
// Create the metrics artifact with metricsType set to accuracy-metrics
510519
metricsArtifact := &dbmodels.CatalogMetricsArtifactImpl{
511520
ID: existingID, // Use existing ID if updating

catalog/internal/catalog/performance_metrics_test.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,98 @@ func TestParseMetadataJSON_OnlyIDMatters(t *testing.T) {
235235
}
236236
}
237237

238+
func TestOverallAccuracyToOverallAverage(t *testing.T) {
239+
t.Run("parse overall_accuracy from metadata", func(t *testing.T) {
240+
tests := []struct {
241+
name string
242+
jsonData string
243+
wantNil bool
244+
wantValue float64
245+
}{
246+
{
247+
name: "overall_accuracy present",
248+
jsonData: `{"id": "model-1", "overall_accuracy": 85.5}`,
249+
wantNil: false,
250+
wantValue: 85.5,
251+
},
252+
{
253+
name: "overall_accuracy is zero",
254+
jsonData: `{"id": "model-2", "overall_accuracy": 0}`,
255+
wantNil: false,
256+
wantValue: 0.0,
257+
},
258+
{
259+
name: "overall_accuracy is null",
260+
jsonData: `{"id": "model-3", "overall_accuracy": null}`,
261+
wantNil: true,
262+
},
263+
{
264+
name: "overall_accuracy missing",
265+
jsonData: `{"id": "model-4"}`,
266+
wantNil: true,
267+
},
268+
}
269+
270+
for _, tt := range tests {
271+
t.Run(tt.name, func(t *testing.T) {
272+
metadata, err := parseMetadataJSON([]byte(tt.jsonData))
273+
if err != nil {
274+
t.Fatalf("parseMetadataJSON() error = %v", err)
275+
}
276+
277+
if tt.wantNil {
278+
if metadata.OverallAccuracy != nil {
279+
t.Errorf("OverallAccuracy = %v, want nil", *metadata.OverallAccuracy)
280+
}
281+
} else {
282+
if metadata.OverallAccuracy == nil {
283+
t.Errorf("OverallAccuracy = nil, want %v", tt.wantValue)
284+
} else if *metadata.OverallAccuracy != tt.wantValue {
285+
t.Errorf("OverallAccuracy = %v, want %v", *metadata.OverallAccuracy, tt.wantValue)
286+
}
287+
}
288+
})
289+
}
290+
})
291+
292+
t.Run("artifact has overall_average when overall_accuracy provided", func(t *testing.T) {
293+
overallAccuracy := 87.5
294+
evalRecords := []evaluationRecord{
295+
{Benchmark: "mmlu", CustomProperties: map[string]interface{}{"score": 90.0}},
296+
}
297+
298+
artifact := createAccuracyMetricsArtifact(evalRecords, 1, 100, &overallAccuracy, nil, nil)
299+
300+
found := false
301+
for _, prop := range *artifact.CustomProperties {
302+
if prop.Name == "overall_average" && prop.DoubleValue != nil {
303+
if *prop.DoubleValue != overallAccuracy {
304+
t.Errorf("overall_average = %v, want %v", *prop.DoubleValue, overallAccuracy)
305+
}
306+
found = true
307+
break
308+
}
309+
}
310+
if !found {
311+
t.Error("overall_average custom property not found in artifact")
312+
}
313+
})
314+
315+
t.Run("artifact has no overall_average when overall_accuracy is nil", func(t *testing.T) {
316+
evalRecords := []evaluationRecord{
317+
{Benchmark: "mmlu", CustomProperties: map[string]interface{}{"score": 90.0}},
318+
}
319+
320+
artifact := createAccuracyMetricsArtifact(evalRecords, 1, 100, nil, nil, nil)
321+
322+
for _, prop := range *artifact.CustomProperties {
323+
if prop.Name == "overall_average" {
324+
t.Error("overall_average should not exist when overall_accuracy is nil")
325+
}
326+
}
327+
})
328+
}
329+
238330
func TestEvaluationRecordUnmarshalJSON(t *testing.T) {
239331
tests := []struct {
240332
name string

clients/python/noxfile.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,8 @@ def tests(session: Session) -> None:
7272
@session(name="e2e", python=python_versions)
7373
def e2e_tests(session: Session) -> None:
7474
"""Run the test suite."""
75-
session.install(
75+
packages = [
7676
".",
77-
"ray",
7877
"requests",
7978
"pytest",
8079
"pytest-asyncio",
@@ -86,7 +85,12 @@ def e2e_tests(session: Session) -> None:
8685
"olot",
8786
"uvloop",
8887
"schemathesis",
89-
)
88+
]
89+
# Ray requires Python >3.9
90+
if session.python != "3.9":
91+
packages.insert(1, "ray")
92+
93+
session.install(*packages)
9094
try:
9195
session.run(
9296
"pytest",

0 commit comments

Comments
 (0)