opendatahub-io
diff --git a/.github/workflows/constraints.txt b/.github/workflows/constraints.txt
Lines changed: 6 additions & 6 deletions
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
Lines changed: 3 additions & 0 deletions
diff --git a/catalog/internal/catalog/db_catalog_filterquery_test.go b/catalog/internal/catalog/db_catalog_filterquery_test.go
Lines changed: 31 additions & 15 deletions
diff --git a/catalog/internal/catalog/performance_metrics.go b/catalog/internal/catalog/performance_metrics.go
Lines changed: 14 additions & 5 deletions
diff --git a/catalog/internal/catalog/performance_metrics_test.go b/catalog/internal/catalog/performance_metrics_test.go
Lines changed: 92 additions & 0 deletions
diff --git a/clients/python/noxfile.py b/clients/python/noxfile.py
Lines changed: 7 additions & 3 deletions
@@ -1,6 +1,6 @@
-pip==23.3.2
-nox==2023.4.22
-nox-poetry==1.0.3
-poetry==1.8.3
-poetry-plugin-export==1.8.0
-virtualenv==20.24.6
+pip==25.3
+nox==2025.11.12
+nox-poetry==1.2.0
+poetry==2.0.1
+poetry-plugin-export==1.9.0
+virtualenv==20.35.4
@@ -53,6 +53,7 @@ jobs:
         run: |
           pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt poetry
           poetry --version
+          pipx inject poetry poetry-plugin-export
       - name: Install Nox
         run: |
           pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt nox
@@ -169,6 +170,7 @@ jobs:
         run: |
           pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt poetry
           poetry --version
+          pipx inject poetry poetry-plugin-export
       - name: Install Nox
         run: |
           pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt nox
@@ -288,6 +290,7 @@ jobs:
         run: |
           pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt poetry
           poetry --version
+          pipx inject poetry poetry-plugin-export
       - name: Install Nox
         run: |
           pipx install --pip-args=--constraint=${{ github.workspace }}/.github/workflows/constraints.txt nox
 
@@ -721,8 +721,9 @@ func TestArtifactFilteringCapability(t *testing.T) {
 				".name = $",
 				".double_value <= $",
 			},
-			expectedArgs: []any{"ttft_mean", float64(90), "tpot_mean", float64(50)},
-			description:  "Should handle multiple artifact property filters with separate JOINs",
+			// Args order: JOIN property names first, then WHERE value conditions
+			expectedArgs: []any{"ttft_mean", "tpot_mean", float64(90), float64(50)},
+			description:  "Should handle multiple artifact property filters in a single EXISTS with multiple property JOINs on the SAME artifact",
 		},
 		{
 			name:        "Artifact property with LIKE",
@@ -835,20 +836,19 @@ func TestArtifactFilteringCapability(t *testing.T) {
 					expectedFragment, generatedSQL)
 			}
 
-			// Verify arguments if specified
+			// Verify arguments if specified - ORDER MATTERS for SQL placeholder mapping
 			if len(tt.expectedArgs) > 0 {
-				// Check that all expected args are present (order may vary due to JOINs)
-				// For numeric values, check value equality regardless of type (int vs float64)
-				for _, expectedArg := range tt.expectedArgs {
-					found := false
-					for _, actualArg := range queryArgs {
-						if actualArg == expectedArg {
-							found = true
-							break
-						}
-					}
-					assert.True(t, found, "Expected argument %v not found in actual args: %v",
-						expectedArg, queryArgs)
+				// Args must be in exact order to match SQL placeholders ($1, $2, etc.)
+				// This is critical for combined artifact filters where JOIN args come before WHERE args
+				require.Equal(t, len(tt.expectedArgs), len(queryArgs)-1, // -1 for type_id
+					"Argument count mismatch (excluding type_id)")
+
+				// Compare args starting from index 1 (skip type_id at index 0)
+				for i, expectedArg := range tt.expectedArgs {
+					actualArg := queryArgs[i+1] // +1 to skip type_id
+					assert.Equal(t, expectedArg, actualArg,
+						"Argument at position %d should be %v but was %v. Full args: %v",
+						i+1, expectedArg, actualArg, queryArgs)
 				}
 			}
 
@@ -1120,6 +1120,22 @@ func TestArtifactFilteringEdgeCases(t *testing.T) {
 			},
 			description: "Should handle both exact and case-insensitive matching on artifact properties (ILIKE uses UPPER for cross-DB compatibility)",
 		},
+		{
+			name:        "Bug fix: multiple artifact filters must match SAME artifact",
+			filterQuery: `artifacts.hardware_type LIKE "H200" AND artifacts.ttft_p95 < 50`,
+			expectedSQL: []string{
+				"EXISTS",
+				`"Attribution"`,
+				`"Artifact"`,
+				// Both property JOINs should reference the same artifact (art_X)
+				"artprop_",
+				".artifact_id = art_",
+				// Both conditions should be in the WHERE clause
+				".string_value LIKE $",
+				".double_value < $",
+			},
+			description: "Multiple artifact property filters with AND should generate a SINGLE EXISTS with multiple property JOINs ensuring BOTH conditions match the SAME artifact (not different artifacts)",
+		},
 		{
 			name:        "Integer literal queries both int_value and double_value",
 			filterQuery: `artifacts.count = 100`,
 
@@ -21,7 +21,8 @@ import (
 // metadataJSON represents the minimal structure needed from metadata.json files
 // Only the ID field is needed to look up existing models
 type metadataJSON struct {
-	ID string `json:"id"` // Maps to model name for lookup
+	ID              string   `json:"id"`               // Maps to model name for lookup
+	OverallAccuracy *float64 `json:"overall_accuracy"` // Overall accuracy score for the model
 }
 
 // parseMetadataJSON parses JSON data into metadataJSON struct, extracting the ID and the optional overall_accuracy field
@@ -298,12 +299,12 @@ func processModelDirectory(dirPath string, modelRepo dbmodels.CatalogModelReposi
 	glog.V(2).Infof("Found existing model %s with ID %d, processing metrics", metadata.ID, modelID)
 
 	// Use batch processing for all artifacts
-	return processModelArtifactsBatch(dirPath, modelID, metadata.ID, metricsArtifactRepo, metricsArtifactTypeID)
+	return processModelArtifactsBatch(dirPath, modelID, metadata.ID, metadata.OverallAccuracy, metricsArtifactRepo, metricsArtifactTypeID)
 }
 
 // processModelArtifactsBatch processes all metric artifacts for a model in batch
 // This reduces DB overhead by parsing, checking, and inserting in optimized phases
-func processModelArtifactsBatch(dirPath string, modelID int32, modelName string, metricsArtifactRepo dbmodels.CatalogMetricsArtifactRepository, metricsArtifactTypeID int32) (int, error) {
+func processModelArtifactsBatch(dirPath string, modelID int32, modelName string, overallAccuracy *float64, metricsArtifactRepo dbmodels.CatalogMetricsArtifactRepository, metricsArtifactTypeID int32) (int, error) {
 	// Parse all metrics files
 	var evaluationRecords []evaluationRecord
 	var performanceRecords []performanceRecord
@@ -359,7 +360,7 @@ func processModelArtifactsBatch(dirPath string, modelID int32, modelName string,
 	if len(evaluationRecords) > 0 {
 		externalID := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
 		if !existingArtifactsMap[externalID] {
-			artifact := createAccuracyMetricsArtifact(evaluationRecords, modelID, metricsArtifactTypeID, nil, nil)
+			artifact := createAccuracyMetricsArtifact(evaluationRecords, modelID, metricsArtifactTypeID, overallAccuracy, nil, nil)
 			artifactsToInsert = append(artifactsToInsert, artifact)
 		} else {
 			glog.V(2).Infof("Accuracy metrics artifact already exists, skipping")
@@ -463,7 +464,7 @@ func parsePerformanceFile(filePath string) ([]performanceRecord, error) {
 }
 
 // createAccuracyMetricsArtifact creates a single metrics artifact from all evaluation records
-func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32, typeID int32, existingID *int32, existingCreateTime *int64) *dbmodels.CatalogMetricsArtifactImpl {
+func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32, typeID int32, overallAccuracy *float64, existingID *int32, existingCreateTime *int64) *dbmodels.CatalogMetricsArtifactImpl {
 	artifactName := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
 	externalID := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
 
@@ -506,6 +507,14 @@ func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32
 		}
 	}
 
+	// Add overall_average custom property from metadata.json overall_accuracy field
+	if overallAccuracy != nil {
+		customProperties = append(customProperties, models.Properties{
+			Name:        "overall_average",
+			DoubleValue: overallAccuracy,
+		})
+	}
+
 	// Create the metrics artifact with metricsType set to accuracy-metrics
 	metricsArtifact := &dbmodels.CatalogMetricsArtifactImpl{
 		ID:     existingID, // Use existing ID if updating
 
@@ -235,6 +235,98 @@ func TestParseMetadataJSON_OnlyIDMatters(t *testing.T) {
 	}
 }
 
+func TestOverallAccuracyToOverallAverage(t *testing.T) {
+	t.Run("parse overall_accuracy from metadata", func(t *testing.T) {
+		tests := []struct {
+			name      string
+			jsonData  string
+			wantNil   bool
+			wantValue float64
+		}{
+			{
+				name:      "overall_accuracy present",
+				jsonData:  `{"id": "model-1", "overall_accuracy": 85.5}`,
+				wantNil:   false,
+				wantValue: 85.5,
+			},
+			{
+				name:      "overall_accuracy is zero",
+				jsonData:  `{"id": "model-2", "overall_accuracy": 0}`,
+				wantNil:   false,
+				wantValue: 0.0,
+			},
+			{
+				name:     "overall_accuracy is null",
+				jsonData: `{"id": "model-3", "overall_accuracy": null}`,
+				wantNil:  true,
+			},
+			{
+				name:     "overall_accuracy missing",
+				jsonData: `{"id": "model-4"}`,
+				wantNil:  true,
+			},
+		}
+
+		for _, tt := range tests {
+			t.Run(tt.name, func(t *testing.T) {
+				metadata, err := parseMetadataJSON([]byte(tt.jsonData))
+				if err != nil {
+					t.Fatalf("parseMetadataJSON() error = %v", err)
+				}
+
+				if tt.wantNil {
+					if metadata.OverallAccuracy != nil {
+						t.Errorf("OverallAccuracy = %v, want nil", *metadata.OverallAccuracy)
+					}
+				} else {
+					if metadata.OverallAccuracy == nil {
+						t.Errorf("OverallAccuracy = nil, want %v", tt.wantValue)
+					} else if *metadata.OverallAccuracy != tt.wantValue {
+						t.Errorf("OverallAccuracy = %v, want %v", *metadata.OverallAccuracy, tt.wantValue)
+					}
+				}
+			})
+		}
+	})
+
+	t.Run("artifact has overall_average when overall_accuracy provided", func(t *testing.T) {
+		overallAccuracy := 87.5
+		evalRecords := []evaluationRecord{
+			{Benchmark: "mmlu", CustomProperties: map[string]interface{}{"score": 90.0}},
+		}
+
+		artifact := createAccuracyMetricsArtifact(evalRecords, 1, 100, &overallAccuracy, nil, nil)
+
+		found := false
+		for _, prop := range *artifact.CustomProperties {
+			if prop.Name == "overall_average" && prop.DoubleValue != nil {
+				if *prop.DoubleValue != overallAccuracy {
+					t.Errorf("overall_average = %v, want %v", *prop.DoubleValue, overallAccuracy)
+				}
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Error("overall_average custom property not found in artifact")
+		}
+	})
+
+	t.Run("artifact has no overall_average when overall_accuracy is nil", func(t *testing.T) {
+		evalRecords := []evaluationRecord{
+			{Benchmark: "mmlu", CustomProperties: map[string]interface{}{"score": 90.0}},
+		}
+
+		artifact := createAccuracyMetricsArtifact(evalRecords, 1, 100, nil, nil, nil)
+
+		for _, prop := range *artifact.CustomProperties {
+			if prop.Name == "overall_average" {
+				t.Error("overall_average should not exist when overall_accuracy is nil")
+			}
+		}
+	})
+}
+
 func TestEvaluationRecordUnmarshalJSON(t *testing.T) {
 	tests := []struct {
 		name             string
 
@@ -72,9 +72,8 @@ def tests(session: Session) -> None:
 @session(name="e2e", python=python_versions)
 def e2e_tests(session: Session) -> None:
     """Run the test suite."""
-    session.install(
+    packages = [
         ".",
-        "ray",
         "requests",
         "pytest",
         "pytest-asyncio",
@@ -86,7 +85,12 @@ def e2e_tests(session: Session) -> None:
         "olot",
         "uvloop",
         "schemathesis",
-    )
+    ]
+    # Ray requires Python >3.9
+    if session.python != "3.9":
+        packages.insert(1, "ray")
+
+    session.install(*packages)
     try:
         session.run(
             "pytest",