Agenta-AI · jp-agenta · Oct 15, 2025 · Oct 15, 2025
diff --git a/.gitleaks.toml b/.gitleaks.toml
@@ -11,6 +11,7 @@ paths = [
   '''^core/docs/docs/reference/api/.*\.mdx''',
   '''^docs/docs/reference/api/.*\.mdx''',
   '''^docs/.docusaurus/.*''',
+  '''^docs/build/.*''',
   # -------------------------------------------------------------- WEB ARTIFACTS
   '''^.*/\.pnpm-store/.*''',
   '''^.*/public/__env\.js$''',
@@ -24,6 +25,8 @@ regexes = [
   '''is_completion=True''',
   '''YOUR_API_KEY''',
   '''_SECRET_KEY''',
+  # ------------------------------------------------------------ PUBLIC KEYS
+  '''phc_hmVSxIjTW1REBHXgj2aw4HW9X6CXb6FzerBgP9XenC7''',
   # ----------------------------------------------------------------------------
 ]
 

diff --git a/api/pyproject.toml b/api/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "api"
-version = "0.58.0"
+version = "0.59.0"
 description = "Agenta API"
 authors = [
     { name = "Mahmoud Mabrouk", email = "[email protected]" },

diff --git a/docs/blog/entries/filtering-traces-by-annotation.mdx b/docs/blog/entries/filtering-traces-by-annotation.mdx
@@ -0,0 +1,40 @@
+---
+title: "Filtering Traces by Annotation"
+slug: filtering-traces-by-annotation
+date: 2025-10-14
+tags: [v0.58.0]
+---
+
+import Image from "@theme/IdealImage";
+
+
+We rebuilt the filtering system in observability. The new filter dropdown offers more options, and we added annotation filtering: you can now filter and search traces based on their annotations. This feature helps you quickly find traces with low scores or bad feedback.
+
+### New Filter Options
+
+The new dropdown is simpler and gives you more options. You can now filter by:
+- **Span status**: Find successful or failed spans
+- **Input keys**: Search for specific inputs in your spans
+- **App or environment**: Filter traces from specific apps or environments
+- **Any key within your span**: Search custom data in your trace structure
+
+<Image img={require('/static/images/changelog/changelog-filters-observability.png')} alt="Complete observability filters" style={{display: 'block', margin: '20px auto', textAlign: 'center', width: '20%'}} />
+
+### Annotation Filtering
+
+Filter traces based on evaluations and feedback:
+- **Evaluator results**: Find spans evaluated by a specific evaluator
+- **User feedback**: Search for spans with feedback like `success=True`
+
+<Image img={require('/static/images/changelog/changelog-annotation-filter.png')} alt="Annotation filtering interface" style={{display: 'block', margin: '20px auto', textAlign: 'center'}} />
+
+This feature enables powerful workflows:
+
+1. **Capture user feedback** from your application using our API ([see tutorial](/tutorials/cookbooks/capture-user-feedback))
+2. **Filter traces** to find those with bad feedback or low scores
+3. **Add them to test sets** to track problematic cases
+4. **Improve your prompts** based on real user feedback
+
+The filtering system makes it easy to turn production issues into test cases.
+
+---
diff --git a/docs/blog/main.mdx b/docs/blog/main.mdx
@@ -10,6 +10,27 @@ import Image from "@theme/IdealImage";
 
 <section class="changelog">
 
+### [Filtering Traces by Annotation](/changelog/filtering-traces-by-annotation)
+
+_14 October 2025_
+
+**v0.58.0**
+<Image img={require('/static/images/changelog/changelog-annotation-filter.png')} alt="Annotation filtering interface" style={{display: 'block', margin: '20px auto', textAlign: 'center'}} />
+
+
+You can now filter and search traces based on their annotations. This helps you find traces with low scores or bad feedback quickly.
+
+We rebuilt the filtering system in observability with a simpler dropdown and more options. You can now filter by span status, input keys, app or environment references, and any key within your span.
+
+The new annotation filtering lets you find:
+- Spans evaluated by a specific evaluator
+- Spans with user feedback like `success=True`
+
+
+This enables powerful workflows: [capture user feedback](/tutorials/cookbooks/capture-user-feedback) from your app, filter to find traces with bad feedback, add them to test sets, and improve your prompts based on real user data.
+
+---
+
 ### [New Evaluation Results Dashboard](/changelog/new-evaluation-results-dashboard)
 
 _26 September 2025_

diff --git a/docs/src/data/roadmap.ts b/docs/src/data/roadmap.ts
@@ -24,6 +24,19 @@ export const shippedFeatures: ShippedFeature[] = [
     // Playground: BCFF78
     // Observability: DE74FF
     // Evaluation: 86B7FF
+    {
+        id: "filtering-by-annotation",
+        title: "Filtering Traces by Annotation",
+        description: "Filter and search for traces based on their annotations. Find traces with low scores or feedback quickly using the rebuilt filtering system.",
+        changelogPath: "/changelog/filtering-traces-by-annotation",
+        shippedAt: "2025-10-14",
+        labels: [
+            {
+                name: "Observability",
+                color: "DE74FF",
+            },
+        ],
+    },
     {
         id: "evaluation-results-dashboard",
         title: "New Evaluation Results Dashboard",
@@ -173,19 +186,7 @@ export const inProgressFeatures: PlannedFeature[] = [
             },
         ],
     },
-
     {
-        id: "filtering-by-annotation",
-        title: "Filtering Traces by Annotation",
-        description: "We are adding the ability to filter traces by annotation. This is useful for finding traces with low scores or feedback.",
-        githubUrl: "https://github.com/Agenta-AI/agenta/discussions/2729",
-        labels: [
-            {
-                name: "Observability",
-                color: "DE74FF",
-            },
-        ],
-    }, {
         id: "date-range-filtering",
         title: "Date Range Filtering in Metrics Dashboard",
         description: "We are adding the ability to filter traces by date range in the metrics dashboard.",

diff --git a/docs/static/images/changelog/changelog-annotation-filter.png b/docs/static/images/changelog/changelog-annotation-filter.png
diff --git a/docs/static/images/changelog/changelog-filters-observability.png b/docs/static/images/changelog/changelog-filters-observability.png
diff --git a/hooks/setup.sh b/hooks/setup.sh
@@ -26,8 +26,11 @@ if ! command -v gitleaks >/dev/null 2>&1; then
     brew install gitleaks
   else
     # fallback: go install (requires Go installed)
-    go install github.com/gitleaks/gitleaks/v8@latest
+    go install github.com/zricethezav/gitleaks/v8@latest
     export PATH="$PATH:$(go env GOPATH)/bin"
+    echo "----------------------------------------------------------------------"
+    echo "⚠️⚠️ PLEASE ADD $(go env GOPATH)/bin TO YOUR PATH IN YOUR .bashrc OR .zshrc ⚠️⚠️"
+    echo "----------------------------------------------------------------------"
   fi
 fi
 

diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta"
-version = "0.58.0"
+version = "0.59.0"
 description = "The SDK for agenta is an open-source LLMOps platform."
 readme = "README.md"
 authors = [

diff --git a/web/ee/package.json b/web/ee/package.json
@@ -1,6 +1,6 @@
 {
     "name": "@agenta/ee",
-    "version": "0.58.0",
+    "version": "0.59.0",
     "private": true,
     "engines": {
         "node": ">=18"

diff --git a/web/ee/src/components/Banners/BillingPlanBanner/FreeTrialBanner.tsx b/web/ee/src/components/Banners/BillingPlanBanner/FreeTrialBanner.tsx
@@ -1,7 +1,7 @@
 import {Button, Typography} from "antd"
 import {useRouter} from "next/router"
-import useURL from "@/oss/hooks/useURL"
 
+import useURL from "@/oss/hooks/useURL"
 import {SubscriptionType} from "@/oss/services/billing/types"
 
 import SubscriptionPlanDetails from "@/agenta-oss-common/components/pages/settings/Billing/Modals/PricingModal/assets/SubscriptionPlanDetails"

diff --git a/.../AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx b/.../AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx
@@ -1,6 +1,7 @@
+import clsx from "clsx"
+
 import SimpleSharedEditor from "@/oss/components/EditorViews/SimpleSharedEditor"
 import {useInvocationResult} from "@/oss/lib/hooks/useInvocationResult"
-import clsx from "clsx"
 
 const RunOutput = ({
     runId,

diff --git a/...EvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx b/...EvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx
@@ -1,15 +1,17 @@
+import {memo} from "react"
+
+import clsx from "clsx"
+import {useAtomValue} from "jotai"
+import dynamic from "next/dynamic"
+
+import EvalNameTag from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag"
 import {EVAL_TAG_COLOR} from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/utils"
 import {useRunId} from "@/oss/contexts/RunIdContext"
 import {
     evalAtomStore,
     evaluationRunStateFamily,
 } from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms"
 import {useInvocationResult} from "@/oss/lib/hooks/useInvocationResult"
-import clsx from "clsx"
-import {useAtomValue} from "jotai"
-import {memo} from "react"
-import dynamic from "next/dynamic"
-import EvalNameTag from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag"
 
 const GenerationResultUtils = dynamic(
     () =>

diff --git a/.../src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx b/.../src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx
@@ -7,15 +7,15 @@ import {atomFamily} from "jotai/utils"
 
 import {useRunId} from "@/oss/contexts/RunIdContext"
 import {evaluationRunStateFamily} from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms"
+import {projectIdAtom} from "@/oss/state/project/selectors/project"
 import {
     clearProjectVariantReferencesAtom,
     prefetchProjectVariantConfigs,
     setProjectVariantReferencesAtom,
 } from "@/oss/state/projectVariantConfig"
-import {projectIdAtom} from "@/oss/state/project/selectors/project"
 
-import {urlStateAtom} from "../../../state/urlState"
 import {collectProjectVariantReferences} from "../../../../../lib/hooks/usePreviewEvaluations/projectVariantConfigs"
+import {urlStateAtom} from "../../../state/urlState"
 
 import PromptConfigCard from "./assets/PromptConfigCard"
 

diff --git a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx
@@ -61,7 +61,7 @@ const DURATION_METRIC_KEY = INVOCATION_METRIC_KEYS[1]
 const TOKEN_METRIC_KEY = INVOCATION_METRIC_KEYS[2]
 const ERRORS_METRIC_KEY = INVOCATION_METRIC_KEYS[3]
 
-const INVOCATION_METRIC_COLUMNS: Array<{key: string; label: string}> = [
+const INVOCATION_METRIC_COLUMNS: {key: string; label: string}[] = [
     {key: COST_METRIC_KEY, label: "Cost (Total)"},
     {key: DURATION_METRIC_KEY, label: "Duration (Total)"},
     {key: TOKEN_METRIC_KEY, label: "Total tokens"},

diff --git a/web/ee/src/components/EvalRunDetails/UrlSync.tsx b/web/ee/src/components/EvalRunDetails/UrlSync.tsx
@@ -3,10 +3,11 @@ import {useEffect} from "react"
 import {useAtom, useAtomValue, useSetAtom} from "jotai"
 import {useRouter} from "next/router"
 
+import {useRunId} from "@/oss/contexts/RunIdContext"
+
 import {evalAtomStore} from "../../lib/hooks/useEvaluationRunData/assets/atoms/store"
 
 import {EvalRunUrlState, runViewTypeAtom, urlStateAtom} from "./state/urlState"
-import {useRunId} from "@/oss/contexts/RunIdContext"
 
 const UrlSync = ({evalType}: {evalType: "auto" | "human"}) => {
     const router = useRouter()

diff --git a/...components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx b/...components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx
@@ -1,9 +1,9 @@
 import {RefObject, useEffect, useMemo} from "react"
-import dynamic from "next/dynamic"
 
 import {DownOutlined} from "@ant-design/icons"
 import clsx from "clsx"
 import {atom, useAtom, useAtomValue} from "jotai"
+import dynamic from "next/dynamic"
 import {useResizeObserver} from "usehooks-ts"
 
 import {useRunId} from "@/oss/contexts/RunIdContext"

diff --git a/...components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts b/...components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts
@@ -1,7 +1,7 @@
 import groupBy from "lodash/groupBy"
 
-import {formatColumnTitle} from "@/oss/components/Filters/EditColumns/assets/helper"
 import {evalTypeAtom} from "@/oss/components/EvalRunDetails/state/evalType"
+import {formatColumnTitle} from "@/oss/components/Filters/EditColumns/assets/helper"
 import {
     evalAtomStore,
     evaluationEvaluatorsFamily,

diff --git a/...unDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx b/...unDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx
@@ -4,22 +4,23 @@ import deepEqual from "fast-deep-equal"
 import {atom, useAtomValue} from "jotai"
 import {atomFamily} from "jotai/utils"
 
+import {filterColumns} from "@/oss/components/Filters/EditColumns/assets/helper"
 import {
     evalAtomStore,
     evaluationRunStateFamily,
     runIndexFamily,
 } from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms"
-import {filterColumns} from "@/oss/components/Filters/EditColumns/assets/helper"
 import type {RunIndex} from "@/oss/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex"
 
 import {
     displayedScenarioIdsFamily,
     scenarioStepsFamily,
 } from "../../../../../lib/hooks/useEvaluationRunData/assets/atoms/runScopedScenarios"
 import {buildScenarioTableData} from "../assets/dataSourceBuilder"
+import type {TableColumn} from "../assets/types"
 import {buildAntdColumns} from "../assets/utils"
 import {expendedRowAtom} from "../ComparisonScenarioTable"
-import type {TableColumn} from "../assets/types"
+
 import {editColumnsFamily} from "./useTableDataSource"
 
 export interface GroupedScenario {

diff --git a/web/ee/src/components/EvalRunDetails/index.tsx b/web/ee/src/components/EvalRunDetails/index.tsx
@@ -7,8 +7,8 @@ import {createStore, getDefaultStore, Provider, useAtomValue, useSetAtom} from "
 import {selectAtom} from "jotai/utils"
 import {useRouter} from "next/router"
 
-import EvalRunDetails from "@/oss/components/EvalRunDetails/HumanEvalRun"
 import ErrorState from "@/oss/components/ErrorState"
+import EvalRunDetails from "@/oss/components/EvalRunDetails/HumanEvalRun"
 import SingleModelEvaluationTable from "@/oss/components/EvaluationTable/SingleModelEvaluationTable"
 import {RunIdProvider} from "@/oss/contexts/RunIdContext"
 import {useAppId} from "@/oss/hooks/useAppId"

diff --git a/web/ee/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx
@@ -0,0 +1,19 @@
+import {memo} from "react"
+
+import {Tag} from "antd"
+
+/**
+ * Cell component that renders each entry of `tags` as a borderless antd `Tag`,
+ * laid out in a single non-wrapping flex row.
+ *
+ * Renders nothing when `tags` is missing or empty.
+ */
+const EvaluatorTagsCell = memo(({tags}: {tags: string[]}) => {
+    const hasTags = Boolean(tags?.length)
+    if (!hasTags) {
+        return null
+    }
+
+    // The tag text doubles as the React key.
+    const chips = tags.map((label) => (
+        <Tag key={label} bordered={false} className="bg-[#0517290F]">
+            {label}
+        </Tag>
+    ))
+
+    return <div className="flex flex-nowrap items-center gap-2">{chips}</div>
+})
+
+export default EvaluatorTagsCell
diff --git a/web/ee/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx
@@ -0,0 +1,53 @@
+import {memo, useMemo, type CSSProperties} from "react"
+
+import {Tag} from "antd"
+
+import {EvaluatorTypeBadge} from "../types"
+
+/**
+ * Converts a hex colour string into its red/green/blue channels.
+ *
+ * Accepts the 3-digit and 6-digit forms, with or without a leading `#`
+ * (`"abc"`, `"#abc"`, `"aabbcc"`, `"#aabbcc"`), case-insensitively.
+ *
+ * @param hex - Candidate colour string; may be undefined or empty.
+ * @returns `{r, g, b}` with each channel in the range 0-255, or `null` when
+ *          `hex` is missing or is not a valid 3- or 6-digit hex colour.
+ */
+const hexToRgb = (hex?: string) => {
+    if (!hex) return null
+
+    // "#" is only meaningful as a prefix, so strip it there and nowhere else.
+    let sanitized = hex.startsWith("#") ? hex.slice(1) : hex
+    if (sanitized.length === 3) {
+        // Expand shorthand: "abc" -> "aabbcc".
+        sanitized = sanitized
+            .split("")
+            .map((char) => char + char)
+            .join("")
+    }
+
+    // Validate every character up front. parseInt stops at the first non-hex
+    // character, so a NaN check alone would accept inputs such as "danger"
+    // (parsed as 0xda) and report them as a real colour.
+    if (!/^[0-9a-f]{6}$/i.test(sanitized)) return null
+
+    const intVal = Number.parseInt(sanitized, 16)
+
+    return {
+        r: (intVal >> 16) & 255,
+        g: (intVal >> 8) & 255,
+        b: intVal & 255,
+    }
+}
+
+/**
+ * Bordered antd `Tag` showing `badge.label`, tinted from `badge.colorHex`.
+ *
+ * When `hexToRgb` can parse `badge.colorHex`, the tag gets an inline style
+ * built from that colour at three opacities (background 0.12, border 0.32,
+ * text 0.88). Otherwise no inline style is applied and the raw `colorHex`
+ * value is passed to the tag's `color` prop instead.
+ */
+const EvaluatorTypePill = memo(({badge}: {badge: EvaluatorTypeBadge}) => {
+    const {colorHex, label} = badge
+
+    // Recomputed only when the badge colour changes.
+    const tintedStyle = useMemo(() => {
+        const channels = hexToRgb(colorHex)
+        if (!channels) return undefined
+
+        const {r, g, b} = channels
+        const withAlpha = (alpha: number) => `rgba(${r}, ${g}, ${b}, ${alpha})`
+
+        return {
+            backgroundColor: withAlpha(0.12),
+            borderColor: withAlpha(0.32),
+            color: withAlpha(0.88),
+        } satisfies CSSProperties
+    }, [colorHex])
+
+    // Only hand the raw value to antd when no inline tint could be derived.
+    const fallbackColor = tintedStyle ? undefined : colorHex
+
+    return (
+        <Tag bordered style={tintedStyle} color={fallbackColor} className="!m-0 capitalize">
+            {label}
+        </Tag>
+    )
+})
+
+export default EvaluatorTypePill