Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitleaks.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ paths = [
'''^core/docs/docs/reference/api/.*\.mdx''',
'''^docs/docs/reference/api/.*\.mdx''',
'''^docs/.docusaurus/.*''',
'''^docs/build/.*''',
# -------------------------------------------------------------- WEB ARTIFACTS
'''^.*/\.pnpm-store/.*''',
'''^.*/public/__env\.js$''',
Expand All @@ -24,6 +25,8 @@ regexes = [
'''is_completion=True''',
'''YOUR_API_KEY''',
'''_SECRET_KEY''',
# ------------------------------------------------------------ PUBLIC KEYS
'''phc_hmVSxIjTW1REBHXgj2aw4HW9X6CXb6FzerBgP9XenC7''',
# ----------------------------------------------------------------------------
]

Expand Down
2 changes: 1 addition & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "api"
version = "0.58.0"
version = "0.59.0"
description = "Agenta API"
authors = [
{ name = "Mahmoud Mabrouk", email = "[email protected]" },
Expand Down
40 changes: 40 additions & 0 deletions docs/blog/entries/filtering-traces-by-annotation.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
title: "Filtering Traces by Annotation"
slug: filtering-traces-by-annotation
date: 2025-10-14
tags: [v0.58.0]
---

import Image from "@theme/IdealImage";


We rebuilt the filtering system in observability. The new dropdown offers more options, and we added annotation filtering: you can now filter and search traces based on their annotations. This feature helps you quickly find traces with low scores or bad feedback.

### New Filter Options

The new dropdown is simpler and gives you more options. You can now filter by:
- **Span status**: Find successful or failed spans
- **Input keys**: Search for specific inputs in your spans
- **App or environment**: Filter traces from specific apps or environments
- **Any key within your span**: Search custom data in your trace structure

<Image img={require('/static/images/changelog/changelog-filters-observability.png')} alt="Complete observability filters" style={{display: 'block', margin: '20px auto', textAlign: 'center', width: '20%'}} />

### Annotation Filtering

Filter traces based on evaluations and feedback:
- **Evaluator results**: Find spans evaluated by a specific evaluator
- **User feedback**: Search for spans with feedback like `success=True`

<Image img={require('/static/images/changelog/changelog-annotation-filter.png')} alt="Annotation filtering interface" style={{display: 'block', margin: '20px auto', textAlign: 'center'}} />

This feature enables powerful workflows:

1. **Capture user feedback** from your application using our API ([see tutorial](/tutorials/cookbooks/capture-user-feedback))
2. **Filter traces** to find those with bad feedback or low scores
3. **Add them to test sets** to track problematic cases
4. **Improve your prompts** based on real user feedback

The filtering system makes it easy to turn production issues into test cases.

---
21 changes: 21 additions & 0 deletions docs/blog/main.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,27 @@ import Image from "@theme/IdealImage";

<section class="changelog">

### [Filtering Traces by Annotation](/changelog/filtering-traces-by-annotation)

_14 October 2025_

**v0.58.0**
<Image img={require('/static/images/changelog/changelog-annotation-filter.png')} alt="Annotation filtering interface" style={{display: 'block', margin: '20px auto', textAlign: 'center'}} />


You can now filter and search traces based on their annotations. This helps you find traces with low scores or bad feedback quickly.

We rebuilt the filtering system in observability with a simpler dropdown and more options. You can now filter by span status, input keys, app or environment references, and any key within your span.

The new annotation filtering lets you find:
- Spans evaluated by a specific evaluator
- Spans with user feedback like `success=True`


This enables powerful workflows: [capture user feedback](/tutorials/cookbooks/capture-user-feedback) from your app, filter to find traces with bad feedback, add them to test sets, and improve your prompts based on real user data.

---

### [New Evaluation Results Dashboard](/changelog/new-evaluation-results-dashboard)

_26 September 2025_
Expand Down
25 changes: 13 additions & 12 deletions docs/src/data/roadmap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ export const shippedFeatures: ShippedFeature[] = [
// Playground: BCFF78
// Observability: DE74FF
// Evaluation: 86B7FF
{
id: "filtering-by-annotation",
title: "Filtering Traces by Annotation",
description: "Filter and search for traces based on their annotations. Find traces with low scores or feedback quickly using the rebuilt filtering system.",
changelogPath: "/changelog/filtering-traces-by-annotation",
shippedAt: "2025-10-14",
labels: [
{
name: "Observability",
color: "DE74FF",
},
],
},
{
id: "evaluation-results-dashboard",
title: "New Evaluation Results Dashboard",
Expand Down Expand Up @@ -173,19 +186,7 @@ export const inProgressFeatures: PlannedFeature[] = [
},
],
},

{
id: "filtering-by-annotation",
title: "Filtering Traces by Annotation",
description: "We are adding the ability to filter traces by annotation. This is useful for finding traces with low scores or feedback.",
githubUrl: "https://github.com/Agenta-AI/agenta/discussions/2729",
labels: [
{
name: "Observability",
color: "DE74FF",
},
],
}, {
id: "date-range-filtering",
title: "Date Range Filtering in Metrics Dashboard",
description: "We are adding the ability to filter traces by date range in the metrics dashboard.",
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 4 additions & 1 deletion hooks/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ if ! command -v gitleaks >/dev/null 2>&1; then
brew install gitleaks
else
# fallback: go install (requires Go installed)
go install github.com/gitleaks/gitleaks/v8@latest
go install github.com/zricethezav/gitleaks/v8@latest
export PATH="$PATH:$(go env GOPATH)/bin"
echo "----------------------------------------------------------------------"
echo "⚠️⚠️ PLEASE ADD $(go env GOPATH)/bin TO YOUR PATH IN YOUR .bashrc OR .zshrc ⚠️⚠️"
echo "----------------------------------------------------------------------"
fi
fi

Expand Down
2 changes: 1 addition & 1 deletion sdk/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "agenta"
version = "0.58.0"
version = "0.59.0"
description = "The SDK for agenta is an open-source LLMOps platform."
readme = "README.md"
authors = [
Expand Down
2 changes: 1 addition & 1 deletion web/ee/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@agenta/ee",
"version": "0.58.0",
"version": "0.59.0",
"private": true,
"engines": {
"node": ">=18"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {Button, Typography} from "antd"
import {useRouter} from "next/router"
import useURL from "@/oss/hooks/useURL"

import useURL from "@/oss/hooks/useURL"
import {SubscriptionType} from "@/oss/services/billing/types"

import SubscriptionPlanDetails from "@/agenta-oss-common/components/pages/settings/Billing/Modals/PricingModal/assets/SubscriptionPlanDetails"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import clsx from "clsx"

import SimpleSharedEditor from "@/oss/components/EditorViews/SimpleSharedEditor"
import {useInvocationResult} from "@/oss/lib/hooks/useInvocationResult"
import clsx from "clsx"

const RunOutput = ({
runId,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import {memo} from "react"

import clsx from "clsx"
import {useAtomValue} from "jotai"
import dynamic from "next/dynamic"

import EvalNameTag from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag"
import {EVAL_TAG_COLOR} from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/utils"
import {useRunId} from "@/oss/contexts/RunIdContext"
import {
evalAtomStore,
evaluationRunStateFamily,
} from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms"
import {useInvocationResult} from "@/oss/lib/hooks/useInvocationResult"
import clsx from "clsx"
import {useAtomValue} from "jotai"
import {memo} from "react"
import dynamic from "next/dynamic"
import EvalNameTag from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag"

const GenerationResultUtils = dynamic(
() =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ import {atomFamily} from "jotai/utils"

import {useRunId} from "@/oss/contexts/RunIdContext"
import {evaluationRunStateFamily} from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms"
import {projectIdAtom} from "@/oss/state/project/selectors/project"
import {
clearProjectVariantReferencesAtom,
prefetchProjectVariantConfigs,
setProjectVariantReferencesAtom,
} from "@/oss/state/projectVariantConfig"
import {projectIdAtom} from "@/oss/state/project/selectors/project"

import {urlStateAtom} from "../../../state/urlState"
import {collectProjectVariantReferences} from "../../../../../lib/hooks/usePreviewEvaluations/projectVariantConfigs"
import {urlStateAtom} from "../../../state/urlState"

import PromptConfigCard from "./assets/PromptConfigCard"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ const DURATION_METRIC_KEY = INVOCATION_METRIC_KEYS[1]
const TOKEN_METRIC_KEY = INVOCATION_METRIC_KEYS[2]
const ERRORS_METRIC_KEY = INVOCATION_METRIC_KEYS[3]

const INVOCATION_METRIC_COLUMNS: Array<{key: string; label: string}> = [
const INVOCATION_METRIC_COLUMNS: {key: string; label: string}[] = [
{key: COST_METRIC_KEY, label: "Cost (Total)"},
{key: DURATION_METRIC_KEY, label: "Duration (Total)"},
{key: TOKEN_METRIC_KEY, label: "Total tokens"},
Expand Down
3 changes: 2 additions & 1 deletion web/ee/src/components/EvalRunDetails/UrlSync.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ import {useEffect} from "react"
import {useAtom, useAtomValue, useSetAtom} from "jotai"
import {useRouter} from "next/router"

import {useRunId} from "@/oss/contexts/RunIdContext"

import {evalAtomStore} from "../../lib/hooks/useEvaluationRunData/assets/atoms/store"

import {EvalRunUrlState, runViewTypeAtom, urlStateAtom} from "./state/urlState"
import {useRunId} from "@/oss/contexts/RunIdContext"

const UrlSync = ({evalType}: {evalType: "auto" | "human"}) => {
const router = useRouter()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import {RefObject, useEffect, useMemo} from "react"
import dynamic from "next/dynamic"

import {DownOutlined} from "@ant-design/icons"
import clsx from "clsx"
import {atom, useAtom, useAtomValue} from "jotai"
import dynamic from "next/dynamic"
import {useResizeObserver} from "usehooks-ts"

import {useRunId} from "@/oss/contexts/RunIdContext"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import groupBy from "lodash/groupBy"

import {formatColumnTitle} from "@/oss/components/Filters/EditColumns/assets/helper"
import {evalTypeAtom} from "@/oss/components/EvalRunDetails/state/evalType"
import {formatColumnTitle} from "@/oss/components/Filters/EditColumns/assets/helper"
import {
evalAtomStore,
evaluationEvaluatorsFamily,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,23 @@ import deepEqual from "fast-deep-equal"
import {atom, useAtomValue} from "jotai"
import {atomFamily} from "jotai/utils"

import {filterColumns} from "@/oss/components/Filters/EditColumns/assets/helper"
import {
evalAtomStore,
evaluationRunStateFamily,
runIndexFamily,
} from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms"
import {filterColumns} from "@/oss/components/Filters/EditColumns/assets/helper"
import type {RunIndex} from "@/oss/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex"

import {
displayedScenarioIdsFamily,
scenarioStepsFamily,
} from "../../../../../lib/hooks/useEvaluationRunData/assets/atoms/runScopedScenarios"
import {buildScenarioTableData} from "../assets/dataSourceBuilder"
import type {TableColumn} from "../assets/types"
import {buildAntdColumns} from "../assets/utils"
import {expendedRowAtom} from "../ComparisonScenarioTable"
import type {TableColumn} from "../assets/types"

import {editColumnsFamily} from "./useTableDataSource"

export interface GroupedScenario {
Expand Down
2 changes: 1 addition & 1 deletion web/ee/src/components/EvalRunDetails/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import {createStore, getDefaultStore, Provider, useAtomValue, useSetAtom} from "
import {selectAtom} from "jotai/utils"
import {useRouter} from "next/router"

import EvalRunDetails from "@/oss/components/EvalRunDetails/HumanEvalRun"
import ErrorState from "@/oss/components/ErrorState"
import EvalRunDetails from "@/oss/components/EvalRunDetails/HumanEvalRun"
import SingleModelEvaluationTable from "@/oss/components/EvaluationTable/SingleModelEvaluationTable"
import {RunIdProvider} from "@/oss/contexts/RunIdContext"
import {useAppId} from "@/oss/hooks/useAppId"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import {memo} from "react"

import {Tag} from "antd"

/**
 * Table cell that lays out a list of tag labels as borderless antd `Tag`s
 * in a single non-wrapping row.
 *
 * Renders nothing when `tags` is missing or empty. Each tag's own text is
 * used as its React key.
 */
const EvaluatorTagsCell = memo(({tags}: {tags: string[]}) => {
    const hasTags = Boolean(tags?.length)
    if (!hasTags) return null

    // Build the tag elements up front so the returned markup stays flat.
    const tagNodes = tags.map((label) => (
        <Tag key={label} bordered={false} className="bg-[#0517290F]">
            {label}
        </Tag>
    ))

    return <div className="flex flex-nowrap items-center gap-2">{tagNodes}</div>
})

export default EvaluatorTagsCell
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import {memo, useMemo, type CSSProperties} from "react"

import {Tag} from "antd"

import {EvaluatorTypeBadge} from "../types"

/**
 * Converts a hex color string into its red, green and blue channels.
 *
 * Accepts 3-digit (`"fff"`) and 6-digit (`"DE74FF"`) forms, with or without
 * a single leading `#`. The 3-digit shorthand is expanded by doubling each
 * digit.
 *
 * @param hex - The color string to parse; may be undefined or empty.
 * @returns `{r, g, b}` with each channel in the 0-255 range, or `null` when
 *          the input is missing or is not a well-formed hex color.
 */
const hexToRgb = (hex?: string) => {
    if (!hex) return null

    // Only a leading "#" is part of the notation; one anywhere else is invalid.
    let sanitized = hex.replace(/^#/, "")
    if (sanitized.length === 3) {
        sanitized = sanitized
            .split("")
            .map((char) => char + char)
            .join("")
    }

    // Validate every character explicitly: parseInt(..., 16) stops at the first
    // non-hex character and tolerates "0x" / sign prefixes, so on its own it
    // would turn inputs like "12zzzz" or "0x1234" into bogus colors.
    if (!/^[0-9a-fA-F]{6}$/.test(sanitized)) return null

    const intVal = Number.parseInt(sanitized, 16)

    return {
        r: (intVal >> 16) & 255,
        g: (intVal >> 8) & 255,
        b: intVal & 255,
    }
}

/**
 * Bordered pill showing an evaluator type badge's label, tinted from the
 * badge's `colorHex`.
 *
 * When `colorHex` parses via `hexToRgb`, the tag gets a translucent
 * background, border and text color built from the same RGB channels at
 * different alpha values. When it does not parse, no inline style is applied
 * and the raw `colorHex` is passed to the tag's `color` prop instead.
 */
const EvaluatorTypePill = memo(({badge}: {badge: EvaluatorTypeBadge}) => {
    const colorHex = badge.colorHex

    const tintedStyle = useMemo(() => {
        const channels = hexToRgb(colorHex)
        if (!channels) return undefined

        const {r, g, b} = channels
        // Same hue at three opacities: faint fill, mid border, strong text.
        const withAlpha = (alpha: number) => `rgba(${r}, ${g}, ${b}, ${alpha})`

        return {
            backgroundColor: withAlpha(0.12),
            borderColor: withAlpha(0.32),
            color: withAlpha(0.88),
        } satisfies CSSProperties
    }, [colorHex])

    // Only pass the `color` prop when no inline tint could be computed.
    const fallbackColor = tintedStyle ? undefined : colorHex

    return (
        <Tag bordered style={tintedStyle} color={fallbackColor} className="!m-0 capitalize">
            {badge.label}
        </Tag>
    )
})

export default EvaluatorTypePill
Loading