Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/aiperf/server_metrics/accumulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,8 @@ def _compute_endpoint_summaries(
continue

if base_name not in metrics:
# Create appropriate type-specific metric data
match metric_entry.metric_type:
case PrometheusMetricType.GAUGE:
case PrometheusMetricType.GAUGE | PrometheusMetricType.UNKNOWN:
metrics[base_name] = GaugeMetricData(
description=metric_entry.description,
series=[series_stats],
Expand Down
1 change: 1 addition & 0 deletions src/aiperf/server_metrics/csv_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

STAT_KEYS_MAP = {
PrometheusMetricType.GAUGE: GAUGE_STAT_KEYS,
PrometheusMetricType.UNKNOWN: GAUGE_STAT_KEYS,
PrometheusMetricType.COUNTER: COUNTER_STAT_KEYS,
PrometheusMetricType.HISTOGRAM: HISTOGRAM_STAT_KEYS,
}
Expand Down
2 changes: 1 addition & 1 deletion src/aiperf/server_metrics/export_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def compute_stats(
42.47
"""
match metric_type:
case PrometheusMetricType.GAUGE:
case PrometheusMetricType.GAUGE | PrometheusMetricType.UNKNOWN:
return _compute_gauge_stats(
time_series,
time_filter,
Expand Down
2 changes: 1 addition & 1 deletion src/aiperf/server_metrics/json_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _build_hybrid_metrics(
# Get or create metric entry with appropriate type
if metric_name not in metrics:
match metric_data.type:
case PrometheusMetricType.GAUGE:
case PrometheusMetricType.GAUGE | PrometheusMetricType.UNKNOWN:
metric_class = GaugeMetricData
case PrometheusMetricType.COUNTER:
metric_class = CounterMetricData
Expand Down
12 changes: 10 additions & 2 deletions src/aiperf/server_metrics/parquet_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,10 @@ def _build_parquet_metadata(self, label_keys: set[str]) -> dict[bytes, bytes]:
for time_series_collection in hierarchy.endpoints.values():
for metric_entry in time_series_collection.metrics.values():
total_metrics += 1
if metric_entry.metric_type == PrometheusMetricType.GAUGE:
if metric_entry.metric_type in (
PrometheusMetricType.GAUGE,
PrometheusMetricType.UNKNOWN,
):
type_counts["gauge"] += 1
elif metric_entry.metric_type == PrometheusMetricType.COUNTER:
type_counts["counter"] += 1
Expand Down Expand Up @@ -446,6 +449,7 @@ def _collect_all_rows_generator(self, label_keys: set[str]):
if metric_type in (
PrometheusMetricType.GAUGE,
PrometheusMetricType.COUNTER,
PrometheusMetricType.UNKNOWN,
):
rows = self._collect_scalar_rows(
endpoint_url,
Expand Down Expand Up @@ -495,6 +499,7 @@ def _collect_all_rows(
if metric_type in (
PrometheusMetricType.GAUGE,
PrometheusMetricType.COUNTER,
PrometheusMetricType.UNKNOWN,
):
rows.extend(
self._collect_scalar_rows(
Expand Down Expand Up @@ -547,7 +552,10 @@ def _collect_scalar_rows(
return []

metric_type = metric_entry.metric_type
is_gauge = metric_type == PrometheusMetricType.GAUGE
is_gauge = metric_type in (
PrometheusMetricType.GAUGE,
PrometheusMetricType.UNKNOWN,
)

# Infer unit for this metric
unit = infer_unit(metric_name, metric_entry.description)
Expand Down
14 changes: 10 additions & 4 deletions src/aiperf/server_metrics/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,8 +890,14 @@ def from_metric_family(cls, metric_family: MetricFamily) -> Self:
return cls(
metric_type=metric_family.type,
description=metric_family.description,
data=ScalarTimeSeries()
if metric_family.type
in (PrometheusMetricType.GAUGE, PrometheusMetricType.COUNTER)
else HistogramTimeSeries(),
data=(
ScalarTimeSeries()
if metric_family.type
in (
PrometheusMetricType.GAUGE,
PrometheusMetricType.COUNTER,
PrometheusMetricType.UNKNOWN,
)
else HistogramTimeSeries()
),
)
53 changes: 20 additions & 33 deletions tests/server_metrics/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,45 +463,32 @@ def test_hashable(self) -> None:
class TestServerMetricEntry:
"""Tests for ServerMetricEntry class."""

def test_from_gauge_metric_family(self) -> None:
"""Test creating entry from gauge metric family."""
@pytest.mark.parametrize(
"metric_type,expected_data_type",
[
(PrometheusMetricType.GAUGE, ScalarTimeSeries),
(PrometheusMetricType.COUNTER, ScalarTimeSeries),
(PrometheusMetricType.HISTOGRAM, HistogramTimeSeries),
(PrometheusMetricType.UNKNOWN, ScalarTimeSeries),
],
)
def test_from_metric_family(
self,
metric_type: PrometheusMetricType,
expected_data_type: type,
) -> None:
"""Test creating entry from metric family produces correct data type."""
family = MetricFamily(
type=PrometheusMetricType.GAUGE,
description="Test gauge",
type=metric_type,
description=f"Test {metric_type.value}",
samples=[],
)

entry = ServerMetricEntry.from_metric_family(family)

assert entry.metric_type == PrometheusMetricType.GAUGE
assert entry.description == "Test gauge"
assert isinstance(entry.data, ScalarTimeSeries)

def test_from_counter_metric_family(self) -> None:
"""Test creating entry from counter metric family."""
family = MetricFamily(
type=PrometheusMetricType.COUNTER,
description="Test counter",
samples=[],
)

entry = ServerMetricEntry.from_metric_family(family)

assert entry.metric_type == PrometheusMetricType.COUNTER
assert isinstance(entry.data, ScalarTimeSeries)

def test_from_histogram_metric_family(self) -> None:
"""Test creating entry from histogram metric family."""
family = MetricFamily(
type=PrometheusMetricType.HISTOGRAM,
description="Test histogram",
samples=[],
)

entry = ServerMetricEntry.from_metric_family(family)

assert entry.metric_type == PrometheusMetricType.HISTOGRAM
assert isinstance(entry.data, HistogramTimeSeries)
assert entry.metric_type == metric_type
assert entry.description == f"Test {metric_type.value}"
assert isinstance(entry.data, expected_data_type)


class TestServerMetricsTimeSeries:
Expand Down
18 changes: 13 additions & 5 deletions tests/unit/server_metrics/test_accumulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,25 +111,33 @@ async def test_export_results_no_data(self, mock_user_config: UserConfig) -> Non

assert result is None

@pytest.mark.parametrize(
"metric_type,metric_name",
[
(PrometheusMetricType.GAUGE, "cache_usage"),
(PrometheusMetricType.UNKNOWN, "unknown_metric"),
],
)
async def test_export_results_with_data(
self,
mock_user_config: UserConfig,
metric_type: PrometheusMetricType,
metric_name: str,
) -> None:
"""Test export_results returns ServerMetricsResults with collected data."""
processor = ServerMetricsAccumulator(mock_user_config)

# Add multiple records
for i in range(5):
gauge = MetricFamily(
type=PrometheusMetricType.GAUGE,
description="KV cache usage",
family = MetricFamily(
type=metric_type,
description="Test metric",
samples=[MetricSample(labels=None, value=0.4 + i * 0.05)],
)
record = ServerMetricsRecord(
endpoint_url="http://node1:8081/metrics",
timestamp_ns=1_000_000_000 + i * 100_000_000,
endpoint_latency_ns=5_000_000,
metrics={"cache_usage": gauge},
metrics={metric_name: family},
)
await processor.process_server_metrics_record(record)

Expand Down
19 changes: 17 additions & 2 deletions tests/unit/server_metrics/test_csv_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pytest

from aiperf.common.config import EndpointConfig, UserConfig
from aiperf.common.enums import GenericMetricUnit
from aiperf.common.enums import GenericMetricUnit, PrometheusMetricType
from aiperf.common.exceptions import DataExporterDisabled
from aiperf.common.models import (
CounterMetricData,
Expand All @@ -28,7 +28,11 @@
ServerMetricsResults,
)
from aiperf.plugin.enums import EndpointType
from aiperf.server_metrics.csv_exporter import CsvMetricInfo, ServerMetricsCsvExporter
from aiperf.server_metrics.csv_exporter import (
STAT_KEYS_MAP,
CsvMetricInfo,
ServerMetricsCsvExporter,
)
from tests.unit.conftest import create_exporter_config

MetricDataType: TypeAlias = GaugeMetricData | CounterMetricData | HistogramMetricData
Expand Down Expand Up @@ -326,6 +330,17 @@ def server_metrics_results_with_labeled_metrics():
return _create_server_metrics_results({"localhost:8081": endpoint_summary})


class TestStatKeysMap:
"""Test STAT_KEYS_MAP covers all expected metric types."""

def test_unknown_metric_type_uses_gauge_stat_keys(self):
"""Test UNKNOWN type maps to the same stat keys as GAUGE."""
assert (
STAT_KEYS_MAP[PrometheusMetricType.UNKNOWN]
is STAT_KEYS_MAP[PrometheusMetricType.GAUGE]
)
Comment on lines +333 to +341
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

rg -n "section_order|UNKNOWN|PrometheusMetricType" --type py src/aiperf/server_metrics/csv_exporter.py

Repository: ai-dynamo/aiperf

Length of output: 1239


🏁 Script executed:

sed -n '170,200p' src/aiperf/server_metrics/csv_exporter.py

Repository: ai-dynamo/aiperf

Length of output: 1244


🏁 Script executed:

sed -n '213,250p' src/aiperf/server_metrics/csv_exporter.py

Repository: ai-dynamo/aiperf

Length of output: 1515


🏁 Script executed:

rg -n "UNKNOWN.*csv|test.*UNKNOWN.*metric|integration.*csv" --type py tests/

Repository: ai-dynamo/aiperf

Length of output: 42


🏁 Script executed:

rg -n "UNKNOWN|test_.*metric" tests/unit/server_metrics/test_csv_exporter.py | head -30

Repository: ai-dynamo/aiperf

Length of output: 937


Add UNKNOWN metrics to CSV output by including it in section_order or normalizing to GAUGE.

The STAT_KEYS_MAP correctly maps PrometheusMetricType.UNKNOWN to GAUGE_STAT_KEYS (line 44), but _generate_content does not process UNKNOWN metrics. The section_order list (lines 186-189) includes only GAUGE, COUNTER, and HISTOGRAM. Since _group_metrics_by_type stores metrics keyed by their raw type without normalizing UNKNOWN to GAUGE (line 237), any metric with type=UNKNOWN will be silently omitted from CSV output. Either add PrometheusMetricType.UNKNOWN to section_order or normalize UNKNOWN types to GAUGE during the grouping step. Additionally, add an integration test that verifies UNKNOWN-typed metrics produce rows in the CSV output.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/unit/server_metrics/test_csv_exporter.py` around lines 333 - 341, The
CSV exporter currently omits UNKNOWN metrics because section_order only lists
GAUGE/COUNTER/HISTOGRAM and _group_metrics_by_type keys metrics by their raw
type; fix by either adding PrometheusMetricType.UNKNOWN to the section_order
used by _generate_content or by normalizing UNKNOWN to
PrometheusMetricType.GAUGE inside _group_metrics_by_type (or both for clarity),
and add an integration test that creates a metric with
type=PrometheusMetricType.UNKNOWN and asserts a CSV row is emitted; reference
STAT_KEYS_MAP, PrometheusMetricType.UNKNOWN, section_order, _generate_content,
and _group_metrics_by_type when updating the code and tests.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

section_order will not have UNKNOWN in it since _group_metrics_by_type in csv_exporter.py line 211 iterates endpoint_summary.metrics.items and it type is defined as
metrics: SerializeAsAny[
dict[
str,
GaugeMetricData | CounterMetricData | HistogramMetricData,
]
that's ok because in accumulator.py in line 247
case PrometheusMetricType.GAUGE | PrometheusMetricType.UNKNOWN:
metrics[base_name] = GaugeMetricData(
description=metric_entry.description,
series=[series_stats],
)



class TestServerMetricsCsvExporterInitialization:
"""Test exporter initialization."""

Expand Down
27 changes: 27 additions & 0 deletions tests/unit/server_metrics/test_export_stats_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
import pytest

from aiperf.common.constants import NANOS_PER_SECOND
from aiperf.common.enums import PrometheusMetricType
from aiperf.common.models.server_metrics_models import TimeRangeFilter
from aiperf.server_metrics.export_stats import (
_compute_counter_stats,
_compute_gauge_stats,
_compute_histogram_stats,
_compute_histogram_timeslices,
compute_stats,
)
from aiperf.server_metrics.storage import (
ServerMetricKey,
Expand Down Expand Up @@ -87,6 +89,31 @@ def test_gauge_with_time_filter(self):
assert result.stats.max == 19.0


# =============================================================================
# Unknown Metric Type Tests
# =============================================================================


class TestUnknownExportStats:
"""Test UNKNOWN metric type is routed to gauge stats."""

def test_unknown_type_computes_gauge_stats(self):
"""Test UNKNOWN metric type produces identical results to GAUGE."""
ts = ServerMetricsTimeSeries()
add_gauge_samples(ts, "unknown_metric", [10.0, 20.0, 30.0])

result = compute_stats(
metric_type=PrometheusMetricType.UNKNOWN,
time_series=get_gauge(ts, "unknown_metric"),
time_filter=make_time_filter(),
)

assert result is not None
assert result.stats.avg == 20.0
assert result.stats.min == 10.0
assert result.stats.max == 30.0


# =============================================================================
# Counter Tests
# =============================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,37 +96,24 @@ def test_empty_histogram_timeslices_returns_none(self) -> None:

assert result is None

def test_compute_stats_with_empty_gauge(self) -> None:
"""Test compute_stats dispatcher handles empty gauge series."""
series = ScalarTimeSeries()

result = compute_stats(
metric_type=PrometheusMetricType.GAUGE,
time_series=series,
time_filter=make_time_filter(),
)

assert result is None

def test_compute_stats_with_empty_counter(self) -> None:
"""Test compute_stats dispatcher handles empty counter series."""
series = ScalarTimeSeries()

result = compute_stats(
metric_type=PrometheusMetricType.COUNTER,
time_series=series,
time_filter=make_time_filter(),
)

assert result is None

def test_compute_stats_with_empty_histogram(self) -> None:
"""Test compute_stats dispatcher handles empty histogram series."""
series = HistogramTimeSeries()

@pytest.mark.parametrize(
"metric_type,series_factory",
[
(PrometheusMetricType.GAUGE, ScalarTimeSeries),
(PrometheusMetricType.UNKNOWN, ScalarTimeSeries),
(PrometheusMetricType.COUNTER, ScalarTimeSeries),
(PrometheusMetricType.HISTOGRAM, HistogramTimeSeries),
],
)
def test_compute_stats_with_empty_series(
self,
metric_type: PrometheusMetricType,
series_factory: type,
) -> None:
"""Test compute_stats dispatcher returns None for empty series."""
result = compute_stats(
metric_type=PrometheusMetricType.HISTOGRAM,
time_series=series,
metric_type=metric_type,
time_series=series_factory(),
time_filter=make_time_filter(),
)

Expand Down
1 change: 1 addition & 0 deletions tests/unit/server_metrics/test_storage_edge_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ class TestServerMetricEntry:
(PrometheusMetricType.GAUGE, ScalarTimeSeries),
(PrometheusMetricType.COUNTER, ScalarTimeSeries),
(PrometheusMetricType.HISTOGRAM, HistogramTimeSeries),
(PrometheusMetricType.UNKNOWN, ScalarTimeSeries),
],
)
def test_from_metric_family(
Expand Down