1010import numpy as np
1111import pandas as pd
1212import pytest
13+ from pandas import Interval
1314from pandas ._testing import assert_frame_equal
1415
1516import neptune_query as npt
@@ -1358,12 +1359,8 @@ def test_create_empty_metric_buckets_dataframe():
13581359 )
13591360
13601361 # Then
1361- expected_df = (
1362- pd .DataFrame (data = {"bucket" : []}).astype (dtype = {"bucket" : "interval[float64, right]" }).set_index ("bucket" )
1363- )
1364- expected_df .columns = pd .MultiIndex .from_product (
1365- [[], [], ["local_min" , "local_max" ]], names = ["experiment" , "metric" , "bucket" ]
1366- )
1362+ expected_df = pd .DataFrame (data = {"bucket" : []}).astype (dtype = {"bucket" : "object" }).set_index ("bucket" )
1363+ expected_df .columns = pd .MultiIndex .from_product ([[], [], ["x" , "y" ]], names = ["experiment" , "metric" , "bucket" ])
13671364 expected_df .index .name = None
13681365
13691366 pd .testing .assert_frame_equal (df , expected_df )
@@ -1384,7 +1381,7 @@ def test_create_metric_buckets_dataframe():
13841381 assert not df .empty , "DataFrame should not be empty"
13851382
13861383 # Check the shape of the DataFrame
1387- num_expected_rows = BUCKETS
1384+ num_expected_rows = BUCKETS - 1
13881385 assert df .shape [0 ] == num_expected_rows , f"DataFrame should have { num_expected_rows } rows"
13891386
13901387 # Check the columns of the DataFrame
@@ -1403,6 +1400,77 @@ def test_create_metric_buckets_dataframe():
14031400 assert df .columns .get_level_values (2 ).nunique () == len (METRICS ), f"DataFrame should have { METRICS } metrics"
14041401
14051402
@pytest.mark.parametrize(
    "data,expected_df",
    [
        # Single bucket: degenerate interval closed on both ends.
        (
            {
                _generate_run_attribute_definition(experiment=1, path=1): [
                    _generate_bucket_metric(index=0),
                ]
            },
            pd.DataFrame(
                {
                    ("exp1", "path1", "x"): [20.0],
                    ("exp1", "path1", "y"): [0.0],
                },
                index=pd.Index([Interval(20.0, 20.0, closed="both")], dtype="object"),
            ),
        ),
        # Two adjacent buckets: first closed on both ends, second right-closed.
        (
            {
                _generate_run_attribute_definition(experiment=1, path=1): [
                    _generate_bucket_metric(index=0),
                    _generate_bucket_metric(index=2),
                ]
            },
            pd.DataFrame(
                {
                    ("exp1", "path1", "x"): [20.0, 58.0],
                    ("exp1", "path1", "y"): [0.0, 200.0],
                },
                index=pd.Index(
                    [Interval(20.0, 40.0, closed="both"), Interval(40.0, 60.0, closed="right")], dtype="object"
                ),
            ),
        ),
        # Non-adjacent buckets: a gap between the two intervals is preserved.
        (
            {
                _generate_run_attribute_definition(experiment=1, path=1): [
                    _generate_bucket_metric(index=0),
                    _generate_bucket_metric(index=3),
                ]
            },
            pd.DataFrame(
                {
                    ("exp1", "path1", "x"): [20.0, 78.0],
                    ("exp1", "path1", "y"): [0.0, 300.0],
                },
                index=pd.Index(
                    [Interval(20.0, 40.0, closed="both"), Interval(60.0, 80.0, closed="right")], dtype="object"
                ),
            ),
        ),
    ],
)
def test_create_metric_buckets_dataframe_parametrized(data, expected_df):
    """Bucket metrics for one experiment/path become a (experiment, metric, bucket)
    multi-column DataFrame indexed by object-dtype Interval buckets."""
    # Given: a single sys-id mapped to its experiment label; the expected frame's
    # column level names are set here so each parametrize case stays concise.
    sys_id_label_mapping = {
        SysId("sysid1"): "exp1",
    }
    expected_df.columns.names = ["experiment", "metric", "bucket"]

    # When
    df = create_metric_buckets_dataframe(
        buckets_data=data,
        sys_id_label_mapping=sys_id_label_mapping,
        container_column_name="experiment",
    )

    # Then
    pd.testing.assert_frame_equal(df, expected_df)
1473+
14061474def test_create_metric_buckets_dataframe_missing_values ():
14071475 # Given
14081476 data = {
@@ -1432,17 +1500,17 @@ def test_create_metric_buckets_dataframe_missing_values():
14321500
14331501 # Then
14341502 expected = {
1435- ("exp1" , "path1" , "x" ): [20.0 , 38.0 , np .nan ],
1436- ("exp1" , "path1" , "y" ): [0.0 , 100.0 , np .nan ],
1437- ("exp1" , "path2" , "x" ): [np . nan , 38.0 , 58.0 ],
1438- ("exp1" , "path2" , "y" ): [np . nan , 100.0 , 200.0 ],
1439- ("exp2" , "path1" , "x" ): [20.0 , np . nan , 58.0 ],
1440- ("exp2" , "path1" , "y" ): [0.0 , np . nan , 200.00 ],
1503+ ("exp1" , "path1" , "x" ): [38.0 , np .nan ],
1504+ ("exp1" , "path1" , "y" ): [100.0 , np .nan ],
1505+ ("exp1" , "path2" , "x" ): [38.0 , 58.0 ],
1506+ ("exp1" , "path2" , "y" ): [100.0 , 200.0 ],
1507+ ("exp2" , "path1" , "x" ): [20.0 , 58.0 ],
1508+ ("exp2" , "path1" , "y" ): [0.0 , 200.00 ],
14411509 }
14421510
14431511 expected_df = pd .DataFrame (
14441512 dict (sorted (expected .items ())),
1445- index = pd .IntervalIndex . from_tuples ([( float ( "-inf" ), 20.0 ), ( 20 .0 , 40.0 ), (40.0 , 60.0 )]),
1513+ index = pd .Index ([ Interval ( 20.0 , 40 .0 , closed = "both" ), Interval (40.0 , 60.0 , closed = "right" )]),
14461514 )
14471515 expected_df .columns .names = ["experiment" , "metric" , "bucket" ]
14481516
@@ -1456,6 +1524,7 @@ def test_create_metric_buckets_dataframe_sorted():
14561524 _generate_bucket_metric (index = 2 ),
14571525 _generate_bucket_metric (index = 0 ),
14581526 _generate_bucket_metric (index = 1 ),
1527+ _generate_bucket_metric (index = 3 ),
14591528 ],
14601529 }
14611530 sys_id_label_mapping = {
@@ -1470,13 +1539,19 @@ def test_create_metric_buckets_dataframe_sorted():
14701539
14711540 # Then
14721541 expected = {
1473- ("exp1" , "path1" , "x" ): [20 .0 , 38 .0 , 58 .0 ],
1474- ("exp1" , "path1" , "y" ): [0 .0 , 100 .0 , 200 .0 ],
1542+ ("exp1" , "path1" , "x" ): [38 .0 , 58 .0 , 78 .0 ],
1543+ ("exp1" , "path1" , "y" ): [100 .0 , 200 .0 , 300 .0 ],
14751544 }
14761545
14771546 expected_df = pd .DataFrame (
14781547 dict (sorted (expected .items ())),
1479- index = pd .IntervalIndex .from_tuples ([(float ("-inf" ), 20.0 ), (20.0 , 40.0 ), (40.0 , 60.0 )]),
1548+ index = pd .Index (
1549+ [
1550+ Interval (20.0 , 40.0 , closed = "both" ),
1551+ Interval (40.0 , 60.0 , closed = "right" ),
1552+ Interval (60.0 , 80.0 , closed = "right" ),
1553+ ]
1554+ ),
14801555 )
14811556 expected_df .columns .names = ["experiment" , "metric" , "bucket" ]
14821557
0 commit comments