@@ -43,106 +43,162 @@ def __init__(
4343 )
4444
def _prepare_data(self, data) -> pd.DataFrame:
    """Prepare SxS data as per-model, per-property rows.

    Clusters are linked to individual properties through ``property_ids``,
    each property carries the ``model`` that produced it, and the
    conversation-level metadata and per-model scores are joined on
    ``(conversation_id, model)``.

    Args:
        data: Object exposing ``clusters``, ``properties`` and
            ``conversations``. Clusters and properties must provide
            ``to_dict()``; conversations must provide ``question_id``,
            ``meta``, ``model`` and ``scores``.

    Returns:
        A DataFrame with the columns ``conversation_id``,
        ``conversation_metadata``, ``property_metadata``, ``model``,
        ``cluster``, ``property_description``, ``scores`` and
        ``cluster_metadata`` (one row per property), or an empty DataFrame
        when either clusters or properties are missing.

    Raises:
        KeyError: If the clusters do not carry ``property_ids``, so they
            cannot be linked to properties.
    """
    output_columns = [
        "conversation_id",
        "conversation_metadata",
        "property_metadata",
        "model",
        "cluster",
        "property_description",
        "scores",
        "cluster_metadata",
    ]
    # Columns whose cells are dictionaries; they need a fresh ``{}`` per row.
    dict_columns = ("conversation_metadata", "scores", "cluster_metadata")

    # Require both clusters and properties. If either is missing, we cannot
    # compute metrics.
    if not data.clusters or not data.properties:
        return pd.DataFrame()

    # ------------------------------------------------------------------
    # 1) Cluster information at the property level
    # ------------------------------------------------------------------
    clusters_df = pd.DataFrame([cluster.to_dict() for cluster in data.clusters])

    # Explode the aligned list columns so each row is a single property id.
    list_cols = [
        col
        for col in ("property_ids", "property_descriptions", "question_ids")
        if col in clusters_df.columns
    ]
    if list_cols:
        clusters_df = clusters_df.explode(list_cols, ignore_index=True)

    clusters_df = clusters_df.rename(
        columns={
            "property_ids": "property_id",
            "property_descriptions": "property_description",
            "question_ids": "conversation_id",
            "label": "cluster",
        }
    )
    if "property_id" not in clusters_df.columns:
        raise KeyError(
            "clusters must provide 'property_ids' to be linked to properties"
        )

    if "meta" in clusters_df.columns:
        clusters_df["cluster_metadata"] = clusters_df["meta"]

    # Keep only what is needed downstream. Empty clusters explode into a
    # row with a missing property id, which must not take part in the join.
    cluster_cols = [
        col
        for col in ("property_id", "cluster", "property_description", "cluster_metadata")
        if col in clusters_df.columns
    ]
    clusters_df = clusters_df[cluster_cols].dropna(subset=["property_id"])

    # ------------------------------------------------------------------
    # 2) Property information (includes the model that owns the property)
    # ------------------------------------------------------------------
    properties_df = pd.DataFrame([prop.to_dict() for prop in data.properties])
    properties_df = properties_df.rename(
        columns={"id": "property_id", "question_id": "conversation_id"}
    )

    # ------------------------------------------------------------------
    # 3) Conversation-level scores and metadata, expanded per model
    # ------------------------------------------------------------------
    expanded_rows = []
    for conv in data.conversations:
        # Side-by-side: conv.model is a pair of models; anything else is skipped.
        if not (isinstance(conv.model, (list, tuple)) and len(conv.model) == 2):
            continue
        model_a, model_b = conv.model
        for this_model, other_model in ((model_a, model_b), (model_b, model_a)):
            expanded_rows.append(
                {
                    "conversation_id": conv.question_id,
                    "model": this_model,
                    "scores": self._transform_scores_for_model(
                        conv.scores, this_model, other_model, conv
                    ),
                    "conversation_metadata": conv.meta,
                }
            )

    # Explicit columns keep the join keys present even when no conversation
    # qualified; otherwise the merge below fails on the missing keys.
    conversations_df = pd.DataFrame(
        expanded_rows,
        columns=["conversation_id", "model", "scores", "conversation_metadata"],
    )

    # ------------------------------------------------------------------
    # 4) Join: properties <-> conversations <-> clusters
    # ------------------------------------------------------------------
    properties_with_conv = properties_df.merge(
        conversations_df,
        on=["conversation_id", "model"],
        how="left",
    )
    full_df = properties_with_conv.merge(clusters_df, on="property_id", how="left")

    # The second merge suffixes property_description (_x from the property,
    # _y from the cluster) when both sides carry it; prefer the property's
    # value and fall back to the cluster's.
    if "property_description" not in full_df.columns:
        suffixed = [
            col
            for col in ("property_description_x", "property_description_y")
            if col in full_df.columns
        ]
        if suffixed:
            description = full_df[suffixed[0]]
            for col in suffixed[1:]:
                description = description.combine_first(full_df[col])
            full_df["property_description"] = description
            full_df = full_df.drop(columns=suffixed)
        else:
            full_df["property_description"] = ""

    # Derive property_metadata from the property description if not provided.
    if "property_metadata" not in full_df.columns:
        full_df["property_metadata"] = full_df["property_description"].apply(
            lambda x: {"property_description": x}
        )

    # Dictionary-valued columns: create them if absent and replace the gaps
    # left by the left joins. Assigning a bare ``{}`` to a column does not
    # broadcast an empty dict per row, so build the values explicitly.
    for col in dict_columns:
        if col in full_df.columns:
            full_df[col] = [
                {} if value is None or (isinstance(value, float) and value != value) else value
                for value in full_df[col]
            ]
        else:
            full_df[col] = [{} for _ in range(len(full_df))]

    # ------------------------------------------------------------------
    # 5) Match the schema expected by FunctionalMetrics
    # ------------------------------------------------------------------
    for col in output_columns:
        if col not in full_df.columns:
            full_df[col] = "unknown" if col == "model" else ""

    return full_df[output_columns]
146202
147203 @staticmethod
148204 def _transform_scores_for_model (all_scores : List [Dict [str , Any ]], this_model : str , other_model : str , conversation = None ) -> Dict [str , float ]:
0 commit comments