99def parse_args ():
1010 """
1111 Parse command-line arguments for the provenance processing script.
12-
12+
1313 Returns:
1414 argparse.Namespace: Parsed arguments containing:
1515 - provenance_folderpath: Path to the folder with RO-Crate data
@@ -43,14 +43,14 @@ def parse_args():
4343def sparql_result_to_dataframe (results ):
4444 """
4545 Convert SPARQL query results into a pandas DataFrame.
46-
46+
4747 Extracts variable bindings from each result row using asdict() and converts
4848 RDF values to Python native types using toPython().
49-
49+
5050 Args:
51- results (rdflib.plugins.sparql.processor.SPARQLResult): SPARQL query results
51+ results (rdflib.plugins.sparql.processor.SPARQLResult): SPARQL query results
5252 from rdflib.
53-
53+
5454 Returns:
5555 pd.DataFrame: DataFrame where each row represents a query result and columns
5656 correspond to SPARQL variables.
@@ -67,16 +67,16 @@ def sparql_result_to_dataframe(results):
6767def apply_custom_filters (data : pd .DataFrame ) -> pd .DataFrame :
6868 """
6969 Filter provenance data to include only first-order linear elements.
70-
70+
7171 Filters rows where element_degree = 1 and element_order = 1, then removes
7272 these filtering columns from the result.
73-
73+
7474 Args:
75- data (pd.DataFrame): Input DataFrame containing element_degree and
75+ data (pd.DataFrame): Input DataFrame containing element_degree and
7676 element_order columns.
77-
77+
7878 Returns:
79- pd.DataFrame: Filtered DataFrame with element_degree and element_order
79+ pd.DataFrame: Filtered DataFrame with element_degree and element_order
8080 columns removed and index reset.
8181 """
8282 filtered_df = data [(data ["element_degree" ] == 1 ) & (data ["element_order" ] == 1 )]
@@ -89,16 +89,16 @@ def apply_custom_filters(data: pd.DataFrame) -> pd.DataFrame:
8989def summary_file_to_dataframe (summary_path , parameters , metrics ):
9090 """
9191 Load benchmark data from a summary.json file into a DataFrame.
92-
92+
9393 Handles both dictionary-style parameter/metric values (with 'value' key) and
9494 direct scalar values. Converts parameter names from underscore to hyphen format
9595 for JSON lookup.
96-
96+
9797 Args:
9898 summary_path (str): Path to the summary.json file.
9999 parameters (list): List of parameter names to extract.
100100 metrics (list): List of metric names to extract.
101-
101+
102102 Returns:
103103 pd.DataFrame: DataFrame with columns for each parameter and metric.
104104 """
@@ -134,20 +134,20 @@ def summary_file_to_dataframe(summary_path, parameters, metrics):
134134def compare_dataframes (df1 : pd .DataFrame , df2 : pd .DataFrame ):
135135 """
136136 Compare two DataFrames for identical content regardless of row order.
137-
137+
138138 Sorts both DataFrames by all columns, then checks for equality. If differences
139139 are found, prints rows that appear in one DataFrame but not the other.
140-
140+
141141 Args:
142142 df1 (pd.DataFrame): First DataFrame to compare.
143143 df2 (pd.DataFrame): Second DataFrame to compare.
144-
144+
145145 Returns:
146146 bool: True if DataFrames contain identical data, False otherwise.
147-
147+
148148 Raises:
149149 ValueError: If the DataFrames have different columns.
150-
150+
151151 Prints:
152152 Rows that are present in one DataFrame but missing in the other,
153153 when differences are detected.
@@ -186,23 +186,23 @@ def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame):
186186def load_and_query_graph (analyzer , parameters , metrics , tools ):
187187 """
188188 Load the RO-Crate graph and execute a SPARQL query to extract provenance data.
189-
189+
190190 Args:
191191 analyzer (ProvenanceAnalyzer): Initialized analyzer instance.
192192 parameters (list): List of parameter names to query.
193193 metrics (list): List of metric names to query.
194194 tools (list): List of tool names to filter by.
195-
195+
196196 Returns:
197197 pd.DataFrame: DataFrame containing the query results.
198-
198+
199199 Raises:
200200 AssertionError: If the query returns no data.
201201 """
202202 graph = analyzer .load_graph_from_file ()
203203 query = analyzer .build_dynamic_query (parameters , metrics , tools )
204204 results = analyzer .run_query_on_graph (graph , query )
205-
205+
206206 provenance_df = sparql_result_to_dataframe (results )
207207 assert len (provenance_df ), "No data found for the provenance query."
208208
@@ -214,18 +214,18 @@ def validate_provenance_data(
214214):
215215 """
216216 Validate provenance query results against ground truth data from summary.json files.
217-
217+
218218 For each tool, loads the corresponding summary.json file and compares its data
219219 against the filtered provenance query results for that tool.
220-
220+
221221 Args:
222222 provenance_df (pd.DataFrame): DataFrame containing all provenance query results.
223223 parameters (list): List of parameter names used in the comparison.
224224 metrics (list): List of metric names used in the comparison.
225225 tools (list): List of tool names to validate.
226- provenance_folderpath (str): Base path to the provenance folder containing
226+ provenance_folderpath (str): Base path to the provenance folder containing
227227 summary.json files.
228-
228+
229229 Raises:
230230 AssertionError: If data mismatch is found between summary.json and provenance
231231 data for any tool.
@@ -252,14 +252,14 @@ def validate_provenance_data(
252252def plot_results (analyzer , final_df , output_file ):
253253 """
254254 Generate a visualization plot of the provenance results.
255-
255+
256256 Creates a scatter/line plot showing the relationship between element size
257257 and maximum von Mises stress, grouped by tool name.
258-
258+
259259 Args:
260260 analyzer (ProvenanceAnalyzer): Initialized analyzer instance.
261261 final_df (pd.DataFrame): DataFrame containing filtered data to plot.
262- Expected columns: element_size, max_von_mises_stress_nodes,
262+ Expected columns: element_size, max_von_mises_stress_nodes,
263263 tool_name (in that order).
264264 output_file (str): Path where the plot image will be saved.
265265 """
@@ -278,15 +278,15 @@ def plot_results(analyzer, final_df, output_file):
278278def run (args , parameters , metrics , tools ):
279279 """
280280 Execute the complete provenance analysis workflow.
281-
281+
282282 Performs the following steps:
283283 1. Initialize the ProvenanceAnalyzer
284284 2. Validate the RO-Crate metadata structure
285285 3. Load and query the provenance graph
286286 4. Validate query results against summary.json ground truth data
287287 5. Apply custom filters to the data
288288 6. Generate visualization plot
289-
289+
290290 Args:
291291 args (argparse.Namespace): Parsed command-line arguments.
292292 parameters (list): List of parameter names to extract.
@@ -297,7 +297,7 @@ def run(args, parameters, metrics, tools):
297297 provenance_folderpath = args .provenance_folderpath ,
298298 provenance_filename = args .provenance_filename ,
299299 )
300-
300+
301301 analyzer .validate_provevance ()
302302
303303 provenance_df = load_and_query_graph (analyzer , parameters , metrics , tools )
@@ -314,7 +314,7 @@ def run(args, parameters, metrics, tools):
314314def main ():
315315 """
316316 Main entry point for the provenance analysis script.
317-
317+
318318 Parses command-line arguments, defines the parameters and metrics to extract,
319319 retrieves tool names from the workflow configuration, and executes the analysis
320320 workflow.
@@ -329,4 +329,4 @@ def main():
329329
330330
331331if __name__ == "__main__" :
332- main ()
332+ main ()
0 commit comments