Skip to content

Commit 39c5c8a

Browse files
committed
Change Re-zip structure
1 parent ceb4eee commit 39c5c8a

File tree

4 files changed

+71
-68
lines changed

4 files changed

+71
-68
lines changed

.github/workflows/run-benchmark.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ jobs:
115115

116116
- name: Re-zip provenance folder
117117
run: |
118-
zip -r "./${SNAKEMAKE_PROVENANCE_FILE}.zip" "./${SNAKEMAKE_PROVENANCE_FILE}"
118+
cd "./${SNAKEMAKE_PROVENANCE_FILE}"
119+
zip -r "../${SNAKEMAKE_PROVENANCE_FILE}.zip" .
120+
119121
120122
- name: Upload RoCrate Zip file onto RoHub
121123
shell: bash -l {0}

benchmarks/linear-elastic-plate-with-hole/plot_provenance.py

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
def parse_args():
1010
"""
1111
Parse command-line arguments for the provenance processing script.
12-
12+
1313
Returns:
1414
argparse.Namespace: Parsed arguments containing:
1515
- provenance_folderpath: Path to the folder with RO-Crate data
@@ -43,14 +43,14 @@ def parse_args():
4343
def sparql_result_to_dataframe(results):
4444
"""
4545
Convert SPARQL query results into a pandas DataFrame.
46-
46+
4747
Extracts variable bindings from each result row using asdict() and converts
4848
RDF values to Python native types using toPython().
49-
49+
5050
Args:
51-
results (rdflib.plugins.sparql.processor.SPARQLResult): SPARQL query results
51+
results (rdflib.plugins.sparql.processor.SPARQLResult): SPARQL query results
5252
from rdflib.
53-
53+
5454
Returns:
5555
pd.DataFrame: DataFrame where each row represents a query result and columns
5656
correspond to SPARQL variables.
@@ -67,16 +67,16 @@ def sparql_result_to_dataframe(results):
6767
def apply_custom_filters(data: pd.DataFrame) -> pd.DataFrame:
6868
"""
6969
Filter provenance data to include only first-order linear elements.
70-
70+
7171
Filters rows where element_degree = 1 and element_order = 1, then removes
7272
these filtering columns from the result.
73-
73+
7474
Args:
75-
data (pd.DataFrame): Input DataFrame containing element_degree and
75+
data (pd.DataFrame): Input DataFrame containing element_degree and
7676
element_order columns.
77-
77+
7878
Returns:
79-
pd.DataFrame: Filtered DataFrame with element_degree and element_order
79+
pd.DataFrame: Filtered DataFrame with element_degree and element_order
8080
columns removed and index reset.
8181
"""
8282
filtered_df = data[(data["element_degree"] == 1) & (data["element_order"] == 1)]
@@ -89,16 +89,16 @@ def apply_custom_filters(data: pd.DataFrame) -> pd.DataFrame:
8989
def summary_file_to_dataframe(summary_path, parameters, metrics):
9090
"""
9191
Load benchmark data from a summary.json file into a DataFrame.
92-
92+
9393
Handles both dictionary-style parameter/metric values (with 'value' key) and
9494
direct scalar values. Converts parameter names from underscore to hyphen format
9595
for JSON lookup.
96-
96+
9797
Args:
9898
summary_path (str): Path to the summary.json file.
9999
parameters (list): List of parameter names to extract.
100100
metrics (list): List of metric names to extract.
101-
101+
102102
Returns:
103103
pd.DataFrame: DataFrame with columns for each parameter and metric.
104104
"""
@@ -134,20 +134,20 @@ def summary_file_to_dataframe(summary_path, parameters, metrics):
134134
def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame):
135135
"""
136136
Compare two DataFrames for identical content regardless of row order.
137-
137+
138138
Sorts both DataFrames by all columns, then checks for equality. If differences
139139
are found, prints rows that appear in one DataFrame but not the other.
140-
140+
141141
Args:
142142
df1 (pd.DataFrame): First DataFrame to compare.
143143
df2 (pd.DataFrame): Second DataFrame to compare.
144-
144+
145145
Returns:
146146
bool: True if DataFrames contain identical data, False otherwise.
147-
147+
148148
Raises:
149149
ValueError: If the DataFrames have different columns.
150-
150+
151151
Prints:
152152
Rows that are present in one DataFrame but missing in the other,
153153
when differences are detected.
@@ -186,23 +186,23 @@ def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame):
186186
def load_and_query_graph(analyzer, parameters, metrics, tools):
187187
"""
188188
Load the RO-Crate graph and execute a SPARQL query to extract provenance data.
189-
189+
190190
Args:
191191
analyzer (ProvenanceAnalyzer): Initialized analyzer instance.
192192
parameters (list): List of parameter names to query.
193193
metrics (list): List of metric names to query.
194194
tools (list): List of tool names to filter by.
195-
195+
196196
Returns:
197197
pd.DataFrame: DataFrame containing the query results.
198-
198+
199199
Raises:
200200
AssertionError: If the query returns no data.
201201
"""
202202
graph = analyzer.load_graph_from_file()
203203
query = analyzer.build_dynamic_query(parameters, metrics, tools)
204204
results = analyzer.run_query_on_graph(graph, query)
205-
205+
206206
provenance_df = sparql_result_to_dataframe(results)
207207
assert len(provenance_df), "No data found for the provenance query."
208208

@@ -214,18 +214,18 @@ def validate_provenance_data(
214214
):
215215
"""
216216
Validate provenance query results against ground truth data from summary.json files.
217-
217+
218218
For each tool, loads the corresponding summary.json file and compares its data
219219
against the filtered provenance query results for that tool.
220-
220+
221221
Args:
222222
provenance_df (pd.DataFrame): DataFrame containing all provenance query results.
223223
parameters (list): List of parameter names used in the comparison.
224224
metrics (list): List of metric names used in the comparison.
225225
tools (list): List of tool names to validate.
226-
provenance_folderpath (str): Base path to the provenance folder containing
226+
provenance_folderpath (str): Base path to the provenance folder containing
227227
summary.json files.
228-
228+
229229
Raises:
230230
AssertionError: If data mismatch is found between summary.json and provenance
231231
data for any tool.
@@ -252,14 +252,14 @@ def validate_provenance_data(
252252
def plot_results(analyzer, final_df, output_file):
253253
"""
254254
Generate a visualization plot of the provenance results.
255-
255+
256256
Creates a scatter/line plot showing the relationship between element size
257257
and maximum von Mises stress, grouped by tool name.
258-
258+
259259
Args:
260260
analyzer (ProvenanceAnalyzer): Initialized analyzer instance.
261261
final_df (pd.DataFrame): DataFrame containing filtered data to plot.
262-
Expected columns: element_size, max_von_mises_stress_nodes,
262+
Expected columns: element_size, max_von_mises_stress_nodes,
263263
tool_name (in that order).
264264
output_file (str): Path where the plot image will be saved.
265265
"""
@@ -278,15 +278,15 @@ def plot_results(analyzer, final_df, output_file):
278278
def run(args, parameters, metrics, tools):
279279
"""
280280
Execute the complete provenance analysis workflow.
281-
281+
282282
Performs the following steps:
283283
1. Initialize the ProvenanceAnalyzer
284284
2. Validate the RO-Crate metadata structure
285285
3. Load and query the provenance graph
286286
4. Validate query results against summary.json ground truth data
287287
5. Apply custom filters to the data
288288
6. Generate visualization plot
289-
289+
290290
Args:
291291
args (argparse.Namespace): Parsed command-line arguments.
292292
parameters (list): List of parameter names to extract.
@@ -297,7 +297,7 @@ def run(args, parameters, metrics, tools):
297297
provenance_folderpath=args.provenance_folderpath,
298298
provenance_filename=args.provenance_filename,
299299
)
300-
300+
301301
analyzer.validate_provevance()
302302

303303
provenance_df = load_and_query_graph(analyzer, parameters, metrics, tools)
@@ -314,7 +314,7 @@ def run(args, parameters, metrics, tools):
314314
def main():
315315
"""
316316
Main entry point for the provenance analysis script.
317-
317+
318318
Parses command-line arguments, defines the parameters and metrics to extract,
319319
retrieves tool names from the workflow configuration, and executes the analysis
320320
workflow.
@@ -329,4 +329,4 @@ def main():
329329

330330

331331
if __name__ == "__main__":
332-
main()
332+
main()

benchmarks/linear-elastic-plate-with-hole/provenance.py

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
import re
77
from rocrate_validator import services, models
88

9+
910
class ProvenanceAnalyzer:
1011
"""
1112
A class to analyze, validate, and visualize provenance data from RO-Crate metadata files.
1213
13-
This class loads RO-Crate JSON-LD files, builds dynamic SPARQL queries to extract
14-
workflow metadata about methods, parameters, and metrics, and provides visualization
14+
This class loads RO-Crate JSON-LD files, builds dynamic SPARQL queries to extract
15+
workflow metadata about methods, parameters, and metrics, and provides visualization
1516
capabilities. It also validates RO-Crate files against the RO-Crate 1.1 profile.
1617
1718
Attributes:
@@ -61,13 +62,13 @@ def load_graph_from_file(self) -> Graph:
6162
def sanitize_variable_name(self, name: str) -> str:
6263
"""
6364
Convert a string into a valid SPARQL variable name.
64-
65+
6566
Replaces invalid characters with underscores and ensures the variable
6667
name doesn't start with a digit.
67-
68+
6869
Args:
6970
name (str): The original string to convert.
70-
71+
7172
Returns:
7273
str: A sanitized variable name safe for use in SPARQL queries.
7374
"""
@@ -80,20 +81,20 @@ def sanitize_variable_name(self, name: str) -> str:
8081

8182
def build_dynamic_query(self, parameters, metrics, tools=None, named_graph=None):
8283
"""
83-
Generate a dynamic SPARQL query to extract m4i:Method instances with specified
84+
Generate a dynamic SPARQL query to extract m4i:Method instances with specified
8485
parameters and metrics.
85-
86+
8687
The query extracts methods along with their associated parameters (via m4i:hasParameter),
8788
metrics (via m4i:investigates), and the tools that implement them (via ssn:implementedBy).
88-
89+
8990
Args:
9091
parameters (list): List of parameter names to query (matched via rdfs:label).
9192
metrics (list): List of metric names to query (matched via rdfs:label).
92-
tools (list, optional): List of tool name substrings to filter results.
93+
tools (list, optional): List of tool name substrings to filter results.
9394
Case-insensitive matching. Defaults to None.
94-
named_graph (str, optional): URI of a named graph to query within.
95+
named_graph (str, optional): URI of a named graph to query within.
9596
If None, queries the default graph. Defaults to None.
96-
97+
9798
Returns:
9899
str: A complete SPARQL query string ready to execute.
99100
"""
@@ -171,7 +172,7 @@ def run_query_on_graph(
171172
query (str): The SPARQL query string to execute.
172173
173174
Returns:
174-
rdflib.plugins.sparql.processor.SPARQLResult: The query results object
175+
rdflib.plugins.sparql.processor.SPARQLResult: The query results object
175176
from rdflib.
176177
"""
177178
return graph.query(query)
@@ -190,7 +191,7 @@ def plot_provenance_graph(
190191
):
191192
"""
192193
Generates a scatter/line plot from the extracted provenance data.
193-
194+
194195
The plot displays data points grouped by a specified column, with each group
195196
shown as a separate line series. The x-axis uses a logarithmic scale.
196197
@@ -202,9 +203,9 @@ def plot_provenance_graph(
202203
y_axis_index (int or str): Index or key for the y-axis values in each row.
203204
group_by_index (int or str): Index or key for the grouping variable (used for legend).
204205
title (str): Title of the plot.
205-
output_file (str, optional): Path where the plot will be saved as an image.
206+
output_file (str, optional): Path where the plot will be saved as an image.
206207
If None, displays the plot. Defaults to None.
207-
figsize (Tuple[int, int], optional): Figure dimensions (width, height).
208+
figsize (Tuple[int, int], optional): Figure dimensions (width, height).
208209
Defaults to (12, 5).
209210
"""
210211

@@ -244,36 +245,32 @@ def plot_provenance_graph(
244245
else:
245246
plt.show()
246247

247-
248-
def validate_provevance(self):
248+
def validate_provevance(self):
249249
"""
250250
Validates the RO-Crate against the RO-Crate 1.1 profile.
251-
251+
252252
Uses the rocrate-validator library to check if the RO-Crate metadata
253253
conforms to the RO-Crate 1.1 specification with required severity level.
254-
254+
255255
Raises:
256256
AssertionError: If the RO-Crate has validation issues, with details
257257
about each issue's severity and message.
258-
258+
259259
Prints:
260260
Success message if the RO-Crate is valid.
261261
"""
262262
settings = services.ValidationSettings(
263263
rocrate_uri=self.provenance_folderpath,
264-
profile_identifier='ro-crate-1.1',
264+
profile_identifier="ro-crate-1.1",
265265
requirement_severity=models.Severity.REQUIRED,
266266
)
267267

268268
result = services.validate(settings)
269269

270-
assert not result.has_issues(), (
271-
"RO-Crate is invalid!\n" +
272-
"\n".join(
273-
f"Detected issue of severity {issue.severity.name} with check "
274-
f'"{issue.check.identifier}": {issue.message}'
275-
for issue in result.get_issues()
276-
)
270+
assert not result.has_issues(), "RO-Crate is invalid!\n" + "\n".join(
271+
f"Detected issue of severity {issue.severity.name} with check "
272+
f'"{issue.check.identifier}": {issue.message}'
273+
for issue in result.get_issues()
277274
)
278275

279-
print("RO-Crate is valid!")
276+
print("RO-Crate is valid!")

benchmarks/linear-elastic-plate-with-hole/upload_provenance.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ def parse_args():
3030

3131
def run(args):
3232
rohub.settings.SLEEP_TIME = 10
33-
33+
3434
USE_DEVELOPMENT_VERSION = True
35+
3536
if USE_DEVELOPMENT_VERSION:
3637
rohub.settings.API_URL = "https://rohub2020-rohub.apps.paas-dev.psnc.pl/api/"
3738
rohub.settings.KEYCLOAK_CLIENT_ID = "rohub2020-cli"
@@ -42,7 +43,7 @@ def run(args):
4243
)
4344

4445
rohub.login(args.username, args.password)
45-
46+
4647
my_ros = rohub.list_my_ros()
4748

4849
try:
@@ -81,14 +82,17 @@ def run(args):
8182
ANNOTATION_PREDICATE = "http://w3id.org/nfdi4ing/metadata4ing#investigates"
8283
ANNOTATION_OBJECT = "https://github.com/BAMresearch/NFDI4IngModelValidationPlatform/tree/main/benchmarks/linear-elastic-plate-with-hole"
8384

84-
if (uuid != ""):
85+
if uuid != "":
8586
_RO = rohub.ros_load(uuid)
86-
annotation_json = [{"property": ANNOTATION_PREDICATE, "value": ANNOTATION_OBJECT}]
87+
annotation_json = [
88+
{"property": ANNOTATION_PREDICATE, "value": ANNOTATION_OBJECT}
89+
]
8790
add_annotations_result = _RO.add_annotations(
8891
body_specification_json=annotation_json
8992
)
9093
print(add_annotations_result)
9194

95+
9296
def main():
9397
args = parse_args()
9498
run(args)

0 commit comments

Comments
 (0)