Skip to content

Commit 5398cee

Browse files
authored
Plotting Provenance (#24)
* add process-artifacts job * Add parameter extractor * Fix yml file * Fix Parameter Extractor * Add provenance plotting * Fix python path * Add python packages * Edit Extractor * Edit filename * Edit SPARQL query * Fix Sparql query * Switch to conda for plotting * Fix Env * Merge two jobs * Edit tool name extraction * Print packages * Rollback to separate jobs * Fix Extractor * Fix post-processing env * Fix plotting bug * Fix tool extraction * Fix SPARQL query and parame extraction * Fix file name and duplicate envs * Fix param extraction * Remove unit * SPARQL refactor * Fix version finding * Fix Param Extractor * Fix case-insentitive * Remove tool from paramscript * Add dynamic tool names * Fix QUDT units * Remove qudt extraction * Remove unused method * Installing version 1.0.0 and unzipping with file name * Fix file name * Merge run-benchmark * Fix renaming
1 parent 607fba1 commit 5398cee

File tree

5 files changed

+259
-4
lines changed

5 files changed

+259
-4
lines changed

.github/workflows/run-benchmark.yml

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ jobs:
4949
run: |
5050
cd $GITHUB_WORKSPACE/benchmarks/linear-elastic-plate-with-hole/
5151
snakemake --use-conda --force --cores 'all'
52-
snakemake --use-conda --force --cores 'all' --reporter metadata4ing
53-
52+
snakemake --use-conda --force --cores all \
53+
--reporter metadata4ing \
54+
--report-metadata4ing-paramscript parameter_extractor.py \
55+
--report-metadata4ing-filename metadata4ing_provenance
56+
5457
- name: run_linear-elastic-plate-with-hole-benchmarks_nextflow
5558
shell: bash -l {0}
5659
run: |
@@ -62,11 +65,48 @@ jobs:
6265
with:
6366
name: snakemake_results_linear-elastic-plate-with-hole
6467
path: |
65-
benchmarks/linear-elastic-plate-with-hole/*.zip
68+
benchmarks/linear-elastic-plate-with-hole/metadata4ing_provenance.zip
6669
6770
- name: Archive Linear Elastic plate with a hole benchmark data for nextflow
6871
uses: actions/upload-artifact@v4
6972
with:
7073
name: nextflow_results_linear-elastic-plate-with-hole
7174
path: |
7275
benchmarks/linear-elastic-plate-with-hole/nextflow_results/
76+
77+
process-artifacts:
  # Post-processing job: downloads the Snakemake provenance archive uploaded
  # under the artifact name "snakemake_results_linear-elastic-plate-with-hole",
  # unpacks it, runs the plotting script on the extracted files and uploads
  # the resulting PDF as its own artifact.
  runs-on: ubuntu-latest
  needs: tests
  steps:
    - name: Checkout repo content
      # v4 keeps this step on the same major version as the artifact actions
      # used below; v2 targets an outdated runner runtime.
      uses: actions/checkout@v4

    - name: Download artifact
      uses: actions/download-artifact@v4
      with:
        name: snakemake_results_linear-elastic-plate-with-hole
        path: ./artifact_files

    - name: Unzip metadata4ing_provenance.zip
      run: |
        mkdir -p ./metadata4ing_provenance
        unzip -o ./artifact_files/metadata4ing_provenance.zip -d ./metadata4ing_provenance

    - name: Setup Mambaforge with postprocessing env
      uses: conda-incubator/setup-miniconda@v3
      with:
        miniforge-version: latest
        activate-environment: postprocessing
        use-mamba: true
        environment-file: benchmarks/linear-elastic-plate-with-hole/environment_postprocessing.yml

    - name: Run plotting script
      # Login shell, same as the benchmark steps use (`bash -l {0}`).
      shell: bash -l {0}
      run: |
        python benchmarks/linear-elastic-plate-with-hole/plot_provenance.py ./metadata4ing_provenance

    - name: Upload PDF plot as artifact
      uses: actions/upload-artifact@v4
      with:
        name: element-size-vs-stress-plot
        # Relative path: the plotting step above is invoked without changing
        # directory and the script saves under this file name.
        path: element_size_vs_stress.pdf
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
name: postprocessing
22
channels:
33
- conda-forge
4+
- defaults
45

56
channel_priority: strict
67

78
dependencies:
89
- python=3.12
910
- pint
1011
- pyvista
12+
- rdflib
13+
- matplotlib
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import json
2+
import os
3+
from snakemake_report_plugin_metadata4ing.interfaces import (
4+
ParameterExtractorInterface,
5+
)
6+
7+
class ParameterExtractor(ParameterExtractorInterface):
    """Extract parameters and results from benchmark JSON files.

    Implements ``ParameterExtractorInterface`` from
    ``snakemake_report_plugin_metadata4ing``. Only files belonging to rules
    whose name starts with ``postprocess_`` or ``run_`` are inspected:

    * ``parameters_*.json`` files are reported under ``"has parameter"``;
    * ``solution_*.json`` files are reported under ``"investigates"``.
    """

    # Rule-name prefixes that are inspected; str.startswith accepts a tuple.
    _RULE_PREFIXES = ("postprocess_", "run_")

    def extract_params(self, rule_name: str, file_path: str) -> dict:
        """Return the metadata entries found in *file_path* for *rule_name*.

        Args:
            rule_name: Name of the rule the file belongs to.
            file_path: Path of the file to inspect.

        Returns:
            ``{rule_name: {relation: [entry, ...]}}`` where ``relation`` is
            ``"has parameter"`` or ``"investigates"``; an empty dict when the
            rule name or file name does not match the patterns above.

        Raises:
            KeyError: If a dict-valued parameter has no ``"value"`` key.
        """
        file_name = os.path.basename(file_path)
        if not file_name.endswith(".json"):
            return {}
        if not rule_name.startswith(self._RULE_PREFIXES):
            return {}

        if file_name.startswith("parameters_"):
            relation = "has parameter"
            build_entries = self._parameter_entries
        elif file_name.startswith("solution_"):
            relation = "investigates"
            build_entries = self._result_entries
        else:
            return {}

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)
        return {rule_name: {relation: build_entries(data)}}

    def _parameter_entries(self, data: dict) -> list:
        """Build one ``{key: description}`` entry per top-level parameter."""
        entries = []
        for key, val in data.items():
            if isinstance(val, dict):
                # Structured parameter: the value (and optional unit) are nested.
                value = val["value"]
                entries.append({key: {
                    "value": value,
                    # str() instead of an f-string that reuses double quotes:
                    # that form is a SyntaxError before Python 3.12.
                    "unit": str(val["unit"]) if "unit" in val else None,
                    "json-path": f"/{key}/value",
                    "data-type": self._get_type(value),
                }})
            else:
                # Plain parameter: the JSON value is stored directly under key.
                entries.append({key: {
                    "value": val,
                    "unit": None,
                    "json-path": f"/{key}",
                    "data-type": self._get_type(val),
                }})
        return entries

    def _result_entries(self, data: dict) -> list:
        """Build the entry for ``max_von_mises_stress_nodes`` when present."""
        return [
            {key: {
                "value": val,
                "unit": None,
                "json-path": f"/{key}",
                "data-type": "schema:Float",
            }}
            for key, val in data.items()
            if key == "max_von_mises_stress_nodes"
        ]

    def _get_type(self, val):
        """Map a Python value to a ``schema:`` data-type label, or ``None``."""
        # NOTE(review): bool is a subclass of int, so True/False are reported
        # as "schema:Integer" here - confirm that this is intended.
        if isinstance(val, float):
            return "schema:Float"
        elif isinstance(val, int):
            return "schema:Integer"
        elif isinstance(val, str):
            return "schema:Text"
        return None
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import os
2+
import argparse
3+
from rdflib import Graph
4+
import matplotlib.pyplot as plt
5+
from collections import defaultdict
6+
from generate_config import workflow_config
7+
8+
def load_graphs(base_dir):
    """
    Walk through base_dir and load every ``*.jsonld`` file into its own rdflib Graph.

    Directories and files are visited in sorted order so that the order of the
    returned graphs (and of the log output) is reproducible between runs.
    Files that fail to parse are reported on stdout and skipped.

    Args:
        base_dir: Root directory that is searched recursively.

    Returns:
        List of successfully parsed graphs; empty when nothing was found.
    """
    graph_list = []
    for root, dirs, files in os.walk(base_dir):
        # Sorting in place makes os.walk descend into sub-directories in a
        # stable order instead of the arbitrary order of the file system.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".jsonld"):
                continue
            file_path = os.path.join(root, file)
            try:
                g = Graph()
                g.parse(file_path, format='json-ld')
            except Exception as e:
                # Deliberately best-effort: one broken file must not abort
                # the whole run, so report it and continue.
                print(f"❌ Failed to parse {file_path}: {e}")
            else:
                graph_list.append(g)
                print(f"✅ Parsed: {file_path}")
    print(f"\nTotal graphs loaded: {len(graph_list)}")
    return graph_list
26+
27+
28+
def _sparql_string_literal(text):
    """Return *text* as a double-quoted SPARQL string literal with escapes applied."""
    escaped = (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


def query_and_build_table(graph_list):
    """
    Run the SPARQL query on every graph and collect the rows in one table.

    The query selects processing steps with ``element_order`` 1 and
    ``element_degree`` 1 and keeps those whose tool label contains (ignoring
    case) one of the names in ``workflow_config["tools"]``.

    Args:
        graph_list: Iterable of rdflib graphs to query.

    Returns:
        Tuple ``(headers, table_data)``; every row of ``table_data`` holds the
        element size, the stress value and the tool label, in the order given
        by ``headers``. Rows are sorted by element size.
    """
    headers = [
        "element-size",
        "max-mises-stress",
        "Tool Name"
    ]

    tools = workflow_config["tools"]
    if not tools:
        # No configured tool can match, and an empty "FILTER ()" would be
        # invalid SPARQL, so there is nothing to query.
        return headers, []

    # Tool names are escaped so that a quote or backslash in a configured
    # name cannot break (or alter) the query text.
    filter_conditions = " || ".join(
        f"CONTAINS(LCASE(?tool_name), {_sparql_string_literal(tool.lower())})"
        for tool in tools
    )

    # NOTE(review): the schema:, m4i: and rdfs: prefixes are not declared in
    # the query itself (cr: and sio: are declared but unused); presumably they
    # are resolved through prefixes bound on each graph - confirm.
    # The ?..._gauss_points variables are matched against the label
    # "max_von_mises_stress_nodes".
    query = f"""
    PREFIX cr: <http://mlcommons.org/croissant/>
    PREFIX sio: <http://semanticscience.org/resource/>

    SELECT DISTINCT ?value_element_size ?value_max_von_mises_stress_gauss_points ?tool_name
    WHERE {{
        ?processing_step a schema:Action ;
            m4i:hasParameter ?element_size ;
            m4i:hasParameter ?element_order ;
            m4i:hasParameter ?element_degree ;
            m4i:investigates ?max_von_mises_stress_gauss_points ;
            schema:instrument ?tool .

        ?max_von_mises_stress_gauss_points a schema:PropertyValue ;
            rdfs:label "max_von_mises_stress_nodes" ;
            schema:value ?value_max_von_mises_stress_gauss_points .

        ?element_order a schema:PropertyValue ;
            rdfs:label "element_order" ;
            schema:value 1 .

        ?element_degree a schema:PropertyValue ;
            rdfs:label "element_degree" ;
            schema:value 1 .

        ?element_size a schema:PropertyValue ;
            rdfs:label "element_size" ;
            schema:value ?value_element_size .

        ?tool a schema:SoftwareApplication ;
            rdfs:label ?tool_name .

        FILTER ({filter_conditions})
    }}
    """

    table_data = []
    for g in graph_list:
        table_data.extend(
            [
                row.value_element_size,
                row.value_max_von_mises_stress_gauss_points,
                row.tool_name,
            ]
            for row in g.query(query)
        )

    # Sort by element-size
    sort_key = headers.index("element-size")
    table_data.sort(key=lambda row: row[sort_key])

    return headers, table_data
100+
101+
102+
def plot_element_size_vs_stress(headers, table_data, output_file="element_size_vs_stress.pdf"):
    """Plot element-size vs max-mises-stress, one line per tool, and save the figure.

    Args:
        headers: Column labels of ``table_data``; must contain
            "element-size", "max-mises-stress" and "Tool Name".
        table_data: Rows whose element-size and stress entries are
            convertible with ``float()``.
        output_file: Path passed to ``plt.savefig``.

    Raises:
        ValueError: If a required label is missing from ``headers`` or a
            value cannot be converted to ``float``.
    """
    idx_element_size = headers.index("element-size")
    idx_stress = headers.index("max-mises-stress")
    idx_tool = headers.index("Tool Name")

    grouped_data = defaultdict(list)
    x_tick_set = set()

    for row in table_data:
        tool = row[idx_tool]
        x = float(row[idx_element_size])
        y = float(row[idx_stress])
        grouped_data[tool].append((x, y))
        x_tick_set.add(x)

    # Sort x-tick labels
    x_ticks = sorted(x_tick_set)

    fig = plt.figure(figsize=(12, 5))
    try:
        for tool, values in grouped_data.items():
            # Sort the (x, y) pairs so each line is drawn left to right.
            values.sort()
            x_vals, y_vals = zip(*values)
            plt.plot(x_vals, y_vals, marker='o', linestyle='-', label=tool)

        plt.xlabel("element-size")
        plt.ylabel("max-mises-stress")
        plt.title("element-size vs max-mises-stress by Tool\n(element-order = 1 , element-degree = 1)")
        plt.legend(title="Tool Name")
        plt.grid(True)

        # Use logarithmic scale for x-axis
        plt.xscale('log')

        # Set x-ticks to show original values
        plt.xticks(ticks=x_ticks, labels=[str(x) for x in x_ticks], rotation=45)
        plt.tight_layout()

        # Save to file instead of showing
        plt.savefig(output_file)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it even when plotting or saving fails.
        plt.close(fig)
    print(f"Plot saved as {output_file}")
144+
145+
146+
if __name__ == "__main__":
    # Command-line entry point: read the artifact folder from argv, load the
    # JSON-LD graphs found below it, query them and write the PDF plot.
    cli = argparse.ArgumentParser(
        description="Process JSON-LD artifacts and display simulation results."
    )
    cli.add_argument(
        "artifact_folder",
        type=str,
        help="Path to the folder containing unzipped artifacts",
    )
    options = cli.parse_args()

    column_names, rows = query_and_build_table(load_graphs(options.artifact_folder))
    plot_element_size_vs_stress(
        column_names,
        rows,
        output_file="element_size_vs_stress.pdf",
    )

environment_benchmarks.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ dependencies:
1111
- conda
1212
- pip
1313
- pip:
14-
- "--editable=git+https://github.com/izus-fokus/snakemake-report-plugin-metadata4ing.git#egg=snakemake-report-plugin-metadata4ing"
14+
- "git+https://github.com/izus-fokus/snakemake-report-plugin-metadata4ing@v1.0.0#egg=snakemake-report-plugin-metadata4ing"

0 commit comments

Comments
 (0)