Add basic ROHub test cases, just for information exchange

joergfunger · joergfunger · commit 1e0e74c90475 · 2025-10-06T16:02:46.000+02:00
diff --git a/src/ROhub/.env b/src/ROhub/.env
@@ -0,0 +1,7 @@
+[API_SECTION]
+API_URL = https://rohub2020-rohub.apps.paas-dev.psnc.pl/api/
+
+[KEYCLOAK_SECTION]
+KEYCLOAK_CLIENT_ID = rohub2020-cli
+KEYCLOAK_CLIENT_SECRET = 714617a7-87bc-4a88-8682-5f9c2f60337d
+KEYCLOAK_URL = https://keycloak-dev.apps.paas-dev.psnc.pl/auth/realms/rohub/protocol/openid-connect/token
diff --git a/src/ROhub/ROhub_env.yml b/src/ROhub/ROhub_env.yml
@@ -0,0 +1,9 @@
+name: rohub
+channels:
+  - conda-forge
+dependencies:
+  - python=3.12
+  - sparqlwrapper
+  - pip
+  - pip:
+    - "--editable=git+https://gitlab.pcss.pl/daisd-public/rohub/rohub-api.git#egg=rohub"
diff --git a/src/ROhub/Readme.md b/src/ROhub/Readme.md
@@ -0,0 +1,11 @@
+# Installation for using the package ROhub with the development server
+Install the package via pip (inside a conda environment) using the following option:
+
+`--editable=git+https://gitlab.pcss.pl/daisd-public/rohub/rohub-api.git#egg=rohub`
+
+Find the location of the local installation (`pip show rohub`) and copy the file `.env` into that directory. Both steps can be combined into a single command:
+
+```bash
+cp -v .env "$(pip show rohub | awk -F': ' '/Editable project location/ {print $2}')/.env"
+```
+
diff --git a/src/ROhub/test_import.py b/src/ROhub/test_import.py
@@ -0,0 +1,50 @@
+import rohub
+import pandas as pd
+
+print("ROHub API URL:", rohub.settings.API_URL)
+# loading credentials from external file
+user_name = "joerg.unger@bam.de"
+user_pwd = open("passwordROHub.txt").read()
+rohub.login(
+    username=user_name, password=user_pwd
+)  # Ensure you are logged in to access ROHub features
+
+
+# zip_path = "./metadata4ing_provenance.zip"
+zip_path = "./nextflow_results_linear-elastic-plate-with-hole.zip"
+resources_from_zip = rohub.ros_upload(path_to_zip=zip_path)
+
+my_ros = rohub.list_my_ros()
+if len(my_ros) == 0:
+    print("Error fetching RO with id:", id, "creating a new one", e)
+    ro_title = "The influence of eating habits on sleep"
+    ro_research_areas = ["Medical science"]
+    ro = rohub.ros_create(title=ro_title, research_areas=ro_research_areas)
+    my_ros = rohub.list_my_ros()
+
+
+# Assuming df is your pandas DataFrame with an "identifier" column
+for index, row in my_ros.iterrows():
+    id = row["identifier"]
+    ro = rohub.ros_load(id)
+    print("RO type:", ro.ros_type)
+    if hasattr(ro, "title") and ro.title:
+        print("RO title:", ro.title)
+    if hasattr(ro, "authors") and ro.authors:
+        print("RO authors:", ro.authors)
+    if hasattr(ro, "description") and ro.description:
+        print("RO description:", ro.description)
+    if hasattr(ro, "research_areas") and ro.research_areas:
+        print("RO research areas:", ro.research_areas)
+    if hasattr(ro, "creation_date") and ro.creation_date:
+        print("RO creation date:", ro.creation_date)
+    if hasattr(ro, "last_modified_date") and ro.last_modified_date:
+        print("RO last modified date:", ro.last_modified_date)
+    if hasattr(ro, "doi") and ro.doi:
+        print("RO DOI:", ro.doi)
+    if hasattr(ro, "url") and ro.url:
+        print("RO URL:", ro.url)
+    if hasattr(ro, "metadata") and ro.metadata:
+        print("RO metadata:", ro.metadata)
+    print("RO metadata:", ro.show_metadata())
+    print("---------------------------------")
diff --git a/src/ROhub/test_queryROhub.py b/src/ROhub/test_queryROhub.py
@@ -0,0 +1,40 @@
+from SPARQLWrapper import SPARQLWrapper, JSON
+
+# SPARQL endpoint
+sparql = SPARQLWrapper(
+    "https://rohub2020-api-virtuoso-route-rohub.apps.paas-dev.psnc.pl/sparql/"
+)
+
+# Find datasets that conform to Workflow RO-Crate 1.0
+query = """
+PREFIX schema: <http://schema.org/>
+PREFIX dct: <http://purl.org/dc/terms/>
+SELECT ?dataset ?datePublished ?author ?part WHERE {
+    ?dataset a schema:Dataset .
+    ?dataset dct:conformsTo <https://w3id.org/workflowhub/workflow-ro-crate/1.0> .
+    ?dataset schema:author ?author .
+    FILTER (?author = <https://orcid.org/0000-0000-0000-0000>)
+    OPTIONAL { ?dataset schema:datePublished ?datePublished }
+    OPTIONAL { ?dataset schema:hasPart ?part }
+}
+"""
+
+sparql.setQuery(query)
+sparql.setReturnFormat(JSON)
+
+try:
+    resp = sparql.query().convert()
+    bindings = resp.get("results", {}).get("bindings", [])
+
+    print("Dataset Query Results:")
+    print("========================================")
+    # Print directly per row instead of aggregating into a structure
+    for row in bindings:
+        print(f"Dataset: {row.get('dataset', {}).get('value', 'None')}")
+        print(f"Date Published: {row.get('datePublished', {}).get('value', 'None')}")
+        print(f"Author: {row.get('author', {}).get('value', 'None')}")
+        print(f"Part: {row.get('part', {}).get('value', 'None')}")
+        print("-" * 40)
+
+except Exception as e:
+    print(f"Error executing SPARQL query: {e}")
diff --git a/src/ROhub/test_query_local.py b/src/ROhub/test_query_local.py
@@ -0,0 +1,58 @@
+from rdflib import Graph, URIRef, Namespace
+
+# Load RO-Crate metadata
+graph = Graph()
+graph.parse("ro-crate-metadata.json", format="json-ld")
+
+# Define namespaces
+SCHEMA = Namespace("http://schema.org/")
+
+try:
+    # Show datasets and their dct:conformsTo values
+    debug_query = """
+    PREFIX schema: <http://schema.org/>
+    PREFIX dct: <http://purl.org/dc/terms/>
+    SELECT ?dataset ?ct WHERE {
+        ?dataset a schema:Dataset .
+        OPTIONAL { ?dataset dct:conformsTo ?ct }
+    }
+    """
+    print("DEBUG: All datasets and their dct:conformsTo objects")
+    print("=" * 60)
+    for row in graph.query(debug_query):
+        print(f"Dataset: {row.dataset}")
+        print(f"ConformsTo: {row.ct if row.ct else 'None'}")
+        print("-" * 40)
+
+    # Find datasets that conform to Workflow RO-Crate 1.0
+    query = """
+        PREFIX schema: <http://schema.org/>
+        PREFIX dct: <http://purl.org/dc/terms/>
+        SELECT ?dataset ?datePublished ?author ?part WHERE {
+            ?dataset a schema:Dataset .
+            ?dataset dct:conformsTo <https://w3id.org/workflowhub/workflow-ro-crate/1.0> .
+            ?dataset schema:author ?author .
+            FILTER (?author = <https://orcid.org/0000-0000-0000-0000>)
+            OPTIONAL { ?dataset schema:datePublished ?datePublished }
+            OPTIONAL { ?dataset schema:hasPart ?part }
+        }
+    """
+    results = graph.query(query)
+
+    print("\nWorkflowHub Dataset Query Results:")
+    print("========================================")
+    # Print directly per row instead of aggregating into a structure
+    found_any = False
+    for row in results:
+        found_any = True
+        print(f"Dataset: {row.dataset}")
+        print(f"Date Published: {row.datePublished if row.datePublished else 'None'}")
+        print(f"Author: {row.author if row.author else 'None'}")
+        print(f"Part: {row.part if row.part else 'None'}")
+        print("-" * 40)
+
+    if not found_any:
+        print("No datasets found that conform to Workflow RO-Crate 1.0")
+
+except Exception as e:
+    print(f"Error: {e}")