Skip to content

Commit fc462bd

Browse files
committed
ADDED: FOOPs Query + Annotation Coverage Calculation
1 parent 745b8c0 commit fc462bd

File tree

2 files changed

+99
-122
lines changed

2 files changed

+99
-122
lines changed

data/Ontologies_forRepo.xlsx

949 Bytes
Binary file not shown.

data/source/ontology_characterisation_v31.ipynb

Lines changed: 99 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
"metadata": {},
77
"outputs": [],
88
"source": [
9-
"pip install rdflib\n",
10-
"pip install requests"
9+
"pip install rdflib"
1110
]
1211
},
1312
{
@@ -21,14 +20,10 @@
2120
"from pathlib import Path\n",
2221
"import json\n",
2322
"import numpy as np\n",
24-
"import re\n",
25-
"from collections import Counter\n",
2623
"from typing import Dict, List, Tuple, Optional\n",
2724
"from openpyxl import load_workbook\n",
28-
"from openpyxl.worksheet.table import Table, TableStyleInfo\n",
2925
"from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, URIRef\n",
30-
"from rdflib.namespace import DC, DCTERMS, SKOS\n",
31-
"from urllib.parse import urlparse"
26+
"from rdflib.namespace import DC, DCTERMS, SKOS"
3227
]
3328
},
3429
{
@@ -338,7 +333,10 @@
338333
" 'license': get_value([DCTERMS.license, DC.rights]),\n",
339334
" 'publisher': get_value([DCTERMS.publisher, DC.publisher]),\n",
340335
" 'version': get_value([OWL.versionInfo, DCTERMS.hasVersion]),\n",
341-
" 'preferred_prefix': get_value([VANN.preferredNamespacePrefix]),\n 'classes_count': sum(1 for _ in g.subjects(RDF.type, OWL.Class)),\n 'data_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.DatatypeProperty)),\n 'object_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.ObjectProperty))\n",
336+
" 'preferred_prefix': get_value([VANN.preferredNamespacePrefix]),\n",
337+
" 'classes_count': sum(1 for _ in g.subjects(RDF.type, OWL.Class)),\n",
338+
" 'data_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.DatatypeProperty)),\n",
339+
" 'object_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.ObjectProperty))\n",
342340
" }\n",
343341
"\n",
344342
" return metadata\n",
@@ -624,19 +622,41 @@
624622
" 'annotation_coverage_percent': None,\n",
625623
" 'linked_by_manual': linked_by_manual,\n",
626624
" 'linked_by_auto': None,\n",
627-
" 'linked_by_final': None,\n 'linked_by_aeco_auto': [],\n 'linked_by_aeco_final': None,\n",
628-
" 'FOOPs_manual': FOOPs_manual,\n 'FOOPs_auto': None,\n 'FOOPs_final': None,\n",
629-
" 'conforms_to_standards_manual': conforms_to_standards_manual,\n 'conforms_to_standards_auto': None,\n 'conforms_to_standards_final': None,\n",
630-
" 'conceptual_data_model_manual': conceptual_data_model_manual,\n 'conceptual_data_model_auto': None,\n 'conceptual_data_model_final': None,\n",
631-
" 'cluster_manual': cluster_manual,\n 'cluster_auto': None,\n 'cluster_final': None,\n",
632-
" 'reference_manual': reference_manual,\n 'reference_auto': None,\n 'reference_final': None,\n",
633-
" 'primary_domain_manual': primary_domain_manual,\n 'primary_domain_auto': None,\n 'primary_domain_final': None,\n",
634-
" 'secondary_domain_manual': secondary_domain_manual,\n 'secondary_domain_auto': None,\n 'secondary_domain_final': None,\n",
635-
" 'creator_auto': [],\n 'creator_final': None,\n",
636-
" 'publisher_auto': None,\n 'publisher_final': None,\n",
625+
" 'linked_by_final': None,\n",
626+
" 'linked_by_aeco_auto': [],\n",
627+
" 'linked_by_aeco_final': None,\n",
628+
" 'FOOPs_manual': FOOPs_manual,\n",
629+
" 'FOOPs_auto': None,\n",
630+
" 'FOOPs_final': None,\n",
631+
" 'conforms_to_standards_manual': conforms_to_standards_manual,\n",
632+
" 'conforms_to_standards_auto': None,\n",
633+
" 'conforms_to_standards_final': None,\n",
634+
" 'conceptual_data_model_manual': conceptual_data_model_manual,\n",
635+
" 'conceptual_data_model_auto': None,\n",
636+
" 'conceptual_data_model_final': None,\n",
637+
" 'cluster_manual': cluster_manual,\n",
638+
" 'cluster_auto': None,\n",
639+
" 'cluster_final': None,\n",
640+
" 'reference_manual': reference_manual,\n",
641+
" 'reference_auto': None,\n",
642+
" 'reference_final': None,\n",
643+
" 'primary_domain_manual': primary_domain_manual,\n",
644+
" 'primary_domain_auto': None,\n",
645+
" 'primary_domain_final': None,\n",
646+
" 'secondary_domain_manual': secondary_domain_manual,\n",
647+
" 'secondary_domain_auto': None,\n",
648+
" 'secondary_domain_final': None,\n",
649+
" 'creator_auto': [],\n",
650+
" 'creator_final': None,\n",
651+
" 'publisher_auto': None,\n",
652+
" 'publisher_final': None,\n",
637653
" 'referenced_ontologies': [],\n",
638-
" 'classes_count_auto': None,\n 'classes_count_final': None,\n",
639-
" 'data_properties_count_auto': None,\n 'data_properties_count_final': None,\n 'object_properties_count_auto': None,\n 'object_properties_count_final': None,\n",
654+
" 'classes_count_auto': None,\n",
655+
" 'classes_count_final': None,\n",
656+
" 'data_properties_count_auto': None,\n",
657+
" 'data_properties_count_final': None,\n",
658+
" 'object_properties_count_auto': None,\n",
659+
" 'object_properties_count_final': None,\n",
640660
" }\n",
641661
"\n",
642662
" # Normalize yes/no to boolean for _manual fields\n",
@@ -675,7 +695,8 @@
675695
" result['publisher_auto'] = metadata['publisher']\n",
676696
"\n",
677697
" result['classes_count_auto'] = metadata['classes_count']\n",
678-
" result['data_properties_count_auto'] = metadata['data_properties_count']\n result['object_properties_count_auto'] = metadata['object_properties_count']\n",
698+
" result['data_properties_count_auto'] = metadata['data_properties_count']\n",
699+
" result['object_properties_count_auto'] = metadata['object_properties_count']\n",
679700
"\n",
680701
" # Extract annotation coverage\n",
681702
" annotation_info = extract_annotation_coverage(ttl_file)\n",
@@ -851,7 +872,8 @@
851872
"\n",
852873
" # Count fields\n",
853874
" onto['classes_count_final'] = onto.get('classes_count_auto')\n",
854-
" onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n",
875+
" onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n",
876+
" onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n",
855877
"\n",
856878
" # Creator and publisher\n",
857879
" onto['creator_final'] = onto.get('creator_auto')\n",
@@ -885,7 +907,8 @@
885907
"\n",
886908
" # Count fields\n",
887909
" onto['classes_count_final'] = onto.get('classes_count_auto')\n",
888-
" onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n",
910+
" onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n",
911+
" onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n",
889912
"\n",
890913
" # Creator and publisher\n",
891914
" onto['creator_final'] = onto.get('creator_auto')\n",
@@ -1152,9 +1175,12 @@
11521175
" if documentation and documentation == True or str(documentation).lower() in ['yes', 'true']:\n",
11531176
" quality[0] = 1\n",
11541177
" \n",
1155-
" annotation = onto.get('annotation_final', '')\n",
1156-
" if annotation and annotation == True or str(annotation).lower() in ['yes', 'true']:\n",
1157-
" quality[1] = 1\n",
1178+
" #annotation = onto.get('annotation_final', '')\n",
1179+
" #if annotation and annotation == True or str(annotation).lower() in ['yes', 'true']:\n",
1180+
" # quality[1] = 1\n",
1181+
" annotation_coverage = onto.get('annotation_coverage_percent', '')\n",
1182+
" if annotation_coverage:\n",
1183+
" quality[1] = annotation_coverage / 100.0\n",
11581184
" \n",
11591185
" onto['score_quality'] = sum(quality)\n"
11601186
]
@@ -1179,13 +1205,6 @@
11791205
"#write_output_JSON(ontologies, output_json)"
11801206
]
11811207
},
1182-
{
1183-
"cell_type": "markdown",
1184-
"metadata": {},
1185-
"source": [
1186-
"# FOOPs Assessment"
1187-
]
1188-
},
11891208
{
11901209
"cell_type": "code",
11911210
"execution_count": null,
@@ -1199,19 +1218,11 @@
11991218
" \"Content-Type\": \"application/json;charset=UTF-8\",\n",
12001219
"}\n",
12011220
"\n",
1202-
"for index, row in df_ontologies.iterrows():\n",
1203-
" acronym = row['Acronym']\n",
1204-
" uri = row['URI/Namespace']\n",
1205-
" name = row['Name']\n",
1206-
" licensing = row['Licensing']\n",
1221+
"def get_foops_score(uri):\n",
12071222
"\n",
1208-
" print(f\"acronym: {acronym}:\")\n",
1209-
" print(f\" uri: {uri}; name: {name}; license: {licensing}\")\n",
1223+
" foops_score = -1.0\n",
12101224
"\n",
12111225
" if uri == 'n/a' or pd.isnull(uri):\n",
1212-
" auto_uri = \"\"\n",
1213-
" auto_name = \"\"\n",
1214-
" auto_licensing = \"\"\n",
12151226
" foops_score = 0.0\n",
12161227
" else:\n",
12171228
" try:\n",
@@ -1221,66 +1232,54 @@
12211232
" print(response)\n",
12221233
"\n",
12231234
" auto_uri = response['ontology_URI']\n",
1224-
" auto_name = response['ontology_title']\n",
1225-
" auto_licensing = response['ontology_license']\n",
1235+
" auto_title = response['ontology_title']\n",
1236+
" #auto_licensing = response['ontology_license']\n",
12261237
" foops_score = response['overall_score']\n",
1227-
" checks = response['checks']\n",
1238+
" #checks = response['checks']\n",
12281239
"\n",
12291240
" except requests.exceptions.HTTPError as e:\n",
12301241
" print(\"HTTP error:\", e.response.status_code, e.response.text)\n",
1231-
" auto_uri, auto_name, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
1242+
" auto_uri, auto_title, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
12321243
"\n",
12331244
" except requests.exceptions.ConnectionError:\n",
12341245
" print(\"Error: Failed to connect to the server\")\n",
1235-
" auto_uri, auto_name, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
1246+
" auto_uri, auto_title, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
12361247
"\n",
12371248
" except requests.exceptions.Timeout:\n",
12381249
" print(\"Error: Request timed out\")\n",
1239-
" auto_uri, auto_name, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
1250+
" auto_uri, auto_title, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
12401251
"\n",
12411252
" except requests.exceptions.RequestException as e:\n",
12421253
" print(\"Unexpected error:\", str(e))\n",
1243-
" auto_uri, auto_name, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
1244-
"\n",
1245-
" print(f\" auto_uri: {auto_uri}; auto_name: {auto_name}; auto_license: {auto_licensing}; foops_score: {foops_score}\")\n",
1246-
"\n",
1247-
" if auto_name == \"unknown\":\n",
1248-
" auto_name = \"\"\n",
1249-
" print(f\" auto_name from FOOPs is empty!\")\n",
1250-
" elif auto_name != name:\n",
1251-
" print(f\" Fix name to auto_name: {auto_name}\")\n",
1252-
"\n",
1253-
" if auto_uri == \"unknown\":\n",
1254-
" auto_uri = \"\"\n",
1255-
" print(f\" auto_uri from FOOPs is empty!\")\n",
1256-
" elif auto_uri != uri:\n",
1257-
" print(f\" Fix uri to auto_uri: {auto_uri}\")\n",
1258-
"\n",
1259-
" if auto_licensing == \"unknown\":\n",
1260-
" auto_licensing = \"\"\n",
1261-
" print(f\" auto_licensing from FOOPs is empty!\")\n",
1262-
" elif auto_licensing != licensing:\n",
1263-
" print(f\" Fix licensing to auto_licensing: {auto_licensing}\")\n",
1264-
"\n",
1265-
" df_ontologies.loc[index, 'Auto_Name'] = auto_name\n",
1266-
" df_ontologies.loc[index, 'Auto_Licensing'] = auto_licensing\n",
1267-
" df_ontologies.loc[index, 'FOOPs_Score'] = foops_score"
1268-
]
1269-
},
1270-
{
1271-
"cell_type": "code",
1272-
"execution_count": null,
1273-
"metadata": {},
1274-
"outputs": [],
1275-
"source": [
1276-
"print(df_ontologies)"
1277-
]
1278-
},
1279-
{
1280-
"cell_type": "markdown",
1281-
"metadata": {},
1282-
"source": [
1283-
"# Save Final Output"
1254+
" auto_uri, auto_title, auto_licensing, foops_score = \"\", \"\", \"\", 0.0\n",
1255+
"\n",
1256+
" print(f\" auto_uri: {auto_uri}; auto_title: {auto_title}; foops_score: {foops_score}\")\n",
1257+
"\n",
1258+
" return foops_score\n",
1259+
"\n",
1260+
"def process_ontologies_foops(ontologies, debug=False):\n",
1261+
"\n",
1262+
" for index, onto in enumerate(ontologies):\n",
1263+
" #if index > 5:\n",
1264+
" # continue\n",
1265+
"\n",
1266+
" prefix = onto['prefix_final']\n",
1267+
" uri = onto['uri']\n",
1268+
" title = onto['title_final']\n",
1269+
"\n",
1270+
" print(f\"prefix: {prefix}; title: {title}; uri: {uri}\")\n",
1271+
" \n",
1272+
" foops_score = get_foops_score(uri)\n",
1273+
" \n",
1274+
" onto['FOOPs_auto'] = foops_score\n",
1275+
" if foops_score == -1.0:\n",
1276+
" onto['FOOPs_auto'] = None\n",
1277+
" onto['FOOPs_final'] = onto['FOOPs_manual']\n",
1278+
" else:\n",
1279+
" onto['FOOPs_auto'] = foops_score\n",
1280+
" onto['FOOPs_final'] = foops_score\n",
1281+
" \n",
1282+
" return ontologies\n"
12841283
]
12851284
},
12861285
{
@@ -1289,37 +1288,15 @@
12891288
"metadata": {},
12901289
"outputs": [],
12911290
"source": [
1292-
"file_path_output = \"C:/Users/fbosche/University College London/EC3 - 1. Modelling and Standards - 1. Modelling and Standards/Material/Project D_Ontologies/Scripts/Ontologies_forRepo_2025.11.24_Output.xlsx\"\n",
1293-
"\n",
1294-
"# Write DataFrame (without index)\n",
1295-
"df_ontologies.to_excel(file_path_output, sheet_name=sheet_name, index=False)\n",
1296-
"\n",
1297-
"# Open with openpyxl and convert to Table\n",
1298-
"wb = load_workbook(file_path_output)\n",
1299-
"ws = wb[sheet_name]\n",
1300-
"\n",
1301-
"# Define range (from A1 to last cell)\n",
1302-
"end_col = ws.max_column\n",
1303-
"end_row = ws.max_row\n",
1304-
"end_cell = ws.cell(row=end_row, column=end_col).coordinate\n",
1305-
"table_ref = f\"A1:{end_cell}\"\n",
1306-
"\n",
1307-
"# Create table with style\n",
1308-
"table = Table(displayName=\"OntologyTable\", ref=table_ref)\n",
1309-
"style = TableStyleInfo(\n",
1310-
" name=\"TableStyleMedium9\",\n",
1311-
" showFirstColumn=False,\n",
1312-
" showLastColumn=False,\n",
1313-
" showRowStripes=True,\n",
1314-
" showColumnStripes=False\n",
1315-
")\n",
1316-
"table.tableStyleInfo = style\n",
1317-
"\n",
1318-
"# Add table to sheet and save\n",
1319-
"ws.add_table(table)\n",
1320-
"wb.save(file_path_output)\n",
1321-
"\n",
1322-
"print(f\"Output saved to {file_path_output}\")"
1291+
"# Process all ontologies (set debug=True to see detailed prefix/URI matching)\n",
1292+
"ontologies = process_ontologies_foops(ontologies, debug=False)\n",
1293+
"\n",
1294+
"# Write outputs\n",
1295+
"filepath = Path(file_path_ontologies)\n",
1296+
"output_excel = filepath.parent / \"Ontologies_forRepo.xlsx\"\n",
1297+
"write_output_EXCEL(ontologies, output_excel)\n",
1298+
"#output_json = filepath.parent / \"Ontologies_forRepo.json\"\n",
1299+
"#write_output_JSON(ontologies, output_json)\n"
13231300
]
13241301
}
13251302
],
@@ -1344,4 +1321,4 @@
13441321
},
13451322
"nbformat": 4,
13461323
"nbformat_minor": 2
1347-
}
1324+
}

0 commit comments

Comments (0)