66 "metadata" : {},
77 "outputs" : [],
88 "source" : [
9- " pip install rdflib\n " ,
10- " pip install requests"
9+ " pip install rdflib"
1110 ]
1211 },
1312 {
2120 " from pathlib import Path\n " ,
2221 " import json\n " ,
2322 " import numpy as np\n " ,
24- " import re\n " ,
25- " from collections import Counter\n " ,
2623 " from typing import Dict, List, Tuple, Optional\n " ,
2724 " from openpyxl import load_workbook\n " ,
28- " from openpyxl.worksheet.table import Table, TableStyleInfo\n " ,
2925 " from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, URIRef\n " ,
30- " from rdflib.namespace import DC, DCTERMS, SKOS\n " ,
31- " from urllib.parse import urlparse"
26+ " from rdflib.namespace import DC, DCTERMS, SKOS"
3227 ]
3328 },
3429 {
338333 " 'license': get_value([DCTERMS.license, DC.rights]),\n " ,
339334 " 'publisher': get_value([DCTERMS.publisher, DC.publisher]),\n " ,
340335 " 'version': get_value([OWL.versionInfo, DCTERMS.hasVersion]),\n " ,
341- " 'preferred_prefix': get_value([VANN.preferredNamespacePrefix]),\n 'classes_count': sum(1 for _ in g.subjects(RDF.type, OWL.Class)),\n 'data_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.DatatypeProperty)),\n 'object_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.ObjectProperty))\n " ,
336+ " 'preferred_prefix': get_value([VANN.preferredNamespacePrefix]),\n " ,
337+ " 'classes_count': sum(1 for _ in g.subjects(RDF.type, OWL.Class)),\n " ,
338+ " 'data_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.DatatypeProperty)),\n " ,
339+ " 'object_properties_count': sum(1 for _ in g.subjects(RDF.type, OWL.ObjectProperty))\n " ,
342340 " }\n " ,
343341 " \n " ,
344342 " return metadata\n " ,
624622 " 'annotation_coverage_percent': None,\n " ,
625623 " 'linked_by_manual': linked_by_manual,\n " ,
626624 " 'linked_by_auto': None,\n " ,
627- " 'linked_by_final': None,\n 'linked_by_aeco_auto': [],\n 'linked_by_aeco_final': None,\n " ,
628- " 'FOOPs_manual': FOOPs_manual,\n 'FOOPs_auto': None,\n 'FOOPs_final': None,\n " ,
629- " 'conforms_to_standards_manual': conforms_to_standards_manual,\n 'conforms_to_standards_auto': None,\n 'conforms_to_standards_final': None,\n " ,
630- " 'conceptual_data_model_manual': conceptual_data_model_manual,\n 'conceptual_data_model_auto': None,\n 'conceptual_data_model_final': None,\n " ,
631- " 'cluster_manual': cluster_manual,\n 'cluster_auto': None,\n 'cluster_final': None,\n " ,
632- " 'reference_manual': reference_manual,\n 'reference_auto': None,\n 'reference_final': None,\n " ,
633- " 'primary_domain_manual': primary_domain_manual,\n 'primary_domain_auto': None,\n 'primary_domain_final': None,\n " ,
634- " 'secondary_domain_manual': secondary_domain_manual,\n 'secondary_domain_auto': None,\n 'secondary_domain_final': None,\n " ,
635- " 'creator_auto': [],\n 'creator_final': None,\n " ,
636- " 'publisher_auto': None,\n 'publisher_final': None,\n " ,
625+ " 'linked_by_final': None,\n " ,
626+ " 'linked_by_aeco_auto': [],\n " ,
627+ " 'linked_by_aeco_final': None,\n " ,
628+ " 'FOOPs_manual': FOOPs_manual,\n " ,
629+ " 'FOOPs_auto': None,\n " ,
630+ " 'FOOPs_final': None,\n " ,
631+ " 'conforms_to_standards_manual': conforms_to_standards_manual,\n " ,
632+ " 'conforms_to_standards_auto': None,\n " ,
633+ " 'conforms_to_standards_final': None,\n " ,
634+ " 'conceptual_data_model_manual': conceptual_data_model_manual,\n " ,
635+ " 'conceptual_data_model_auto': None,\n " ,
636+ " 'conceptual_data_model_final': None,\n " ,
637+ " 'cluster_manual': cluster_manual,\n " ,
638+ " 'cluster_auto': None,\n " ,
639+ " 'cluster_final': None,\n " ,
640+ " 'reference_manual': reference_manual,\n " ,
641+ " 'reference_auto': None,\n " ,
642+ " 'reference_final': None,\n " ,
643+ " 'primary_domain_manual': primary_domain_manual,\n " ,
644+ " 'primary_domain_auto': None,\n " ,
645+ " 'primary_domain_final': None,\n " ,
646+ " 'secondary_domain_manual': secondary_domain_manual,\n " ,
647+ " 'secondary_domain_auto': None,\n " ,
648+ " 'secondary_domain_final': None,\n " ,
649+ " 'creator_auto': [],\n " ,
650+ " 'creator_final': None,\n " ,
651+ " 'publisher_auto': None,\n " ,
652+ " 'publisher_final': None,\n " ,
637653 " 'referenced_ontologies': [],\n " ,
638- " 'classes_count_auto': None,\n 'classes_count_final': None,\n " ,
639- " 'data_properties_count_auto': None,\n 'data_properties_count_final': None,\n 'object_properties_count_auto': None,\n 'object_properties_count_final': None,\n " ,
654+ " 'classes_count_auto': None,\n " ,
655+ " 'classes_count_final': None,\n " ,
656+ " 'data_properties_count_auto': None,\n " ,
657+ " 'data_properties_count_final': None,\n " ,
658+ " 'object_properties_count_auto': None,\n " ,
659+ " 'object_properties_count_final': None,\n " ,
640660 " }\n " ,
641661 " \n " ,
642662 " # Normalize yes/no to boolean for _manual fields\n " ,
675695 " result['publisher_auto'] = metadata['publisher']\n " ,
676696 " \n " ,
677697 " result['classes_count_auto'] = metadata['classes_count']\n " ,
678- " result['data_properties_count_auto'] = metadata['data_properties_count']\n result['object_properties_count_auto'] = metadata['object_properties_count']\n " ,
698+ " result['data_properties_count_auto'] = metadata['data_properties_count']\n " ,
699+ " result['object_properties_count_auto'] = metadata['object_properties_count']\n " ,
679700 " \n " ,
680701 " # Extract annotation coverage\n " ,
681702 " annotation_info = extract_annotation_coverage(ttl_file)\n " ,
851872 " \n " ,
852873 " # Count fields\n " ,
853874 " onto['classes_count_final'] = onto.get('classes_count_auto')\n " ,
854- " onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n " ,
875+ " onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n " ,
876+ " onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n " ,
855877 " \n " ,
856878 " # Creator and publisher\n " ,
857879 " onto['creator_final'] = onto.get('creator_auto')\n " ,
885907 " \n " ,
886908 " # Count fields\n " ,
887909 " onto['classes_count_final'] = onto.get('classes_count_auto')\n " ,
888- " onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n " ,
910+ " onto['data_properties_count_final'] = onto.get('data_properties_count_auto')\n " ,
911+ " onto['object_properties_count_final'] = onto.get('object_properties_count_auto')\n " ,
889912 " \n " ,
890913 " # Creator and publisher\n " ,
891914 " onto['creator_final'] = onto.get('creator_auto')\n " ,
11521175 " if documentation and documentation == True or str(documentation).lower() in ['yes', 'true']:\n " ,
11531176 " quality[0] = 1\n " ,
11541177 " \n " ,
1155- " annotation = onto.get('annotation_final', '')\n " ,
1156- " if annotation and annotation == True or str(annotation).lower() in ['yes', 'true']:\n " ,
1157- " quality[1] = 1\n " ,
1178+ " #annotation = onto.get('annotation_final', '')\n " ,
1179+ " #if annotation and annotation == True or str(annotation).lower() in ['yes', 'true']:\n " ,
1180+ " # quality[1] = 1\n " ,
1181+ " annotation_coverage = onto.get('annotation_coverage_percent', '')\n " ,
1182+ " if annotation_coverage:\n " ,
1183+ " quality[1] = annotation_coverage / 100.0\n " ,
11581184 " \n " ,
11591185 " onto['score_quality'] = sum(quality)\n "
11601186 ]
11791205 " #write_output_JSON(ontologies, output_json)"
11801206 ]
11811207 },
1182- {
1183- "cell_type" : " markdown" ,
1184- "metadata" : {},
1185- "source" : [
1186- " # FOOPs Assessment"
1187- ]
1188- },
11891208 {
11901209 "cell_type" : " code" ,
11911210 "execution_count" : null ,
11991218 " \" Content-Type\" : \" application/json;charset=UTF-8\" ,\n " ,
12001219 " }\n " ,
12011220 " \n " ,
1202- " for index, row in df_ontologies.iterrows():\n " ,
1203- " acronym = row['Acronym']\n " ,
1204- " uri = row['URI/Namespace']\n " ,
1205- " name = row['Name']\n " ,
1206- " licensing = row['Licensing']\n " ,
1221+ " def get_foops_score(uri):\n " ,
12071222 " \n " ,
1208- " print(f\" acronym: {acronym}:\" )\n " ,
1209- " print(f\" uri: {uri}; name: {name}; license: {licensing}\" )\n " ,
1223+ " foops_score = -1.0\n " ,
12101224 " \n " ,
12111225 " if uri == 'n/a' or pd.isnull(uri):\n " ,
1212- " auto_uri = \"\"\n " ,
1213- " auto_name = \"\"\n " ,
1214- " auto_licensing = \"\"\n " ,
12151226 " foops_score = 0.0\n " ,
12161227 " else:\n " ,
12171228 " try:\n " ,
12211232 " print(response)\n " ,
12221233 " \n " ,
12231234 " auto_uri = response['ontology_URI']\n " ,
1224- " auto_name = response['ontology_title']\n " ,
1225- " auto_licensing = response['ontology_license']\n " ,
1235+ " auto_title = response['ontology_title']\n " ,
1236+ " # auto_licensing = response['ontology_license']\n " ,
12261237 " foops_score = response['overall_score']\n " ,
1227- " checks = response['checks']\n " ,
1238+ " # checks = response['checks']\n " ,
12281239 " \n " ,
12291240 " except requests.exceptions.HTTPError as e:\n " ,
12301241 " print(\" HTTP error:\" , e.response.status_code, e.response.text)\n " ,
1231- " auto_uri, auto_name , auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
1242+ " auto_uri, auto_title , auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
12321243 " \n " ,
12331244 " except requests.exceptions.ConnectionError:\n " ,
12341245 " print(\" Error: Failed to connect to the server\" )\n " ,
1235- " auto_uri, auto_name , auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
1246+ " auto_uri, auto_title , auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
12361247 " \n " ,
12371248 " except requests.exceptions.Timeout:\n " ,
12381249 " print(\" Error: Request timed out\" )\n " ,
1239- " auto_uri, auto_name , auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
1250+ " auto_uri, auto_title , auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
12401251 " \n " ,
12411252 " except requests.exceptions.RequestException as e:\n " ,
12421253 " print(\" Unexpected error:\" , str(e))\n " ,
1243- " auto_uri, auto_name, auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
1244- " \n " ,
1245- " print(f\" auto_uri: {auto_uri}; auto_name: {auto_name}; auto_license: {auto_licensing}; foops_score: {foops_score}\" )\n " ,
1246- " \n " ,
1247- " if auto_name == \" unknown\" :\n " ,
1248- " auto_name = \"\"\n " ,
1249- " print(f\" auto_name from FOOPs is empty!\" )\n " ,
1250- " elif auto_name != name:\n " ,
1251- " print(f\" Fix name to auto_name: {auto_name}\" )\n " ,
1252- " \n " ,
1253- " if auto_uri == \" unknown\" :\n " ,
1254- " auto_uri = \"\"\n " ,
1255- " print(f\" auto_uri from FOOPs is empty!\" )\n " ,
1256- " elif auto_uri != uri:\n " ,
1257- " print(f\" Fix uri to auto_uri: {auto_uri}\" )\n " ,
1258- " \n " ,
1259- " if auto_licensing == \" unknown\" :\n " ,
1260- " auto_licensing = \"\"\n " ,
1261- " print(f\" auto_licensing from FOOPs is empty!\" )\n " ,
1262- " elif auto_licensing != licensing:\n " ,
1263- " print(f\" Fix licensing to auto_licensing: {auto_licensing}\" )\n " ,
1264- " \n " ,
1265- " df_ontologies.loc[index, 'Auto_Name'] = auto_name\n " ,
1266- " df_ontologies.loc[index, 'Auto_Licensing'] = auto_licensing\n " ,
1267- " df_ontologies.loc[index, 'FOOPs_Score'] = foops_score"
1268- ]
1269- },
1270- {
1271- "cell_type" : " code" ,
1272- "execution_count" : null ,
1273- "metadata" : {},
1274- "outputs" : [],
1275- "source" : [
1276- " print(df_ontologies)"
1277- ]
1278- },
1279- {
1280- "cell_type" : " markdown" ,
1281- "metadata" : {},
1282- "source" : [
1283- " # Save Final Output"
1254+ " auto_uri, auto_title, auto_licensing, foops_score = \"\" , \"\" , \"\" , 0.0\n " ,
1255+ " \n " ,
1256+ " print(f\" auto_uri: {auto_uri}; auto_title: {auto_title}; foops_score: {foops_score}\" )\n " ,
1257+ " \n " ,
1258+ " return foops_score\n " ,
1259+ " \n " ,
def process_ontologies_foops(ontologies, debug=False):
    """Attach FOOPs scores to every ontology record.

    For each record, its 'uri' is handed to get_foops_score. A returned value
    of -1.0 is treated as "no score available": 'FOOPs_auto' is set to None
    and 'FOOPs_final' falls back to the record's 'FOOPs_manual' value. Any
    other returned value is stored in both 'FOOPs_auto' and 'FOOPs_final'.

    Args:
        ontologies: list of ontology dicts. Each record must provide
            'prefix_final', 'uri' and 'title_final'; 'FOOPs_manual' is read
            only in the fallback case.
        debug: accepted as part of the signature; not read by this function.

    Returns:
        The same list object that was passed in, with its records updated
        in place.
    """
    no_score = -1.0  # value that means "no automatic score was obtained"

    for record in ontologies:
        record_prefix = record['prefix_final']
        record_uri = record['uri']
        record_title = record['title_final']

        print(f"prefix: {record_prefix}; title: {record_title}; uri: {record_uri}")

        score = get_foops_score(record_uri)
        has_score = score != no_score

        # Without an automatic score, keep the manually curated value as final.
        record['FOOPs_auto'] = score if has_score else None
        record['FOOPs_final'] = score if has_score else record['FOOPs_manual']

    return ontologies
12841283 ]
12851284 },
12861285 {
12891288 "metadata" : {},
12901289 "outputs" : [],
12911290 "source" : [
1292- " file_path_output = \" C:/Users/fbosche/University College London/EC3 - 1. Modelling and Standards - 1. Modelling and Standards/Material/Project D_Ontologies/Scripts/Ontologies_forRepo_2025.11.24_Output.xlsx\"\n " ,
1293- " \n " ,
1294- " # Write DataFrame (without index)\n " ,
1295- " df_ontologies.to_excel(file_path_output, sheet_name=sheet_name, index=False)\n " ,
1296- " \n " ,
1297- " # Open with openpyxl and convert to Table\n " ,
1298- " wb = load_workbook(file_path_output)\n " ,
1299- " ws = wb[sheet_name]\n " ,
1300- " \n " ,
1301- " # Define range (from A1 to last cell)\n " ,
1302- " end_col = ws.max_column\n " ,
1303- " end_row = ws.max_row\n " ,
1304- " end_cell = ws.cell(row=end_row, column=end_col).coordinate\n " ,
1305- " table_ref = f\" A1:{end_cell}\"\n " ,
1306- " \n " ,
1307- " # Create table with style\n " ,
1308- " table = Table(displayName=\" OntologyTable\" , ref=table_ref)\n " ,
1309- " style = TableStyleInfo(\n " ,
1310- " name=\" TableStyleMedium9\" ,\n " ,
1311- " showFirstColumn=False,\n " ,
1312- " showLastColumn=False,\n " ,
1313- " showRowStripes=True,\n " ,
1314- " showColumnStripes=False\n " ,
1315- " )\n " ,
1316- " table.tableStyleInfo = style\n " ,
1317- " \n " ,
1318- " # Add table to sheet and save\n " ,
1319- " ws.add_table(table)\n " ,
1320- " wb.save(file_path_output)\n " ,
1321- " \n " ,
1322- " print(f\" Output saved to {file_path_output}\" )"
# Score every ontology with FOOPs; the records are updated in place and the
# same list is returned.
ontologies = process_ontologies_foops(ontologies, debug=False)

# The Excel report is written into the same folder as the input workbook.
filepath = Path(file_path_ontologies)
output_excel = filepath.parent.joinpath("Ontologies_forRepo.xlsx")
write_output_EXCEL(ontologies, output_excel)

# Optional JSON export (disabled):
#output_json = filepath.parent / "Ontologies_forRepo.json"
#write_output_JSON(ontologies, output_json)
13231300 ]
13241301 }
13251302 ],
13441321 },
13451322 "nbformat" : 4 ,
13461323 "nbformat_minor" : 2
1347- }
1324+ }
0 commit comments