Skip to content

Commit 6abfdc2

Browse files
realmarcin and claude committed
Add Data Use Ontology (DUO) mappings to Data Governance module
Integrates standardized DUO terms for representing data use permissions and restrictions in the Data Governance module. Changes to D4D_Data_Governance.yaml: - Add DUO prefix (http://purl.obolibrary.org/obo/DUO_) - Add data_use_permission field to LicenseAndUseTerms with DataUsePermissionEnum - Add DUO broad_mappings to IPRestrictions (NCU, NPU for commercial restrictions) - Add DUO broad_mappings to ExportControlRegulatoryRestrictions (IRB, GS, IS) - Create comprehensive DataUsePermissionEnum with 22 DUO terms: * Permissions: no_restriction, general_research_use, health_medical_biomedical_research, disease_specific_research, population_origins_ancestry_research, clinical_care_use * Restrictions: no_commercial_use, non_profit_use_only, non_profit_use_and_non_commercial_use, no_methods_development, genetic_studies_only, ethics_approval_required, collaboration_required, publication_required, geographic_restriction, institution_specific, project_specific, user_specific, time_limit, return_to_database, publication_moratorium, no_population_ancestry_research Benefits: - Standardized representation of data use conditions - Machine-readable permissions and restrictions - Interoperability with GA4GH DUO standard - Facilitates automated data discovery and access decisions - Maps licensing terms to established biomedical data sharing ontology DUO reference: https://github.com/EBISPOT/DUO GA4GH standard: https://www.ga4gh.org/product/data-use-ontology-duo/ 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 20b4044 commit 6abfdc2

File tree

5 files changed

+1245
-678
lines changed

5 files changed

+1245
-678
lines changed

project/jsonld/data_sheets_schema.jsonld

Lines changed: 171 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,124 @@
10371037
}
10381038
]
10391039
},
1040+
{
1041+
"name": "DataUsePermissionEnum",
1042+
"definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance#DataUsePermissionEnum",
1043+
"description": "Data use permissions and restrictions based on the Data Use Ontology (DUO). DUO is a standardized ontology for representing data use conditions developed by GA4GH. See https://github.com/EBISPOT/DUO",
1044+
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance",
1045+
"permissible_values": [
1046+
{
1047+
"text": "no_restriction",
1048+
"description": "No restriction on data use",
1049+
"meaning": "DUO:0000004"
1050+
},
1051+
{
1052+
"text": "general_research_use",
1053+
"description": "Data available for any research purpose (GRU)",
1054+
"meaning": "DUO:0000042"
1055+
},
1056+
{
1057+
"text": "health_medical_biomedical_research",
1058+
"description": "Data limited to health, medical, or biomedical research (HMB)",
1059+
"meaning": "DUO:0000006"
1060+
},
1061+
{
1062+
"text": "disease_specific_research",
1063+
"description": "Data limited to research on specified disease(s) (DS)",
1064+
"meaning": "DUO:0000007"
1065+
},
1066+
{
1067+
"text": "population_origins_ancestry_research",
1068+
"description": "Data limited to population origins or ancestry research (POA)",
1069+
"meaning": "DUO:0000011"
1070+
},
1071+
{
1072+
"text": "clinical_care_use",
1073+
"description": "Data available for clinical care and applications (CC)",
1074+
"meaning": "DUO:0000043"
1075+
},
1076+
{
1077+
"text": "no_commercial_use",
1078+
"description": "Data use limited to non-commercial purposes (NCU)",
1079+
"meaning": "DUO:0000046"
1080+
},
1081+
{
1082+
"text": "non_profit_use_only",
1083+
"description": "Data use limited to not-for-profit organizations (NPU)",
1084+
"meaning": "DUO:0000045"
1085+
},
1086+
{
1087+
"text": "non_profit_use_and_non_commercial_use",
1088+
"description": "Data limited to not-for-profit organizations and non-commercial use (NPUNCU)",
1089+
"meaning": "DUO:0000018"
1090+
},
1091+
{
1092+
"text": "no_methods_development",
1093+
"description": "Data cannot be used for methods or software development (NMDS)",
1094+
"meaning": "DUO:0000015"
1095+
},
1096+
{
1097+
"text": "genetic_studies_only",
1098+
"description": "Data limited to genetic studies only (GSO)",
1099+
"meaning": "DUO:0000016"
1100+
},
1101+
{
1102+
"text": "ethics_approval_required",
1103+
"description": "Ethics approval (e.g., IRB/ERB) required for data use (IRB)",
1104+
"meaning": "DUO:0000021"
1105+
},
1106+
{
1107+
"text": "collaboration_required",
1108+
"description": "Collaboration with primary investigator required (COL)",
1109+
"meaning": "DUO:0000020"
1110+
},
1111+
{
1112+
"text": "publication_required",
1113+
"description": "Results must be published/shared with research community (PUB)",
1114+
"meaning": "DUO:0000019"
1115+
},
1116+
{
1117+
"text": "geographic_restriction",
1118+
"description": "Data use limited to specific geographic region (GS)",
1119+
"meaning": "DUO:0000022"
1120+
},
1121+
{
1122+
"text": "institution_specific",
1123+
"description": "Data use limited to approved institutions (IS)",
1124+
"meaning": "DUO:0000028"
1125+
},
1126+
{
1127+
"text": "project_specific",
1128+
"description": "Data use limited to approved project(s) (PS)",
1129+
"meaning": "DUO:0000027"
1130+
},
1131+
{
1132+
"text": "user_specific",
1133+
"description": "Data use limited to approved users (US)",
1134+
"meaning": "DUO:0000026"
1135+
},
1136+
{
1137+
"text": "time_limit",
1138+
"description": "Data use approved for limited time period (TS)",
1139+
"meaning": "DUO:0000025"
1140+
},
1141+
{
1142+
"text": "return_to_database",
1143+
"description": "Derived data must be returned to database/resource (RTN)",
1144+
"meaning": "DUO:0000029"
1145+
},
1146+
{
1147+
"text": "publication_moratorium",
1148+
"description": "Publication restricted until specified date (MOR)",
1149+
"meaning": "DUO:0000024"
1150+
},
1151+
{
1152+
"text": "no_population_ancestry_research",
1153+
"description": "Population/ancestry research prohibited (NPOA)",
1154+
"meaning": "DUO:0000044"
1155+
}
1156+
]
1157+
},
10401158
{
10411159
"name": "VariableTypeEnum",
10421160
"definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/variables#VariableTypeEnum",
@@ -4067,6 +4185,26 @@
40674185
"multivalued": true,
40684186
"@type": "SlotDefinition"
40694187
},
4188+
{
4189+
"name": "licenseAndUseTerms__data_use_permission",
4190+
"description": "Structured data use permissions using the Data Use Ontology (DUO). Specifies permitted uses (e.g., general research, health/medical research, disease-specific research) and restrictions (e.g., non-commercial use, ethics approval required, collaboration required). See https://github.com/EBISPOT/DUO",
4191+
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance",
4192+
"mappings": [
4193+
"http://purl.obolibrary.org/obo/DUO_0000001"
4194+
],
4195+
"exact_mappings": [
4196+
"http://purl.obolibrary.org/obo/DUO_0000001"
4197+
],
4198+
"slot_uri": "http://purl.obolibrary.org/obo/DUO_0000001",
4199+
"alias": "data_use_permission",
4200+
"owner": "LicenseAndUseTerms",
4201+
"domain_of": [
4202+
"LicenseAndUseTerms"
4203+
],
4204+
"range": "DataUsePermissionEnum",
4205+
"multivalued": true,
4206+
"@type": "SlotDefinition"
4207+
},
40704208
{
40714209
"name": "licenseAndUseTerms__contact_person",
40724210
"description": "Contact person for licensing questions. Provides structured contact information including name, email, affiliation, and optional ORCID. This person can answer questions about licensing terms, usage restrictions, fees, and permissions.",
@@ -4093,6 +4231,10 @@
40934231
"mappings": [
40944232
"http://purl.org/dc/terms/rights"
40954233
],
4234+
"broad_mappings": [
4235+
"http://purl.obolibrary.org/obo/DUO_0000046",
4236+
"http://purl.obolibrary.org/obo/DUO_0000045"
4237+
],
40964238
"slot_uri": "http://purl.org/dc/terms/rights",
40974239
"alias": "description",
40984240
"owner": "IPRestrictions",
@@ -4110,6 +4252,11 @@
41104252
"mappings": [
41114253
"http://purl.org/dc/terms/accessRights"
41124254
],
4255+
"broad_mappings": [
4256+
"http://purl.obolibrary.org/obo/DUO_0000021",
4257+
"http://purl.obolibrary.org/obo/DUO_0000022",
4258+
"http://purl.obolibrary.org/obo/DUO_0000028"
4259+
],
41134260
"slot_uri": "http://purl.org/dc/terms/accessRights",
41144261
"alias": "description",
41154262
"owner": "ExportControlRegulatoryRestrictions",
@@ -6962,6 +7109,7 @@
69627109
"namedThing__name",
69637110
"datasetProperty__used_software",
69647111
"licenseAndUseTerms__description",
7112+
"licenseAndUseTerms__data_use_permission",
69657113
"licenseAndUseTerms__contact_person"
69667114
],
69677115
"slot_usage": {},
@@ -6974,6 +7122,17 @@
69747122
"multivalued": true,
69757123
"@type": "SlotDefinition"
69767124
},
7125+
{
7126+
"name": "data_use_permission",
7127+
"description": "Structured data use permissions using the Data Use Ontology (DUO). Specifies permitted uses (e.g., general research, health/medical research, disease-specific research) and restrictions (e.g., non-commercial use, ethics approval required, collaboration required). See https://github.com/EBISPOT/DUO",
7128+
"exact_mappings": [
7129+
"DUO:0000001"
7130+
],
7131+
"slot_uri": "DUO:0000001",
7132+
"range": "DataUsePermissionEnum",
7133+
"multivalued": true,
7134+
"@type": "SlotDefinition"
7135+
},
69777136
{
69787137
"name": "contact_person",
69797138
"description": "Contact person for licensing questions. Provides structured contact information including name, email, affiliation, and optional ORCID. This person can answer questions about licensing terms, usage restrictions, fees, and permissions.",
@@ -6991,7 +7150,7 @@
69917150
{
69927151
"name": "IPRestrictions",
69937152
"definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance#IPRestrictions",
6994-
"description": "Have any third parties imposed IP-based or other restrictions on the data associated with the instances? If so, describe them and note any relevant fees or licensing terms.\n",
7153+
"description": "Have any third parties imposed IP-based or other restrictions on the data associated with the instances? If so, describe them and note any relevant fees or licensing terms. Maps to DUO terms related to commercial/non-profit use restrictions (NCU, NPU, NPUNCU).\n",
69957154
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance",
69967155
"is_a": "DatasetProperty",
69977156
"slots": [
@@ -7005,6 +7164,10 @@
70057164
{
70067165
"name": "description",
70077166
"description": "Explanation of third-party IP restrictions.",
7167+
"broad_mappings": [
7168+
"DUO:0000046",
7169+
"DUO:0000045"
7170+
],
70087171
"slot_uri": "dcterms:rights",
70097172
"range": "string",
70107173
"multivalued": true,
@@ -7017,7 +7180,7 @@
70177180
{
70187181
"name": "ExportControlRegulatoryRestrictions",
70197182
"definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance#ExportControlRegulatoryRestrictions",
7020-
"description": "Do any export controls or other regulatory restrictions apply to the dataset or to individual instances? Includes compliance tracking for regulations like GDPR, HIPAA, and EU AI Act. If so, please describe these restrictions and provide a link or copy of any supporting documentation.\n",
7183+
"description": "Do any export controls or other regulatory restrictions apply to the dataset or to individual instances? Includes compliance tracking for regulations like GDPR, HIPAA, and EU AI Act. If so, please describe these restrictions and provide a link or copy of any supporting documentation. Maps to DUO terms related to ethics approval, geographic restrictions, and institutional requirements.\n",
70217184
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance",
70227185
"is_a": "DatasetProperty",
70237186
"slots": [
@@ -7035,6 +7198,11 @@
70357198
{
70367199
"name": "description",
70377200
"description": "Export or regulatory restrictions on the dataset.",
7201+
"broad_mappings": [
7202+
"DUO:0000021",
7203+
"DUO:0000022",
7204+
"DUO:0000028"
7205+
],
70387206
"slot_uri": "dcterms:accessRights",
70397207
"range": "string",
70407208
"multivalued": true,
@@ -7225,7 +7393,7 @@
72257393
"source_file": "data_sheets_schema.yaml",
72267394
"source_file_date": "2025-11-18T17:40:41",
72277395
"source_file_size": 9614,
7228-
"generation_date": "2025-11-20T12:24:02",
7396+
"generation_date": "2025-11-20T12:34:09",
72297397
"@type": "SchemaDefinition",
72307398
"@context": [
72317399
"project/jsonld/data_sheets_schema.context.jsonld",

project/jsonschema/data_sheets_schema.schema.json

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,35 @@
12761276
"title": "DataSubset",
12771277
"type": "object"
12781278
},
1279+
"DataUsePermissionEnum": {
1280+
"description": "Data use permissions and restrictions based on the Data Use Ontology (DUO). DUO is a standardized ontology for representing data use conditions developed by GA4GH. See https://github.com/EBISPOT/DUO",
1281+
"enum": [
1282+
"no_restriction",
1283+
"general_research_use",
1284+
"health_medical_biomedical_research",
1285+
"disease_specific_research",
1286+
"population_origins_ancestry_research",
1287+
"clinical_care_use",
1288+
"no_commercial_use",
1289+
"non_profit_use_only",
1290+
"non_profit_use_and_non_commercial_use",
1291+
"no_methods_development",
1292+
"genetic_studies_only",
1293+
"ethics_approval_required",
1294+
"collaboration_required",
1295+
"publication_required",
1296+
"geographic_restriction",
1297+
"institution_specific",
1298+
"project_specific",
1299+
"user_specific",
1300+
"time_limit",
1301+
"return_to_database",
1302+
"publication_moratorium",
1303+
"no_population_ancestry_research"
1304+
],
1305+
"title": "DataUsePermissionEnum",
1306+
"type": "string"
1307+
},
12791308
"Dataset": {
12801309
"additionalProperties": false,
12811310
"description": "A single component of related observations and/or information that can be read, manipulated, transformed, and otherwise interpreted.",
@@ -2452,7 +2481,7 @@
24522481
},
24532482
"ExportControlRegulatoryRestrictions": {
24542483
"additionalProperties": false,
2455-
"description": "Do any export controls or other regulatory restrictions apply to the dataset or to individual instances? Includes compliance tracking for regulations like GDPR, HIPAA, and EU AI Act. If so, please describe these restrictions and provide a link or copy of any supporting documentation.",
2484+
"description": "Do any export controls or other regulatory restrictions apply to the dataset or to individual instances? Includes compliance tracking for regulations like GDPR, HIPAA, and EU AI Act. If so, please describe these restrictions and provide a link or copy of any supporting documentation. Maps to DUO terms related to ethics approval, geographic restrictions, and institutional requirements.",
24562485
"properties": {
24572486
"description": {
24582487
"description": "Export or regulatory restrictions on the dataset.",
@@ -3022,7 +3051,7 @@
30223051
},
30233052
"IPRestrictions": {
30243053
"additionalProperties": false,
3025-
"description": "Have any third parties imposed IP-based or other restrictions on the data associated with the instances? If so, describe them and note any relevant fees or licensing terms.",
3054+
"description": "Have any third parties imposed IP-based or other restrictions on the data associated with the instances? If so, describe them and note any relevant fees or licensing terms. Maps to DUO terms related to commercial/non-profit use restrictions (NCU, NPU, NPUNCU).",
30263055
"properties": {
30273056
"description": {
30283057
"description": "Explanation of third-party IP restrictions.",
@@ -3561,6 +3590,16 @@
35613590
"null"
35623591
]
35633592
},
3593+
"data_use_permission": {
3594+
"description": "Structured data use permissions using the Data Use Ontology (DUO). Specifies permitted uses (e.g., general research, health/medical research, disease-specific research) and restrictions (e.g., non-commercial use, ethics approval required, collaboration required). See https://github.com/EBISPOT/DUO",
3595+
"items": {
3596+
"$ref": "#/$defs/DataUsePermissionEnum"
3597+
},
3598+
"type": [
3599+
"array",
3600+
"null"
3601+
]
3602+
},
35643603
"description": {
35653604
"description": "Description of the dataset's license and terms of use (including links, costs, or usage constraints).\n",
35663605
"items": {

0 commit comments

Comments
 (0)