Skip to content

Commit 20b4044

Browse files
realmarcin and claude committed
Move CRediT roles from Person to usage contexts (Option 1)
Addresses an issue where the Person class had context-dependent credit_roles that would cause conflicts when merging datasets by ORCID.

Changes:
- Remove credit_roles from Person class (D4D_Base_import.yaml)
- Add credit_roles to Creator class (D4D_Motivation.yaml)
- Update Person description to clarify dataset-scoped context
- Document that affiliation/email are contextual but acceptable
- Document that credit_roles vary by dataset and belong in usage contexts

Reasoning:
- CRediT roles are inherently dataset-specific (same person, different roles)
- Person should represent stable identity (ORCID) + contact info
- Roles belong to the contribution context, not the person entity
- Affiliation/email remain on Person as they're multivalued and match schema.org patterns, representing "current contact for this dataset"

This preserves the ability to use ORCID for cross-dataset identification while allowing role information to vary appropriately by context.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent de9ec21 commit 20b4044

File tree

6 files changed

+748
-731
lines changed

6 files changed

+748
-731
lines changed

project/jsonld/data_sheets_schema.jsonld

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,7 +2329,7 @@
23292329
},
23302330
{
23312331
"name": "person__affiliation",
2332-
"description": "The organization(s) to which the person belongs.",
2332+
"description": "The organization(s) to which the person belongs in the context of this dataset. May vary across datasets; multivalued to support multiple affiliations.",
23332333
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base",
23342334
"mappings": [
23352335
"http://schema.org/affiliation"
@@ -2346,7 +2346,7 @@
23462346
},
23472347
{
23482348
"name": "person__email",
2349-
"description": "The email address of the person.",
2349+
"description": "The email address of the person. Represents current/preferred contact information in the context of this dataset.",
23502350
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base",
23512351
"mappings": [
23522352
"http://schema.org/email"
@@ -2362,7 +2362,7 @@
23622362
},
23632363
{
23642364
"name": "person__orcid",
2365-
"description": "ORCID (Open Researcher and Contributor ID) - a persistent digital identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups of 4).",
2365+
"description": "ORCID (Open Researcher and Contributor ID) - a persistent digital identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups of 4). Use this for stable cross-dataset identification.",
23662366
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base",
23672367
"mappings": [
23682368
"http://schema.org/identifier"
@@ -2380,20 +2380,6 @@
23802380
"pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{3}[0-9X]$",
23812381
"@type": "SlotDefinition"
23822382
},
2383-
{
2384-
"name": "person__credit_roles",
2385-
"description": "Contributor roles using the CRediT (Contributor Roles Taxonomy). Specifies the specific contributions made by this person to the dataset.",
2386-
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base",
2387-
"slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/credit_roles",
2388-
"alias": "credit_roles",
2389-
"owner": "Person",
2390-
"domain_of": [
2391-
"Person"
2392-
],
2393-
"range": "CRediTRoleEnum",
2394-
"multivalued": true,
2395-
"@type": "SlotDefinition"
2396-
},
23972383
{
23982384
"name": "formatDialect__comment_prefix",
23992385
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base",
@@ -2537,6 +2523,20 @@
25372523
"range": "Organization",
25382524
"@type": "SlotDefinition"
25392525
},
2526+
{
2527+
"name": "creator__credit_roles",
2528+
"description": "Contributor roles using the CRediT (Contributor Roles Taxonomy) for the principal investigator or creator team. Specifies the specific contributions made to this dataset (e.g., Conceptualization, Data Curation, Methodology). Note: roles are specified here rather than on Person directly, since the same person may have different roles across different datasets.",
2529+
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/motivation",
2530+
"slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/motivation#credit_roles",
2531+
"alias": "credit_roles",
2532+
"owner": "Creator",
2533+
"domain_of": [
2534+
"Creator"
2535+
],
2536+
"range": "CRediTRoleEnum",
2537+
"multivalued": true,
2538+
"@type": "SlotDefinition"
2539+
},
25402540
{
25412541
"name": "fundingMechanism__grantor",
25422542
"description": "Name/identifier of the organization providing monetary or resource support.",
@@ -5062,7 +5062,7 @@
50625062
{
50635063
"name": "Person",
50645064
"definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/Person",
5065-
"description": "An individual human being.",
5065+
"description": "An individual human being. This class represents a person in the context of a specific dataset. Attributes like affiliation and email represent the person's current or most relevant contact information for this dataset. For stable cross-dataset identification, use the ORCID field. Note that contributor roles (CRediT) are specified in the usage context (e.g., Creator class) rather than on the Person directly, since roles vary by dataset.",
50665066
"from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base",
50675067
"mappings": [
50685068
"schema:Person"
@@ -5077,43 +5077,35 @@
50775077
"namedThing__description",
50785078
"person__affiliation",
50795079
"person__email",
5080-
"person__orcid",
5081-
"person__credit_roles"
5080+
"person__orcid"
50825081
],
50835082
"slot_usage": {},
50845083
"attributes": [
50855084
{
50865085
"name": "affiliation",
5087-
"description": "The organization(s) to which the person belongs.",
5086+
"description": "The organization(s) to which the person belongs in the context of this dataset. May vary across datasets; multivalued to support multiple affiliations.",
50885087
"slot_uri": "schema:affiliation",
50895088
"range": "Organization",
50905089
"multivalued": true,
50915090
"@type": "SlotDefinition"
50925091
},
50935092
{
50945093
"name": "email",
5095-
"description": "The email address of the person.",
5094+
"description": "The email address of the person. Represents current/preferred contact information in the context of this dataset.",
50965095
"slot_uri": "schema:email",
50975096
"range": "string",
50985097
"@type": "SlotDefinition"
50995098
},
51005099
{
51015100
"name": "orcid",
5102-
"description": "ORCID (Open Researcher and Contributor ID) - a persistent digital identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups of 4).",
5101+
"description": "ORCID (Open Researcher and Contributor ID) - a persistent digital identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups of 4). Use this for stable cross-dataset identification.",
51035102
"exact_mappings": [
51045103
"schema:identifier"
51055104
],
51065105
"slot_uri": "schema:identifier",
51075106
"range": "string",
51085107
"pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{3}[0-9X]$",
51095108
"@type": "SlotDefinition"
5110-
},
5111-
{
5112-
"name": "credit_roles",
5113-
"description": "Contributor roles using the CRediT (Contributor Roles Taxonomy). Specifies the specific contributions made by this person to the dataset.",
5114-
"range": "CRediTRoleEnum",
5115-
"multivalued": true,
5116-
"@type": "SlotDefinition"
51175109
}
51185110
],
51195111
"class_uri": "http://schema.org/Person",
@@ -5285,7 +5277,8 @@
52855277
"namedThing__description",
52865278
"datasetProperty__used_software",
52875279
"creator__principal_investigator",
5288-
"creator__affiliation"
5280+
"creator__affiliation",
5281+
"creator__credit_roles"
52895282
],
52905283
"slot_usage": {},
52915284
"attributes": [
@@ -5305,6 +5298,13 @@
53055298
"slot_uri": "schema:affiliation",
53065299
"range": "Organization",
53075300
"@type": "SlotDefinition"
5301+
},
5302+
{
5303+
"name": "credit_roles",
5304+
"description": "Contributor roles using the CRediT (Contributor Roles Taxonomy) for the principal investigator or creator team. Specifies the specific contributions made to this dataset (e.g., Conceptualization, Data Curation, Methodology). Note: roles are specified here rather than on Person directly, since the same person may have different roles across different datasets.",
5305+
"range": "CRediTRoleEnum",
5306+
"multivalued": true,
5307+
"@type": "SlotDefinition"
53085308
}
53095309
],
53105310
"class_uri": "https://w3id.org/bridge2ai/data-sheets-schema/motivation#Creator",
@@ -7225,7 +7225,7 @@
72257225
"source_file": "data_sheets_schema.yaml",
72267226
"source_file_date": "2025-11-18T17:40:41",
72277227
"source_file_size": 9614,
7228-
"generation_date": "2025-11-20T12:15:34",
7228+
"generation_date": "2025-11-20T12:24:02",
72297229
"@type": "SchemaDefinition",
72307230
"@context": [
72317231
"project/jsonld/data_sheets_schema.context.jsonld",

project/jsonschema/data_sheets_schema.schema.json

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,16 @@
496496
"null"
497497
]
498498
},
499+
"credit_roles": {
500+
"description": "Contributor roles using the CRediT (Contributor Roles Taxonomy) for the principal investigator or creator team. Specifies the specific contributions made to this dataset (e.g., Conceptualization, Data Curation, Methodology). Note: roles are specified here rather than on Person directly, since the same person may have different roles across different datasets.",
501+
"items": {
502+
"$ref": "#/$defs/CRediTRoleEnum"
503+
},
504+
"type": [
505+
"array",
506+
"null"
507+
]
508+
},
499509
"description": {
500510
"description": "A human-readable description for a thing.",
501511
"type": [
@@ -3897,10 +3907,10 @@
38973907
},
38983908
"Person": {
38993909
"additionalProperties": false,
3900-
"description": "An individual human being.",
3910+
"description": "An individual human being. This class represents a person in the context of a specific dataset. Attributes like affiliation and email represent the person's current or most relevant contact information for this dataset. For stable cross-dataset identification, use the ORCID field. Note that contributor roles (CRediT) are specified in the usage context (e.g., Creator class) rather than on the Person directly, since roles vary by dataset.",
39013911
"properties": {
39023912
"affiliation": {
3903-
"description": "The organization(s) to which the person belongs.",
3913+
"description": "The organization(s) to which the person belongs in the context of this dataset. May vary across datasets; multivalued to support multiple affiliations.",
39043914
"items": {
39053915
"type": "string"
39063916
},
@@ -3909,16 +3919,6 @@
39093919
"null"
39103920
]
39113921
},
3912-
"credit_roles": {
3913-
"description": "Contributor roles using the CRediT (Contributor Roles Taxonomy). Specifies the specific contributions made by this person to the dataset.",
3914-
"items": {
3915-
"$ref": "#/$defs/CRediTRoleEnum"
3916-
},
3917-
"type": [
3918-
"array",
3919-
"null"
3920-
]
3921-
},
39223922
"description": {
39233923
"description": "A human-readable description for a thing.",
39243924
"type": [
@@ -3927,7 +3927,7 @@
39273927
]
39283928
},
39293929
"email": {
3930-
"description": "The email address of the person.",
3930+
"description": "The email address of the person. Represents current/preferred contact information in the context of this dataset.",
39313931
"type": [
39323932
"string",
39333933
"null"
@@ -3945,7 +3945,7 @@
39453945
]
39463946
},
39473947
"orcid": {
3948-
"description": "ORCID (Open Researcher and Contributor ID) - a persistent digital identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups of 4).",
3948+
"description": "ORCID (Open Researcher and Contributor ID) - a persistent digital identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups of 4). Use this for stable cross-dataset identification.",
39493949
"pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{3}[0-9X]$",
39503950
"type": [
39513951
"string",

0 commit comments

Comments
 (0)