Skip to content

Commit 0aadc06

Browse files
committed
add author ordering
1 parent f0f5ca9 commit 0aadc06

File tree

2 files changed

+93
-13
lines changed

2 files changed

+93
-13
lines changed

osf/metadata/serializers/datacite/datacite_tree_walker.py

Lines changed: 42 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
FOAF,
1818
ORCID,
1919
OSF,
20+
PROV,
2021
ROR,
2122
SKOS,
2223
DATACITE,
@@ -128,23 +129,51 @@ def _visit_identifier(self, parent_el, *, doi_override=None):
128129
})
129130

130131
def _visit_creators(self, parent_el, focus_iri):
131-
creator_iris = set(self.basket[focus_iri:DCTERMS.creator])
132-
if (not creator_iris) and ((focus_iri, RDF.type, OSF.File) in self.basket):
133-
creator_iris.update(self.basket[focus_iri:OSF.hasFileVersion / DCTERMS.creator])
134-
if not creator_iris:
135-
creator_iris.update(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.creator])
136-
if not creator_iris:
137-
creator_iris.update(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.creator])
138-
if not creator_iris:
139-
creator_iris.update(self.basket[focus_iri:DCTERMS.contributor])
140-
if not creator_iris:
141-
creator_iris.update(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.contributor])
132+
creator_iris = []
133+
134+
ordered_contributors = []
135+
attribution_refs = list(self.basket[focus_iri:PROV.qualifiedAttribution])
136+
for attribution_ref in attribution_refs:
137+
try:
138+
order_val = next(self.basket[attribution_ref:OSF.order])
139+
except StopIteration:
140+
# If there is no explicit order, shove it to the end
141+
order_index = float('inf')
142+
else:
143+
try:
144+
order_index = order_val.toPython()
145+
except AttributeError:
146+
order_index = int(order_val)
147+
try:
148+
agent_iri = next(self.basket[attribution_ref:PROV.agent])
149+
except StopIteration:
150+
continue
151+
ordered_contributors.append((order_index, agent_iri))
152+
153+
if ordered_contributors:
154+
ordered_contributors.sort(key=lambda pair: pair[0])
155+
creator_iris.extend(agent_iri for _, agent_iri in ordered_contributors)
156+
157+
# Fallbacks when there is no explicit OSF ordering
142158
if not creator_iris:
143-
creator_iris.update(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.contributor])
159+
creator_iris = list(self.basket[focus_iri:DCTERMS.creator])
160+
if (not creator_iris) and ((focus_iri, RDF.type, OSF.File) in self.basket):
161+
creator_iris.extend(self.basket[focus_iri:OSF.hasFileVersion / DCTERMS.creator])
162+
if not creator_iris:
163+
creator_iris.extend(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.creator])
164+
if not creator_iris:
165+
creator_iris.extend(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.creator])
166+
if not creator_iris:
167+
creator_iris.extend(self.basket[focus_iri:DCTERMS.contributor])
168+
if not creator_iris:
169+
creator_iris.extend(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.contributor])
170+
if not creator_iris:
171+
creator_iris.extend(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.contributor])
172+
144173
if not creator_iris:
145174
raise ValueError(f'gathered no creators or contributors around {focus_iri}')
146175
creators_el = self.visit(parent_el, 'creators', is_list=True)
147-
for creator_iri in creator_iris: # TODO: "priority order"
176+
for creator_iri in creator_iris:
148177
creator_el = self.visit(creators_el, 'creator')
149178
for name in self.basket[creator_iri:FOAF.name]:
150179
self.visit(creator_el, 'creatorName', text=name, attrib={

tests/identifiers/test_datacite.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,57 @@ def test_datacite_build_metadata_for_dataarchive_registration(self, registration
129129
assert resource_type.text == 'Pre-registration'
130130
assert resource_type.attrib['resourceTypeGeneral'] == 'Dataset'
131131

132+
def test_datacite_creators_follow_osf_contributor_order(self, datacite_client):
133+
registration = RegistrationFactory(is_public=True)
134+
first = registration.creator
135+
second = AuthUserFactory()
136+
third = AuthUserFactory()
137+
registration.add_contributor(third, visible=True)
138+
registration.add_contributor(second, visible=True)
139+
registration.save()
140+
141+
visible_contributors = list(registration.visible_contributors)
142+
correct_order = [u.fullname for u in visible_contributors]
143+
assert correct_order == [
144+
first.fullname,
145+
third.fullname,
146+
second.fullname,
147+
]
148+
149+
metadata_xml = datacite_client.build_metadata(registration)
150+
parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
151+
root = lxml.etree.fromstring(metadata_xml, parser=parser)
152+
creators_el = root.find('{%s}creators' % schema40.ns[None])
153+
creator_elems = creators_el.findall('{%s}creator' % schema40.ns[None])
154+
xml_creator_names = [
155+
c.find('{%s}creatorName' % schema40.ns[None]).text
156+
for c in creator_elems
157+
]
158+
assert xml_creator_names == correct_order
159+
160+
auth = Auth(first)
161+
registration.move_contributor(first, auth=auth, index=2, save=True)
162+
registration.refresh_from_db()
163+
164+
visible_contributors = list(registration.visible_contributors)
165+
new_correct_order = [u.fullname for u in visible_contributors]
166+
assert new_correct_order == [
167+
third.fullname,
168+
second.fullname,
169+
first.fullname,
170+
]
171+
172+
metadata_xml = datacite_client.build_metadata(registration)
173+
root = lxml.etree.fromstring(metadata_xml, parser=parser)
174+
creators_el = root.find('{%s}creators' % schema40.ns[None])
175+
creator_elems = creators_el.findall('{%s}creator' % schema40.ns[None])
176+
xml_creator_names = [
177+
c.find('{%s}creatorName' % schema40.ns[None]).text
178+
for c in creator_elems
179+
]
180+
181+
assert xml_creator_names == new_correct_order
182+
132183
def test_datacite_format_contributors(self, datacite_client):
133184
visible_contrib = AuthUserFactory()
134185
visible_contrib2 = AuthUserFactory()

0 commit comments

Comments
 (0)