Skip to content

Commit ef0ffc6

Browse files
committed
Cleanup and better DOI handling
1 parent 0bd6a8e commit ef0ffc6

File tree

8 files changed

+89
-84
lines changed

8 files changed

+89
-84
lines changed

README.md

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
# caltechdata_api
22

3-
[![DOI](https://data.caltech.edu/badge/87960443.svg)](https://data.caltech.edu/badge/latestdoi/87960443)
4-
53
Python library for using the CaltechDATA API
64

75
- caltechdata_write write files and a DataCite 4 standard json record to CaltechDATA repository
@@ -18,49 +16,47 @@ Create a record:
1816

1917
```shell
2018
python write.py example.json -fnames logo.gif
21-
Successfully created record https://cd-sandbox.tind.io/records/352.
19+
pbkn6-m9y63
2220
```
21+
The response will be the unique identifier for the record. You can put this at
22+
the end of a URL to visit the record (e.g.
23+
https://data.caltechlibrary.dev/records/pbkn6-m9y63).
2324

2425
Edit a record (make changes to the example.json file to see a change):
2526
```
26-
python edit.py example.json -ids 352 -fnames logo.gif
27-
Successfully modified record https://cd-sandbox.tind.io/records/352
27+
python edit.py example.json -id pbkn6-m9y63
28+
10.33569/pbkn6-m9y63
2829
```
30+
The response is the DOI for the record, which includes the unique identifier
31+
for the record in the default configuration.
2932

30-
Get geographic metadata from CaltechDATA with WKT representations in a csv file.
31-
You can import this to a GIS program like QGIS
32-
using a delimited text import and projection epsg:4326. You'll have to do one
33-
import for Geometry type Point and another for Geometry type Polygon.
33+
Some groups have worked with the library to create custom DOIs. These can be
34+
passed in the metadata like:
3435

35-
```
36-
python get_geo.py caltechdata_geo.csv
36+
```shell
37+
python write.py example_custom.json -fnames logo.gif
38+
m6zxz-p4j22
3739
```
3840

39-
You can filter by keyword
40-
41+
And then you can edit the record with:
4142
```
42-
python get_geo.py caltechdata_geo.csv -keywords TCCON
43+
python edit.py example_custom.json -id m6zxz-p4j22
44+
10.5281/inveniordm.1234
4345
```
4446

47+
This returns the custom DOI of the record if it is successful.
4548

4649
## Setup
4750

4851
Install by typing 'pip install caltechdata_api'
4952

5053
## Usage
5154

52-
Write API access is controlled by repository staff. Email us at [email protected]
53-
with your request if you want to use the write API.
54-
5555
You need to acquire a personal access token from your CaltechDATA account
5656
(find it at the top right of your screen under "Applications").
57-
Make sure you include the "deposit_api:write" and "file_manager:upload"
58-
scopes. Then copy the token to token.bash. Type source token.bash in
57+
Then copy the token to token.bash. Type `source token.bash` in
5958
the command line to load the token.
6059

61-
Only test your application on the test repository. Testing the API on the public
60+
Only test your application on the test repository (data.caltechlibrary.dev). Testing the API on the public
6261
repository will generate junk records that are annoying to delete.
6362

64-
## TODO
65-
66-
Handle incorrect token on file upload

caltechdata_api/caltechdata_edit.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,13 @@ def caltechdata_edit(
8282
repo_prefix = "10.33569"
8383
pids = {}
8484
oai = False
85+
doi = False
8586
if "identifiers" in metadata:
8687
for identifier in metadata["identifiers"]:
8788
if identifier["identifierType"] == "DOI":
89+
doi = True
8890
doi = identifier["identifier"]
8991
prefix = doi.split("/")[0]
90-
9192
if prefix == repo_prefix:
9293
pids["doi"] = {
9394
"identifier": doi,
@@ -105,11 +106,20 @@ def caltechdata_edit(
105106
"provider": "oai",
106107
}
107108
oai = True
109+
#Records are not happy without the auto-assigned oai identifier
108110
if oai == False:
109111
pids["oai"] = {
110112
"identifier": f"oai:data.caltech.edu:{idv}",
111113
"provider": "oai",
112114
}
115+
#We do not want to lose the auto-assigned DOI
116+
#Users with custom DOIs must pass them in the metadata
117+
if doi == False:
118+
pids["doi"] = {
119+
"identifier": f'{repo_prefix}/{idv}',
120+
"provider": "datacite",
121+
"client": "datacite",
122+
}
113123
metadata["pids"] = pids
114124

115125
data = customize_schema.customize_schema(copy.deepcopy(metadata), schema=schema)

caltechdata_api/caltechdata_write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ def caltechdata_write(
129129
repo_prefix = "10.22002"
130130
else:
131131
repo_prefix = "10.33569"
132+
pids = {}
132133
if "identifiers" in metadata:
133-
pids = {}
134134
for identifier in metadata["identifiers"]:
135135
if identifier["identifierType"] == "DOI":
136136
doi = identifier["identifier"]

edit.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
default=None,
1212
help="file name for json DataCite metadata file",
1313
)
14-
parser.add_argument("-ids", nargs="*", help="CaltechDATA IDs")
14+
parser.add_argument("-id", help="CaltechDATA IDs")
1515
parser.add_argument("-fnames", nargs="*", help="New Files")
1616
parser.add_argument("-flinks", nargs="*", help="New File Links")
1717
parser.add_argument("-schema", default="43", help="Metadata Schema")
@@ -26,11 +26,11 @@
2626
else:
2727
metadata = {}
2828

29-
production = True
29+
production = False
3030
publish = True
3131

3232
response = caltechdata_edit(
33-
args.ids,
33+
args.id,
3434
metadata,
3535
token,
3636
args.fnames,

example.json

100755100644
Lines changed: 41 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,48 @@
11
{
2-
"alternateIdentifiers": [
3-
{
4-
"alternateIdentifier": "Alt_Identifier",
5-
"alternateIdentifierType": "Alt_Identifier_Type"
6-
}
2+
"identifiers": [
3+
{"identifier": "1924MNRAS..84..308E", "identifierType": "bibcode"}
74
],
85
"contributors": [
96
{
10-
"affiliations": [ "Affiliation" ],
11-
"contributorName": "Contributor Name",
7+
"nameType": "Personal",
8+
"affiliation": [
9+
{
10+
"name": "DataCitea",
11+
"affiliationIdentifier": "https://ror.org/04wxnsj81",
12+
"affiliationIdentifierScheme": "ROR"
13+
}
14+
],
15+
"name": "Contributor Name",
16+
"familyName": "Family Name",
17+
"givenName": "Given Name",
1218
"contributorType": "ContactPerson",
1319
"nameIdentifiers": [
1420
{
15-
"nameIdentifier": "Contributor Identifier",
16-
"nameIdentifierScheme": "GRID",
17-
"schemeURI": "https://www.grid.ac/institutes/"
21+
"nameIdentifier": "0000-0002-1825-0097",
22+
"nameIdentifierScheme": "ORCID",
23+
"schemeUri": "https://orcid.org/"
1824
}
1925
]
2026
}
2127
],
2228
"creators": [
2329
{
24-
"affiliations": [
25-
"Affiliation"
30+
"nameType": "Personal",
31+
"affiliation": [
32+
{
33+
"name": "DataCite",
34+
"affiliationIdentifier": "https://ror.org/04wxnsj81",
35+
"affiliationIdentifierScheme": "ROR"
36+
}
2637
],
27-
"creatorName": "Name",
38+
"name": "Name",
2839
"familyName": "Family Name",
2940
"givenName": "Given Name",
3041
"nameIdentifiers": [
3142
{
32-
"nameIdentifier": "Name Identifier",
43+
"nameIdentifier": "0000-0002-1825-0097",
3344
"nameIdentifierScheme": "ORCID",
34-
"schemeURI": "http://orcid.org/"
45+
"schemeUri": "https://orcid.org/"
3546
}
3647
]
3748
}
@@ -41,10 +52,6 @@
4152
"date": "2014-10-01",
4253
"dateType": "Created"
4354
},
44-
{
45-
"date": "2017-03-01",
46-
"dateType": "Updated"
47-
},
4855
{
4956
"date": "2012-05-22/2016-12-21",
5057
"dateType": "Collected"
@@ -60,24 +67,24 @@
6067
"format"
6168
],
6269
"fundingReferences": [
63-
{
64-
"funderIdentifier": {
65-
"funderIdentifier": "Funding Identifier",
66-
"funderIdentifierType": "GRID"
67-
},
68-
"funderName": "Funder Name"
69-
}
70+
{
71+
"awardTitle": "Measurement of Column-Averaged CO2",
72+
"funderName": "National Aeronautics and Space Administration",
73+
"funderIdentifierType": "GRID",
74+
"funderIdentifier": "grid.238252.c",
75+
"awardNumber": "NAG5-12247"
76+
}
7077
],
7178
"geoLocations": [
7279
{
7380
"geoLocationPlace": "Place Name",
7481
"geoLocationPoint": {
75-
"pointLatitude": 0.000,
76-
"pointLongitude": 0.000
82+
"pointLatitude": "34.138",
83+
"pointLongitude": "-118.1258"
7784
}
7885
}
7986
],
80-
"language": "en",
87+
"language": "eng",
8188
"publicationYear": "2017",
8289
"publisher": "Publisher",
8390
"relatedIdentifiers": [
@@ -92,8 +99,9 @@
9299
"relationType": "IsDocumentedBy"
93100
}
94101
],
95-
"resourceType": {
96-
"resourceTypeGeneral": "Dataset"
102+
"types": {
103+
"resourceTypeGeneral": "Dataset",
104+
"resourceType": "Dataset"
97105
},
98106
"rightsList": [
99107
{
@@ -118,5 +126,6 @@
118126
"titleType": "AlternativeTitle"
119127
}
120128
],
121-
"version": "0"
129+
"version": "0",
130+
"schemaVersion": "http://datacite.org/schema/kernel-4"
122131
}

example43.json renamed to example_custom.json

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,27 @@
11
{
2-
"identifiers": [
3-
{
4-
"identifier": "10.14291/1",
5-
"identifierType": "DOI"
6-
}],
7-
"alternateIdentifiers": [
8-
{
9-
"alternateIdentifier": "Alt_Identifier",
10-
"alternateIdentifierType": "Alt_Identifier_Type"
11-
}
2+
"identifiers": [
3+
{"identifier": "10.5281/inveniordm.1234", "identifierType": "DOI"},
4+
{"identifier": "1924MNRAS..84..308E", "identifierType": "bibcode"}
125
],
136
"contributors": [
147
{
158
"nameType": "Personal",
169
"affiliation": [
1710
{
18-
"name": "DataCite",
11+
"name": "DataCitea",
1912
"affiliationIdentifier": "https://ror.org/04wxnsj81",
2013
"affiliationIdentifierScheme": "ROR"
2114
}
2215
],
2316
"name": "Contributor Name",
17+
"familyName": "Family Name",
18+
"givenName": "Given Name",
2419
"contributorType": "ContactPerson",
2520
"nameIdentifiers": [
2621
{
27-
"nameIdentifier": "Contributor Identifier",
28-
"nameIdentifierScheme": "GRID",
29-
"schemeUri": "https://www.grid.ac/institutes/"
22+
"nameIdentifier": "0000-0002-1825-0097",
23+
"nameIdentifierScheme": "ORCID",
24+
"schemeUri": "https://orcid.org/"
3025
}
3126
]
3227
}
@@ -46,9 +41,9 @@
4641
"givenName": "Given Name",
4742
"nameIdentifiers": [
4843
{
49-
"nameIdentifier": "Name Identifier",
44+
"nameIdentifier": "0000-0002-1825-0097",
5045
"nameIdentifierScheme": "ORCID",
51-
"schemeUri": "http://orcid.org/"
46+
"schemeUri": "https://orcid.org/"
5247
}
5348
]
5449
}
@@ -58,10 +53,6 @@
5853
"date": "2014-10-01",
5954
"dateType": "Created"
6055
},
61-
{
62-
"date": "2017-03-01",
63-
"dateType": "Updated"
64-
},
6556
{
6657
"date": "2012-05-22/2016-12-21",
6758
"dateType": "Collected"
@@ -89,8 +80,8 @@
8980
{
9081
"geoLocationPlace": "Place Name",
9182
"geoLocationPoint": {
92-
"pointLatitude": "0.000",
93-
"pointLongitude": "0.000"
83+
"pointLatitude": "34.138",
84+
"pointLongitude": "-118.1258"
9485
}
9586
}
9687
],

outdated/get_geo.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import os, json, csv, argparse
22
import requests
3-
from caltechdata_api import decustomize_schema
43

54
if __name__ == "__main__":
65
parser = argparse.ArgumentParser(

write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
metaf = open(args.json_file[0], "r")
2121
metadata = json.load(metaf)
2222

23-
production = True
23+
production = False
2424
publish = False
2525

2626
response = caltechdata_write(

0 commit comments

Comments
 (0)