Skip to content

Commit 206c35b

Browse files
Add example notebook
Add example notebook for uploading data to the caltechdata system.
1 parent abb6cb3 commit 206c35b

File tree

2 files changed

+275
-1
lines changed

2 files changed

+275
-1
lines changed
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"provenance": [],
7+
"mount_file_id": "16c_qOZHmh9T8mBCqk5a6_os2H-D4lJuH",
8+
"authorship_tag": "ABX9TyMYP9gVRivbIGD6aN1ednNp",
9+
"include_colab_link": true
10+
},
11+
"kernelspec": {
12+
"name": "python3",
13+
"display_name": "Python 3"
14+
},
15+
"language_info": {
16+
"name": "python"
17+
}
18+
},
19+
"cells": [
20+
{
21+
"cell_type": "markdown",
22+
"metadata": {
23+
"id": "view-in-github",
24+
"colab_type": "text"
25+
},
26+
"source": [
27+
"<a href=\"https://colab.research.google.com/github/AbakahAlexander/caltechdata_api/blob/main/Uploading_dataset_to_CaltechDATA.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
28+
]
29+
},
30+
{
31+
"cell_type": "markdown",
32+
"source": [
33+
"Use this command to install the *Caltechdata API*. This gives you access to all the functions for uploading data, making changes, etc. However, remember that if you install it in an online code editor like Google Colab, you have to run this command each time you reconnect to the runtime."
34+
],
35+
"metadata": {
36+
"id": "vs_acZFiE-nF"
37+
}
38+
},
39+
{
40+
"cell_type": "code",
41+
"source": [
42+
"pip install caltechdata_api"
43+
],
44+
"metadata": {
45+
"colab": {
46+
"base_uri": "https://localhost:8080/"
47+
},
48+
"id": "s5BHYqHwlKVp",
49+
"outputId": "d9936859-53fd-4312-df21-af2c846b9f8d"
50+
},
51+
"execution_count": null,
52+
"outputs": [
53+
{
54+
"output_type": "stream",
55+
"name": "stdout",
56+
"text": [
57+
"Requirement already satisfied: caltechdata_api in /usr/local/lib/python3.10/dist-packages (1.8.2)\n",
58+
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from caltechdata_api) (2.32.3)\n",
59+
"Requirement already satisfied: datacite>1.1.0 in /usr/local/lib/python3.10/dist-packages (from caltechdata_api) (1.2.0)\n",
60+
"Requirement already satisfied: tqdm>=4.62.3 in /usr/local/lib/python3.10/dist-packages (from caltechdata_api) (4.66.6)\n",
61+
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from caltechdata_api) (6.0.2)\n",
62+
"Requirement already satisfied: s3fs in /usr/local/lib/python3.10/dist-packages (from caltechdata_api) (2024.10.0)\n",
63+
"Requirement already satisfied: cryptography in /usr/local/lib/python3.10/dist-packages (from caltechdata_api) (43.0.3)\n",
64+
"Requirement already satisfied: s3cmd in /usr/local/lib/python3.10/dist-packages (from caltechdata_api) (2.4.0)\n",
65+
"Requirement already satisfied: jsonschema>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from datacite>1.1.0->caltechdata_api) (4.23.0)\n",
66+
"Requirement already satisfied: lxml>=4.5.2 in /usr/local/lib/python3.10/dist-packages (from datacite>1.1.0->caltechdata_api) (5.3.0)\n",
67+
"Requirement already satisfied: idutils>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from datacite>1.1.0->caltechdata_api) (1.4.2)\n",
68+
"Requirement already satisfied: importlib-metadata>=6.11.0 in /usr/local/lib/python3.10/dist-packages (from datacite>1.1.0->caltechdata_api) (8.5.0)\n",
69+
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->caltechdata_api) (3.4.0)\n",
70+
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->caltechdata_api) (3.10)\n",
71+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->caltechdata_api) (2.2.3)\n",
72+
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->caltechdata_api) (2024.8.30)\n",
73+
"Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography->caltechdata_api) (1.17.1)\n",
74+
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from s3cmd->caltechdata_api) (2.8.2)\n",
75+
"Requirement already satisfied: python-magic in /usr/local/lib/python3.10/dist-packages (from s3cmd->caltechdata_api) (0.4.27)\n",
76+
"Requirement already satisfied: aiobotocore<3.0.0,>=2.5.4 in /usr/local/lib/python3.10/dist-packages (from s3fs->caltechdata_api) (2.15.2)\n",
77+
"Requirement already satisfied: fsspec==2024.10.0.* in /usr/local/lib/python3.10/dist-packages (from s3fs->caltechdata_api) (2024.10.0)\n",
78+
"Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.10/dist-packages (from s3fs->caltechdata_api) (3.10.10)\n",
79+
"Requirement already satisfied: botocore<1.35.37,>=1.35.16 in /usr/local/lib/python3.10/dist-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs->caltechdata_api) (1.35.36)\n",
80+
"Requirement already satisfied: wrapt<2.0.0,>=1.10.10 in /usr/local/lib/python3.10/dist-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs->caltechdata_api) (1.16.0)\n",
81+
"Requirement already satisfied: aioitertools<1.0.0,>=0.5.1 in /usr/local/lib/python3.10/dist-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs->caltechdata_api) (0.12.0)\n",
82+
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (2.4.3)\n",
83+
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (1.3.1)\n",
84+
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (24.2.0)\n",
85+
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (1.5.0)\n",
86+
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (6.1.0)\n",
87+
"Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (1.17.1)\n",
88+
"Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (4.0.3)\n",
89+
"Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography->caltechdata_api) (2.22)\n",
90+
"Requirement already satisfied: isbnlib>=3.10.8 in /usr/local/lib/python3.10/dist-packages (from idutils>=1.0.0->datacite>1.1.0->caltechdata_api) (3.10.14)\n",
91+
"Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=6.11.0->datacite>1.1.0->caltechdata_api) (3.20.2)\n",
92+
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0.0->datacite>1.1.0->caltechdata_api) (2024.10.1)\n",
93+
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0.0->datacite>1.1.0->caltechdata_api) (0.35.1)\n",
94+
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0.0->datacite>1.1.0->caltechdata_api) (0.21.0)\n",
95+
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->s3cmd->caltechdata_api) (1.16.0)\n",
96+
"Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.37,>=1.35.16->aiobotocore<3.0.0,>=2.5.4->s3fs->caltechdata_api) (1.0.1)\n",
97+
"Requirement already satisfied: typing-extensions>=4.1.0 in /usr/local/lib/python3.10/dist-packages (from multidict<7.0,>=4.5->aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (4.12.2)\n",
98+
"Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp!=4.0.0a0,!=4.0.0a1->s3fs->caltechdata_api) (0.2.0)\n"
99+
]
100+
}
101+
]
102+
},
103+
{
104+
"cell_type": "markdown",
105+
"source": [
106+
"This command imports the *caltechdata_write* function from the *caltechdata_api*. This is the function used to make an upload to the caltechdata site. You can import other functions as well if you wish to do other things besides uploading."
107+
],
108+
"metadata": {
109+
"id": "RU9G1N45HL8W"
110+
}
111+
},
112+
{
113+
"cell_type": "code",
114+
"execution_count": null,
115+
"metadata": {
116+
"id": "tHe1a9jFk9gn"
117+
},
118+
"outputs": [],
119+
"source": [
120+
"from caltechdata_api import caltechdata_write"
121+
]
122+
},
123+
{
124+
"cell_type": "markdown",
125+
"source": [
126+
"Before you proceed, make sure you generate your access token from the appropriate site. If you're uploading to the caltechdata test system, make sure you generate the access token at *https://data.caltechlibrary.dev/*. If you're uploading to the real system, make sure you generate the access token at *https://data.caltech.edu/*. Put your API token here to authenticate access to the caltechdata API."
127+
],
128+
"metadata": {
129+
"id": "pdVHbtELH4Dm"
130+
}
131+
},
132+
{
133+
"cell_type": "code",
134+
"source": [
135+
"import os\n",
136+
"os.environ[\"RDMTOK\"] = \"your access token here\""
137+
],
138+
"metadata": {
139+
"id": "A1rOU5lvSrrm"
140+
},
141+
"execution_count": 30,
142+
"outputs": []
143+
},
144+
{
145+
"cell_type": "markdown",
146+
"source": [
147+
"The metadata contains basic information about the upload you're making, such as the title of the data, its creators, etc."
148+
],
149+
"metadata": {
150+
"id": "A0BAd60EmoRd"
151+
}
152+
},
153+
{
154+
"cell_type": "code",
155+
"source": [
156+
"metadata = {\n",
157+
" \"titles\": [{\"title\": \"title of data\"}],\n",
158+
" \"creators\": [{\"name\": \"name of creator\"}],\n",
159+
" \"description\": \"brief description of the data\",\n",
160+
" \"publication_date\": \"date of upload\",\n",
161+
" \"types\" : {\"resourceType\": \"Dataset\", \"resourceTypeGeneral\": \"Dataset\"},\n",
162+
" \"descriptions\": []\n",
163+
"}"
164+
],
165+
"metadata": {
166+
"id": "sRFby6hVldz5"
167+
},
168+
"execution_count": null,
169+
"outputs": []
170+
},
171+
{
172+
"cell_type": "markdown",
173+
"source": [
174+
"There are two options here. Use *file_links* if the data you're uploading is on a remote server. Use *files* if the data you're uploading is on your local machine or Google Drive (if you're using Colab)."
175+
],
176+
"metadata": {
177+
"id": "Abo_NlSCm42f"
178+
}
179+
},
180+
{
181+
"cell_type": "code",
182+
"source": [
183+
"#file_links = [\"links to the files\"]\n",
184+
"files = [\"path to the files\"]\n",
185+
"#files = file_links[0].split(\"/\")[-1]"
186+
],
187+
"metadata": {
188+
"id": "wbqM_N9-mGt4"
189+
},
190+
"execution_count": null,
191+
"outputs": []
192+
},
193+
{
194+
"cell_type": "markdown",
195+
"source": [
196+
"Put your access token here."
197+
],
198+
"metadata": {
199+
"id": "McI_z-hind1n"
200+
}
201+
},
202+
{
203+
"cell_type": "code",
204+
"source": [
205+
"token = \"your access token here\""
206+
],
207+
"metadata": {
208+
"id": "Y9O7758SliOr"
209+
},
210+
"execution_count": 31,
211+
"outputs": []
212+
},
213+
{
214+
"cell_type": "markdown",
215+
"source": [
216+
"If you used *files* from the previous snippet, then you don't have to use *file_links* here and vice versa. Also set production to True if you're uploading the data to the real caltechdata website. If you're uploading to the test system, set production to False. Set publish to True if you want the data published on the website. All other arguments should remain the same.\n"
217+
],
218+
"metadata": {
219+
"id": "Zue_AfuMnlRX"
220+
}
221+
},
222+
{
223+
"cell_type": "code",
224+
"source": [
225+
"response = caltechdata_write(\n",
226+
" metadata = metadata,\n",
227+
" token=token,\n",
228+
" files=files,\n",
229+
" production=False,\n",
230+
" schema=\"43\",\n",
231+
" publish=False,\n",
232+
" #file_links= file_links,\n",
233+
" s3=None,\n",
234+
" community=None,\n",
235+
" authors=False,\n",
236+
" file_descriptions=[],\n",
237+
" s3_link=None,\n",
238+
" default_preview=None,\n",
239+
" review_message=None,\n",
240+
")\n",
241+
"\n",
242+
"print(response)"
243+
],
244+
"metadata": {
245+
"colab": {
246+
"base_uri": "https://localhost:8080/"
247+
},
248+
"id": "MKc6rv6llVPO",
249+
"outputId": "515864b6-63ce-436c-f652-208097ea720c"
250+
},
251+
"execution_count": 32,
252+
"outputs": [
253+
{
254+
"output_type": "stream",
255+
"name": "stdout",
256+
"text": [
257+
"wjms7-60x72\n"
258+
]
259+
}
260+
]
261+
}
262+
]
263+
}

codemeta.json

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,17 @@
3737
"@type": "Organization",
3838
"name": "Caltech"
3939
}
40+
},
41+
{
42+
"@type": "Person",
43+
"givenName": "Alexander A",
44+
"familyName": "Abakah",
45+
"affiliation": {
46+
"@type": "Organization",
47+
"name": "Caltech Library"
48+
},
49+
"email": "[email protected]",
50+
"@id": "https://orcid.org/0009-0003-5640-6691"
4051
}
4152
],
4253
"developmentStatus": "active",
@@ -72,4 +83,4 @@
7283
},
7384
"programmingLanguage": "Python",
7485
"identifier": "10.22002/bv2pv-2b295"
75-
}
86+
}

0 commit comments

Comments
 (0)