diff --git a/clowder_data_on_local.ipynb b/clowder_data_on_local.ipynb
new file mode 100644
index 0000000..0529fce
--- /dev/null
+++ b/clowder_data_on_local.ipynb
@@ -0,0 +1,767 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "4NGqRtRJpJYO"
+ },
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import json\n",
+ "import os\n",
+ "import tempfile\n",
+ "import re\n",
+ "import pathlib\n",
+ "import pandas as pd\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Please put your API key and clowder base url here."
+ ],
+ "metadata": {
+ "id": "YKlxthr4me3_"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Prefer the CLOWDER_API_KEY environment variable so the key is never saved inside the notebook;\n",
+ "# if it is not set, paste your API key in place of the empty string below.\n",
+ "key = os.environ.get(\"CLOWDER_API_KEY\", \"\")\n",
+ "# Base URL of the Clowder instance, without a trailing slash and without the /api suffix.\n",
+ "url = \"https://clowder.ncsa.illinois.edu/clowder\""
+ ],
+ "metadata": {
+ "id": "6PMIYV5i1uUQ"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Create some variables we use later"
+ ],
+ "metadata": {
+ "id": "psAgdUyI12ms"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Root of every REST endpoint called in the cells below.\n",
+ "clowder_base_uri = f\"{url}/api\"\n",
+ "\n",
+ "# Authentication header on its own, then the full header set sent with the JSON requests.\n",
+ "base_headers = {'X-API-key': key}\n",
+ "headers = dict(base_headers)\n",
+ "headers['Content-type'] = 'application/json'\n",
+ "headers['accept'] = 'application/json'"
+ ],
+ "metadata": {
+ "id": "jhnMe7qAmCDl"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "List all spaces you have edit access to"
+ ],
+ "metadata": {
+ "id": "AcBEPosEmqI4"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def list_spaces():\n",
+ "    \"\"\"Return the JSON answer of the `/spaces/canEdit` endpoint as a DataFrame.\n",
+ "\n",
+ "    Raises requests.HTTPError when the server answers with an error status.\n",
+ "    \"\"\"\n",
+ "    endpoint = clowder_base_uri + '/spaces/canEdit'\n",
+ "    response = requests.get(endpoint, headers=headers)\n",
+ "    response.raise_for_status()\n",
+ "    spaces = response.json()\n",
+ "    return pd.DataFrame.from_dict(spaces)\n",
+ "\n",
+ "list_spaces()"
+ ],
+ "metadata": {
+ "id": "r8glZflpmIoX",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 80
+ },
+ "outputId": "a0ec7c51-59a0-48ce-8d27-397adc58d7ec"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " id name description created\n",
+ "0 63c8414ce4b083a8c2580991 dt1 trial Wed Jan 18 18:58:20 UTC 2023"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " description | \n",
+ " created | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 63c8414ce4b083a8c2580991 | \n",
+ " dt1 | \n",
+ " trial | \n",
+ " Wed Jan 18 18:58:20 UTC 2023 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "List out all datasets"
+ ],
+ "metadata": {
+ "id": "eU9ZiXARmtkJ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def list_datasets():\n",
+ "    \"\"\"Return the JSON answer of the `/datasets` endpoint as a DataFrame.\n",
+ "\n",
+ "    Raises requests.HTTPError when the server answers with an error status.\n",
+ "    \"\"\"\n",
+ "    endpoint = clowder_base_uri + '/datasets'\n",
+ "    response = requests.get(endpoint, headers=headers)\n",
+ "    response.raise_for_status()\n",
+ "    datasets = response.json()\n",
+ "    return pd.DataFrame.from_dict(datasets)\n",
+ "\n",
+ "list_datasets()"
+ ],
+ "metadata": {
+ "id": "LUtE5D9_mLXn",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 111
+ },
+ "outputId": "9e89c48f-c894-4b92-faf4-15209896b5e0"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " id name description \\\n",
+ "0 63c84b8ae4b083a8c2580a41 dataset 2 \n",
+ "1 63c84b5be4b083a8c2580a26 dataset1 \n",
+ "\n",
+ " created thumbnail authorId \\\n",
+ "0 Wed Jan 18 19:42:02 UTC 2023 None 635bfd4db7c7f35aca68df66 \n",
+ "1 Wed Jan 18 19:41:15 UTC 2023 None 635bfd4db7c7f35aca68df66 \n",
+ "\n",
+ " spaces resource_type \n",
+ "0 [63c8414ce4b083a8c2580991] dataset \n",
+ "1 [63c8414ce4b083a8c2580991] dataset "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " description | \n",
+ " created | \n",
+ " thumbnail | \n",
+ " authorId | \n",
+ " spaces | \n",
+ " resource_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 63c84b8ae4b083a8c2580a41 | \n",
+ " dataset 2 | \n",
+ " | \n",
+ " Wed Jan 18 19:42:02 UTC 2023 | \n",
+ " None | \n",
+ " 635bfd4db7c7f35aca68df66 | \n",
+ " [63c8414ce4b083a8c2580991] | \n",
+ " dataset | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 63c84b5be4b083a8c2580a26 | \n",
+ " dataset1 | \n",
+ " | \n",
+ " Wed Jan 18 19:41:15 UTC 2023 | \n",
+ " None | \n",
+ " 635bfd4db7c7f35aca68df66 | \n",
+ " [63c8414ce4b083a8c2580991] | \n",
+ " dataset | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "List out all files in a particular dataset.\n",
+ "It takes datasetId as parameter."
+ ],
+ "metadata": {
+ "id": "_KLqApxomzA_"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def list_files_in_dataset(datasetId):\n",
+ "    \"\"\"Return the JSON answer of `/datasets/<datasetId>/listAllFiles` as a DataFrame.\n",
+ "\n",
+ "    Parameters:\n",
+ "        datasetId: id of the dataset, as a string.\n",
+ "\n",
+ "    Raises requests.HTTPError when the server answers with an error status.\n",
+ "    \"\"\"\n",
+ "    endpoint = clowder_base_uri + '/datasets/' + datasetId + '/listAllFiles'\n",
+ "    response = requests.get(endpoint, headers=headers)\n",
+ "    response.raise_for_status()\n",
+ "    files = response.json()\n",
+ "    return pd.DataFrame.from_dict(files)\n",
+ "\n",
+ "list_files_in_dataset('63c84b8ae4b083a8c2580a41')"
+ ],
+ "metadata": {
+ "id": "pg3YGSzomPkP",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 80
+ },
+ "outputId": "b609b15d-b37f-40ec-886e-c54a9eee8291"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " size date-created id contentType \\\n",
+ "0 57 Wed Jan 18 19:42:12 UTC 2023 63c84b94e4b083a8c2580a45 text/plain \n",
+ "\n",
+ " filename \n",
+ "0 a2.txt "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " size | \n",
+ " date-created | \n",
+ " id | \n",
+ " contentType | \n",
+ " filename | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 57 | \n",
+ " Wed Jan 18 19:42:12 UTC 2023 | \n",
+ " 63c84b94e4b083a8c2580a45 | \n",
+ " text/plain | \n",
+ " a2.txt | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Retrieves the URL to download the contents of a dataset as a zip archive. It takes datasetId as parameter.\n",
+ "Returns:\n",
+ "\n",
+ "* url - url to download from\n",
+ "* filename - zip file name\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "n0VhgvxvnDMk"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def get_dataset_download_link(datasetId):\n",
+ "    \"\"\"Build the zip-download link for a dataset.\n",
+ "\n",
+ "    Parameters:\n",
+ "        datasetId: id of the dataset, as a string.\n",
+ "\n",
+ "    Returns a dict with:\n",
+ "        url: address of the dataset's download endpoint.\n",
+ "        filename: the dataset name with a '.zip' suffix.\n",
+ "\n",
+ "    Raises requests.HTTPError when the dataset lookup answers with an error status.\n",
+ "    \"\"\"\n",
+ "    # Look up the dataset so the archive can be named after it.\n",
+ "    r = requests.get(f\"{clowder_base_uri}/datasets/{datasetId}\", headers=headers)\n",
+ "    r.raise_for_status()\n",
+ "    filename = r.json()[\"name\"] + \".zip\"\n",
+ "    # Named download_url so the module-level `url` (the Clowder base URL) is not shadowed.\n",
+ "    download_url = f\"{clowder_base_uri}/datasets/{datasetId}/download?bagit=false&compression=-1&tracking=true\"\n",
+ "    return {\"url\": download_url, \"filename\": filename}\n",
+ "\n",
+ "\n",
+ "get_dataset_download_link('63c84b8ae4b083a8c2580a41')"
+ ],
+ "metadata": {
+ "id": "FK0CAA2QmQN-",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "b87eba94-b5a6-4795-ce32-9ce68ae1be1f"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "{'id': '63c84b8ae4b083a8c2580a41', 'name': 'dataset 2', 'description': '', 'created': 'Wed Jan 18 19:42:02 UTC 2023', 'thumbnail': None, 'authorId': '635bfd4db7c7f35aca68df66', 'spaces': ['63c8414ce4b083a8c2580991'], 'resource_type': 'dataset'}\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'url': 'https://clowder.ncsa.illinois.edu/clowder/api/datasets/63c84b8ae4b083a8c2580a41//download?bagit=false&compression=-1&tracking=true',\n",
+ " 'filename': 'dataset 2.zip'}"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "Retrieves the URL to download a file. It takes fileId as parameter.\n",
+ "\n",
+ "Returns:\n",
+ "\n",
+ "\n",
+ "\n",
+ "* url - url to download from\n",
+ "\n",
+ "* filename - original filename\n",
+ "* bytes - size of the file\n",
+ "\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "RtAoGz32cG66"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def get_file_download_link(fileId):\n",
+ "    \"\"\"Build the download link for a single file.\n",
+ "\n",
+ "    Parameters:\n",
+ "        fileId: id of the file, as a string.\n",
+ "\n",
+ "    Returns a dict with:\n",
+ "        url: address of the file's blob endpoint; it embeds the API key as a\n",
+ "            query parameter, so treat it as a secret.\n",
+ "        filename: the 'filename' field of the file metadata.\n",
+ "        bytes: the 'size' field of the file metadata, passed through unchanged.\n",
+ "\n",
+ "    Raises requests.HTTPError when the metadata lookup answers with an error status.\n",
+ "    \"\"\"\n",
+ "    # Authenticate the metadata lookup with the request headers, like the other helpers,\n",
+ "    # instead of putting the key into the query string.\n",
+ "    r = requests.get(f\"{clowder_base_uri}/files/{fileId}/metadata\", headers=headers)\n",
+ "    r.raise_for_status()\n",
+ "    metadata = r.json()\n",
+ "    # The blob link keeps the key so it can be fetched without any extra headers.\n",
+ "    bloburl = f\"{clowder_base_uri}/files/{fileId}/blob?key={key}\"\n",
+ "    return {\"url\": bloburl, \"filename\": metadata[\"filename\"], \"bytes\": metadata[\"size\"]}\n",
+ "\n",
+ "# NOTE: the displayed url contains your API key - clear this cell's output before sharing the notebook.\n",
+ "get_file_download_link('63c84b73e4b083a8c2580a2a')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "bhN5XjIGcF_k",
+ "outputId": "9bd74f96-a476-4154-ee10-8522d0058445"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'url': 'https://clowder.ncsa.illinois.edu/clowder/api/files/63c84b73e4b083a8c2580a2a/blob?key=<REDACTED>',\n",
+ " 'filename': 'a1.txt',\n",
+ " 'bytes': '55'}"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Download dataset/file to local disk.\n",
+ "\n",
+ "Parameters:\n",
+ "\n",
+ "* url - url to download from\n",
+ "\n",
+ "* inputfilename - optional path of the local file to write; a temporary file is created when it is omitted\n",
+ "* chunk_size - number of bytes read and written per chunk while the file/dataset is streamed to disk (default 10 KiB)\n"
+ ],
+ "metadata": {
+ "id": "0zaHOORXgkYs"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def download_to_disk(url, inputfilename = None, chunk_size = 10 * 1024, headers = None):\n",
+ "    \"\"\"Stream the content at `url` into a local file and return that file's path.\n",
+ "\n",
+ "    Parameters:\n",
+ "        url: address to download from.\n",
+ "        inputfilename: path of the file to write. When None, a temporary file\n",
+ "            with a '.txt' suffix is created via tempfile.mkstemp.\n",
+ "        chunk_size: number of bytes requested per chunk from the response.\n",
+ "        headers: optional request headers, for links that do not carry the\n",
+ "            API key in the URL itself.\n",
+ "\n",
+ "    Returns the path of the written file; a given `inputfilename` is joined onto\n",
+ "    the current working directory.\n",
+ "\n",
+ "    Raises requests.HTTPError on an error status. If writing fails, the output\n",
+ "    file is removed and the exception is re-raised.\n",
+ "    \"\"\"\n",
+ "    # The with-block releases the streamed connection even when writing fails.\n",
+ "    with requests.get(url, stream=True, headers=headers) as result:\n",
+ "        # Fail before touching the disk so an error page is never saved as the file.\n",
+ "        result.raise_for_status()\n",
+ "\n",
+ "        if inputfilename is None:\n",
+ "            (inputfile, inputfilename) = tempfile.mkstemp(suffix='.txt')\n",
+ "        else:\n",
+ "            # O_TRUNC empties an existing file first; without it, overwriting a longer\n",
+ "            # file would leave its old trailing bytes behind the new content.\n",
+ "            inputfile = os.open(inputfilename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)\n",
+ "            inputfilename = os.path.join(os.getcwd(), inputfilename)\n",
+ "        try:\n",
+ "            with os.fdopen(inputfile, \"wb\") as outputfile:\n",
+ "                for chunk in result.iter_content(chunk_size):\n",
+ "                    outputfile.write(chunk)\n",
+ "            return inputfilename\n",
+ "        except Exception:\n",
+ "            # Do not leave a partially written file behind.\n",
+ "            os.remove(inputfilename)\n",
+ "            raise\n",
+ "\n",
+ "file_link = get_file_download_link('63c84b73e4b083a8c2580a2a')\n",
+ "download_to_disk(file_link[\"url\"], file_link[\"filename\"], 10 * 1024)"
+ ],
+ "metadata": {
+ "id": "ES7vMMXSmSjY",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "outputId": "665b59e9-fe4c-418f-fcb1-b14be815c7eb"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'/content/a1.txt'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file