
Commit cac5619

Merge branch 'master' into dependabot/pip/certifi-2023.7.22
2 parents 591dbcd + 9736f64 commit cac5619

20 files changed: +491 -165 lines changed

.github/workflows/docker.yaml

Lines changed: 5 additions & 5 deletions
@@ -3,7 +3,7 @@ name: Docker
 # This will run when:
 # - a new release is created, to make sure the right tags of the
 #   docker images are pushed (expects tags to be v1.8.4).
-# - when new code is pushed to main/develop to push the tags
+# - when new code is pushed to master/develop to push the tags
 #   latest and develop
 # - when a pull request is created and updated to make sure the
 #   Dockerfile is still valid.
@@ -18,7 +18,7 @@ on:
 
   pull_request:
 
-# Certain actions will only run when this is the main repo.
+# Certain actions will only run when this is the master repo.
 env:
   MAIN_REPO: clowder-framework/pyclowder
   DOCKERHUB_ORG: clowder
@@ -37,7 +37,7 @@ jobs:
       include:
         - name: wordcount
           FOLDER: sample-extractors/wordcount
-          PLATFORM: "linux/amd64,linux/arm64"
+          PLATFORM: "linux/amd64"
     steps:
       - uses: actions/checkout@v2
 
@@ -56,7 +56,7 @@ jobs:
          # should we push to dockerhub, and is there a README
          DOCKERHUB_PUSH="false"
          DOCKERHUB_README="false"
-         if [ "$BRANCH" == "main" -a "${{ github.repository }}" == "${{ env.MAIN_REPO }}" ]; then
+         if [ "$BRANCH" == "master" -a "${{ github.repository }}" == "${{ env.MAIN_REPO }}" ]; then
           if [ "${{ secrets.DOCKERHUB_USERNAME }}" != "" -a "${{ secrets.DOCKERHUB_PASSWORD }}" != "" ]; then
             DOCKERHUB_PUSH="true"
             if [ -e "${{ matrix.FOLDER }}/README.md" ]; then
@@ -66,7 +66,7 @@ jobs:
          fi
 
          # calculate the version and all tags
-         if [ "$BRANCH" == "main" ]; then
+         if [ "$BRANCH" == "master" ]; then
           VERSION="$(awk '/"version":/ { print $2 }' ${{ matrix.FOLDER }}/extractor_info.json | sed 's/^.*"\([0-9\.]*\)".*$/\1/')"
           tags="latest"
           oldversion=""

CHANGELOG.md

Lines changed: 37 additions & 0 deletions
@@ -5,6 +5,37 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](https://semver.org/).
 
+
+## 3.0.7 - 2023-10-11
+
+### Added
+
+- Modified v1 and v2 endpoints to ignore trailing slashes on Clowder host URLs.
+
+## 3.0.6 - 2023-10-10
+
+### Added
+
+- Allow folder id to be passed to api/v2 upload_to_dataset.
+
+## 3.0.5 - 2023-10-09
+
+### Added
+
+- Support for deletion of individual files.
+
+## 3.0.4 - 2023-09-27
+
+### Fixed
+
+- Fixed host bug on v1 file thumbnail endpoint.
+
+## 3.0.3 - 2023-08-29
+
+### Added
+
+- Support for Clowder V2 thumbnails endpoints.
+
 ## 3.0.2 - 2023-07-20
 
 ### Added
@@ -46,6 +77,12 @@ This will result in only those downloads to be counted by users, not extractors.
 
 - Ask not to track a download from an extractor.
 
+## Unreleased
+
+### Added
+- Add support for `EXTRACTOR_KEY` and `CLOWDER_EMAIL` environment variables to register
+  an extractor for just one user.
+
 ## 2.6.0 - 2022-06-14
 
 This will change how clowder sees the extractors. If you have an extractor, and you specify
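
The `EXTRACTOR_KEY`/`CLOWDER_EMAIL` entry is the one unreleased change in this hunk. A minimal sketch of reading those variables; only the variable names come from the changelog, and the registration call itself is internal to pyclowder and not shown in this diff:

```python
import os

# Variable names per the changelog entry above; everything else is sketch.
extractor_key = os.environ.get("EXTRACTOR_KEY")   # key scoped to a single user
clowder_email = os.environ.get("CLOWDER_EMAIL")   # that user's account email

if extractor_key and clowder_email:
    print("extractor will register for %s only" % clowder_email)
else:
    print("extractor registers globally")
```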

pyclowder/api/v1/datasets.py

Lines changed: 46 additions & 12 deletions
@@ -2,7 +2,7 @@
 import logging
 import os
 import tempfile
-
+import posixpath
 import requests
 from pyclowder.client import ClowderClient
 from pyclowder.collections import get_datasets, get_child_collections, delete as delete_collection
@@ -22,7 +22,7 @@ def create_empty(connector, client, datasetname, description, parentid=None, spa
     """
     logger = logging.getLogger(__name__)
 
-    url = '%s/api/datasets/createempty?key=%s' % (client.host, client.key)
+    url = posixpath.join(client.host, 'api/datasets/createempty?key=%s' % client.key)
 
     if parentid:
         if spaceid:
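
The recurring change in this file (and in pyclowder/api/v1/files.py below) replaces `'%s/...' % client.host` string formatting with `posixpath.join`, which is what makes trailing slashes on the host harmless, per the 3.0.7 changelog entry above. A quick demonstration with a hypothetical host:

```python
import posixpath

# posixpath.join collapses the boundary to exactly one "/", so a host
# configured with or without a trailing slash produces the same URL.
for host in ("https://clowder.example.org", "https://clowder.example.org/"):
    print(posixpath.join(host, "api/datasets/%s?key=%s" % ("dsid", "SECRET")))
# both iterations print:
# https://clowder.example.org/api/datasets/dsid?key=SECRET
```

Note this only works because the joined path segments never start with a slash: `posixpath.join(host, '/api/...')` would discard the host entirely.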
@@ -61,7 +61,7 @@ def delete(connector, client, datasetid):
     client -- ClowderClient containing authentication credentials
     datasetid -- the dataset to delete
     """
-    url = "%s/api/datasets/%s?key=%s" % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, "api/datasets/%s?key=%s" % (datasetid, client.key))
 
     result = requests.delete(url, verify=connector.ssl_verify if connector else True)
     result.raise_for_status()
@@ -102,7 +102,7 @@ def download(connector, client, datasetid):
     connector.message_process({"type": "dataset", "id": datasetid}, "Downloading dataset.")
 
     # fetch dataset zipfile
-    url = '%s/api/datasets/%s/download?key=%s' % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, 'api/datasets/%s/download?key=%s' % (datasetid, client.key))
     result = requests.get(url, stream=True,
                           verify=connector.ssl_verify if connector else True)
     result.raise_for_status()
@@ -124,7 +124,7 @@ def download_metadata(connector, client, datasetid, extractor=None):
     extractor -- extractor name to filter results (if only one extractor's metadata is desired)
     """
     filterstring = "" if extractor is None else "&extractor=%s" % extractor
-    url = '%s/api/datasets/%s/metadata?key=%s' % (client.host, datasetid, client.key + filterstring)
+    url = posixpath.join(client.host, 'api/datasets/%s/metadata.jsonld?key=%s' % (datasetid, client.key + filterstring))
 
     # fetch data
     result = requests.get(url, stream=True,
@@ -142,7 +142,7 @@ def get_info(connector, client, datasetid):
     datasetid -- the dataset to get info of
     """
 
-    url = "%s/api/datasets/%s?key=%s" % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, "api/datasets/%s?key=%s" % (datasetid, client.key))
 
     result = requests.get(url, verify=connector.ssl_verify if connector else True)
     result.raise_for_status()
@@ -157,7 +157,7 @@ def get_file_list(connector, client, datasetid):
     client -- ClowderClient containing authentication credentials
     datasetid -- the dataset to get filelist of
     """
-    url = "%s/api/datasets/%s/files?key=%s" % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, "api/datasets/%s/files?key=%s" % (datasetid, client.key))
 
     result = requests.get(url, verify=connector.ssl_verify if connector else True)
     result.raise_for_status()
@@ -175,7 +175,7 @@ def remove_metadata(connector, client, datasetid, extractor=None):
     !!! ALL JSON-LD METADATA WILL BE REMOVED IF NO extractor PROVIDED !!!
     """
     filterstring = "" if extractor is None else "&extractor=%s" % extractor
-    url = '%s/api/datasets/%s/metadata?key=%s' % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, 'api/datasets/%s/metadata.jsonld?key=%s' % (datasetid, client.key))
 
     # fetch data
     result = requests.delete(url, stream=True, verify=connector.ssl_verify if connector else True)
@@ -192,7 +192,7 @@ def submit_extraction(connector, client, datasetid, extractorname):
     """
     headers = {'Content-Type': 'application/json'}
 
-    url = "%s/api/datasets/%s/extractions?key=%s" % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, "api/datasets/%s/extractions?key=%s" % (datasetid, client.key))
 
     result = requests.post(url,
                            headers=headers,
@@ -238,7 +238,7 @@ def upload_tags(connector, client, datasetid, tags):
     connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid}, "Uploading dataset tags.")
 
     headers = {'Content-Type': 'application/json'}
-    url = '%s/api/datasets/%s/tags?key=%s' % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, 'api/datasets/%s/tags?key=%s' % (datasetid, client.key))
     result = connector.post(url, headers=headers, data=json.dumps(tags),
                             verify=connector.ssl_verify if connector else True)
 
@@ -255,11 +255,45 @@ def upload_metadata(connector, client, datasetid, metadata):
     headers = {'Content-Type': 'application/json'}
     connector.message_process({"type": "dataset", "id": datasetid}, "Uploading dataset metadata.")
 
-    url = '%s/api/datasets/%s/metadata?key=%s' % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, 'api/datasets/%s/metadata.jsonld?key=%s' % (datasetid, client.key))
     result = requests.post(url, headers=headers, data=json.dumps(metadata),
                            verify=connector.ssl_verify if connector else True)
     result.raise_for_status()
 
+
+def upload_thumbnail(connector, host, key, datasetid, thumbnail):
+    """Upload thumbnail to Clowder.
+
+    Keyword arguments:
+    connector -- connector information, used to get missing parameters and send status updates
+    host -- the clowder host, including http and port, should end with a /
+    key -- the secret key to login to clowder
+    datasetid -- the dataset that the thumbnail should be associated with
+    thumbnail -- the file containing the thumbnail
+    """
+    logger = logging.getLogger(__name__)
+    logger.info("Upload thumbnails to datasets is not available in V1")
+
+
+def upload_preview(connector, host, key, datasetid, previewfile, previewmetadata=None, preview_mimetype=None,
+                   visualization_name=None, visualization_description=None, visualization_config_data=None,
+                   visualization_component_id=None):
+    """Upload preview to Clowder.
+
+    Keyword arguments:
+    connector -- connector information, used to get missing parameters and send status updates
+    host -- the clowder host, including http and port, should end with a /
+    key -- the secret key to login to clowder
+    datasetid -- the dataset that is currently being processed
+    previewfile -- the file containing the preview
+    previewmetadata -- any metadata to be associated with preview, can contain a section_id
+                       to indicate the section this preview should be associated with.
+    preview_mimetype -- (optional) MIME type of the preview file. By default, this is obtained from the
+                        file itself and this parameter can be ignored. E.g. 'application/vnd.clowder+custom+xml'
+    """
+
+    logger = logging.getLogger(__name__)
+    logger.info("Upload preview to datasets is not available in V1")
+
 
 # TODO not done yet, need more testing
 class DatasetsApi(object):
@@ -353,4 +387,4 @@ def add_metadata(self, dataset_id, metadata):
         try:
             return self.client.post("/datasets/%s/metadata" % dataset_id, metadata)
         except Exception as e:
-            logging.error("Error upload to dataset %s: %s" % (dataset_id, str(e)))
+            logging.error("Error upload to dataset %s: %s" % (dataset_id, str(e)))
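
The two new helpers above are deliberate no-op stubs: the v1 API accepts the v2-style signature but only logs that dataset thumbnails and previews are unsupported, so extractor code written against v2 can run unchanged against v1. A usage sketch with placeholder host, key, and ids:

```python
import logging
import pyclowder.api.v1.datasets as datasets

logging.basicConfig(level=logging.INFO)

# Placeholder values; on the v1 API this call only emits a log line.
datasets.upload_thumbnail(connector=None,
                          host="https://clowder.example.org/",
                          key="SECRET",
                          datasetid="demo-dataset-id",
                          thumbnail="thumb.png")
# INFO:pyclowder.api.v1.datasets:Upload thumbnails to datasets is not available in V1
```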

pyclowder/api/v1/files.py

Lines changed: 32 additions & 16 deletions
@@ -7,7 +7,7 @@
 import logging
 import os
 import tempfile
-
+import posixpath
 import requests
 from requests_toolbelt.multipart.encoder import MultipartEncoder
 
@@ -43,7 +43,7 @@ def get_download_url(connector, client, fileid, intermediatefileid=None, ext="")
     if not intermediatefileid:
         intermediatefileid = fileid
 
-    url = '%s/api/files/%s?key=%s' % (client.host, intermediatefileid, client.key)
+    url = posixpath.join(client.host, 'api/files/%s?key=%s' % (intermediatefileid, client.key))
     return url
 
 
@@ -65,7 +65,7 @@ def download(connector, client, fileid, intermediatefileid=None, ext=""):
     if not intermediatefileid:
         intermediatefileid = fileid
 
-    url = '%s/api/files/%s?key=%s' % (client.host, intermediatefileid, client.key)
+    url = posixpath.join(client.host, 'api/files/%s?key=%s' % (intermediatefileid, client.key))
     result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True)
 
     (inputfile, inputfilename) = tempfile.mkstemp(suffix=ext)
@@ -89,7 +89,7 @@ def download_info(connector, client, fileid):
     fileid -- the file to fetch metadata of
     """
 
-    url = '%s/api/files/%s/metadata?key=%s' % (client.host, fileid, client.key)
+    url = posixpath.join(client.host, 'api/files/%s/metadata?key=%s' % (fileid, client.key))
 
     # fetch data
     result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True)
@@ -121,14 +121,30 @@ def download_metadata(connector, client, fileid, extractor=None):
     """
 
     filterstring = "" if extractor is None else "&extractor=%s" % extractor
-    url = '%s/api/files/%s/metadata.jsonld?key=%s%s' % (client.host, fileid, client.key, filterstring)
+    url = posixpath.join(client.host, 'api/files/%s/metadata.jsonld?key=%s%s' % (fileid, client.key, filterstring))
 
     # fetch data
     result = connector.get(url, stream=True, verify=connector.ssl_verify if connector else True)
 
     return result
 
 
+def delete(connector, client, fileid):
+    """Delete file from Clowder.
+
+    Keyword arguments:
+    connector -- connector information, used to get missing parameters and send status updates
+    client -- ClowderClient containing authentication credentials
+    fileid -- the file to delete
+    """
+    url = posixpath.join(client.host, "api/files/%s?key=%s" % (fileid, client.key))
+
+    result = requests.delete(url, verify=connector.ssl_verify if connector else True)
+    result.raise_for_status()
+
+    return json.loads(result.text)
+
+
 def submit_extraction(connector, client, fileid, extractorname):
     """Submit file for extraction by given extractor.
 
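
A minimal usage sketch for the new v1 `files.delete` helper. The host, key, and file id are placeholders, and passing host/key as ClowderClient keyword arguments is an assumption; the diff only shows that the client exposes `.host` and `.key`:

```python
from pyclowder.client import ClowderClient
import pyclowder.api.v1.files as files

# Placeholder credentials; constructor kwargs are assumed, not shown above.
client = ClowderClient(host="https://clowder.example.org", key="SECRET")

# connector may be None; the helper then defaults to verify=True.
response = files.delete(None, client, "demo-file-id")
print(response)  # parsed JSON body returned by the delete endpoint
```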
@@ -139,7 +155,7 @@ def submit_extraction(connector, client, fileid, extractorname):
     extractorname -- registered name of extractor to trigger
     """
 
-    url = "%s/api/files/%s/extractions?key=%s" % (client.host, fileid, client.key)
+    url = posixpath.join(client.host, "api/files/%s/extractions?key=%s" % (fileid, client.key))
 
     result = connector.post(url,
                             headers={'Content-Type': 'application/json'},
@@ -213,7 +229,7 @@ def upload_metadata(connector, client, fileid, metadata):
     connector.message_process({"type": "file", "id": fileid}, "Uploading file metadata.")
 
     headers = {'Content-Type': 'application/json'}
-    url = '%s/api/files/%s/metadata.jsonld?key=%s' % (client.host, fileid, client.key)
+    url = posixpath.join(client.host, 'api/files/%s/metadata.jsonld?key=%s' % (fileid, client.key))
     result = connector.post(url, headers=headers, data=json.dumps(metadata),
                             verify=connector.ssl_verify if connector else True)
 
@@ -239,7 +255,7 @@ def upload_preview(connector, client, fileid, previewfile, previewmetadata=None,
     headers = {'Content-Type': 'application/json'}
 
     # upload preview
-    url = '%s/api/previews?key=%s' % (client.host, client.key)
+    url = posixpath.join(client.host, 'api/previews?key=%s' % client.key)
     with open(previewfile, 'rb') as filebytes:
         # If a custom preview file MIME type is provided, use it to generate the preview file object.
         if preview_mimetype is not None:
@@ -253,13 +269,13 @@ def upload_preview(connector, client, fileid, previewfile, previewmetadata=None,
 
     # associate uploaded preview with original file
     if fileid and not (previewmetadata and 'section_id' in previewmetadata and previewmetadata['section_id']):
-        url = '%s/api/files/%s/previews/%s?key=%s' % (client.host, fileid, previewid, client.key)
+        url = posixpath.join(client.host, 'api/files/%s/previews/%s?key=%s' % (fileid, previewid, client.key))
         result = connector.post(url, headers=headers, data=json.dumps({}),
                                 verify=connector.ssl_verify if connector else True)
 
     # associate metadata with preview
     if previewmetadata is not None:
-        url = '%s/api/previews/%s/metadata?key=%s' % (client.host, previewid, client.key)
+        url = posixpath.join(client.host, 'api/previews/%s/metadata?key=%s' % (previewid, client.key))
         result = connector.post(url, headers=headers, data=json.dumps(previewmetadata),
                                 verify=connector.ssl_verify if connector else True)
 
@@ -279,7 +295,7 @@ def upload_tags(connector, client, fileid, tags):
     connector.message_process({"type": "file", "id": fileid}, "Uploading file tags.")
 
     headers = {'Content-Type': 'application/json'}
-    url = '%s/api/files/%s/tags?key=%s' % (client.host, fileid, client.key)
+    url = posixpath.join(client.host, 'api/files/%s/tags?key=%s' % (fileid, client.key))
     result = connector.post(url, headers=headers, data=json.dumps(tags),
                             verify=connector.ssl_verify if connector else True)
 
@@ -295,18 +311,18 @@ def upload_thumbnail(connector, client, fileid, thumbnail):
     """
 
     logger = logging.getLogger(__name__)
-    url = client.host + 'api/fileThumbnail?key=' + client.key
+    url = posixpath.join(client.host, 'api/fileThumbnail?key=%s' % client.key)
 
     # upload preview
     with open(thumbnail, 'rb') as inputfile:
         result = connector.post(url, files={"File": inputfile}, verify=connector.ssl_verify if connector else True)
     thumbnailid = result.json()['id']
     logger.debug("thumbnail id = [%s]", thumbnailid)
 
-    # associate uploaded preview with orginal file/dataset
+    # associate uploaded preview with original file/dataset
     if fileid:
         headers = {'Content-Type': 'application/json'}
-        url = client.host + 'api/files/' + fileid + '/thumbnails/' + thumbnailid + '?key=' + client.key
+        url = posixpath.join(client.host, 'api/files/%s/thumbnails/%s?key=%s' % (fileid, thumbnailid, client.key))
         connector.post(url, headers=headers, data=json.dumps({}), verify=connector.ssl_verify if connector else True)
 
     return thumbnailid
@@ -336,7 +352,7 @@ def upload_to_dataset(connector, client, datasetid, filepath, check_duplicate=Fa
         if filepath.startswith(connector.mounted_paths[source_path]):
             return _upload_to_dataset_local(connector, client, datasetid, filepath)
 
-    url = '%s/api/uploadToDataset/%s?key=%s' % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, 'api/uploadToDataset/%s?key=%s' % (datasetid, client.key))
 
     if os.path.exists(filepath):
         filename = os.path.basename(filepath)
@@ -365,7 +381,7 @@ def _upload_to_dataset_local(connector, client, datasetid, filepath):
     """
 
     logger = logging.getLogger(__name__)
-    url = '%s/api/uploadToDataset/%s?key=%s' % (client.host, datasetid, client.key)
+    url = posixpath.join(client.host, 'api/uploadToDataset/%s?key=%s' % (datasetid, client.key))
 
     if os.path.exists(filepath):
         # Replace local path with remote path before uploading
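
The upload_to_dataset hunk above preserves the mounted-path shortcut: if the file already sits under a path the Clowder server can see, pyclowder sends the remote path rather than streaming the bytes. A sketch of that check with made-up paths (the diff only shows the startswith test, so the key/value orientation of connector.mounted_paths is an assumption here):

```python
# Made-up mapping; connector.mounted_paths is consulted the same way, but
# which side is local vs. remote is an assumption, not shown in the diff.
mounted_paths = {"/clowder/data": "/home/extractor/data"}

def is_mounted(filepath):
    # Mirrors upload_to_dataset: if any mounted prefix matches, skip the
    # multipart upload and let _upload_to_dataset_local send the path.
    return any(filepath.startswith(prefix) for prefix in mounted_paths.values())

print(is_mounted("/home/extractor/data/input.csv"))  # True -> local shortcut
print(is_mounted("/tmp/scratch/input.csv"))          # False -> normal upload
```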
