Skip to content

Commit 3b7e9df

Browse files
committed
Merge branch 'refs/heads/release/v2.0-beta-3' into feature/update-user
# Conflicts: # backend/app/routers/authentication.py
2 parents de0ee58 + 700aeb1 commit 3b7e9df

File tree

115 files changed

+2028
-399
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+2028
-399
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ scripts/keycloak/data/*
5959
# ignore clowder chart deps
6060
deployments/kubernetes/charts/clowder2/charts
6161
deployments/kubernetes/charts/clowder2/*clowder2-software-dev.yaml
62+
*secret*.yaml
6263

6364
# Environments
6465
.env

.run/uvicorn.run.xml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
<component name="ProjectRunConfigurationManager">
22
<configuration default="false" name="uvicorn" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
33
<module name="clowder2" />
4-
<option name="ENV_FILES" value="" />
54
<option name="INTERPRETER_OPTIONS" value="" />
65
<option name="PARENT_ENVS" value="true" />
76
<envs>
@@ -14,7 +13,7 @@
1413
<option name="ADD_SOURCE_ROOTS" value="true" />
1514
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
1615
<option name="SCRIPT_NAME" value="uvicorn" />
17-
<option name="PARAMETERS" value="app.main:app --reload --host 0.0.0.0" />
16+
<option name="PARAMETERS" value="app.main:app --host 0.0.0.0 --workers 17" />
1817
<option name="SHOW_COMMAND_LINE" value="false" />
1918
<option name="EMULATE_TERMINAL" value="false" />
2019
<option name="MODULE_MODE" value="true" />

CHANGELOG.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,34 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](http://keepachangelog.com/)
66
and this project adheres to [Semantic Versioning](http://semver.org/).
77

8+
## [v2.0.0-beta.3] - 2024-07-29
9+
10+
### Added
11+
12+
- License management
13+
- Release dataset with versions
14+
- Enable and disable user account through Keycloak
15+
- JupyterHub integration
16+
- Interface for creating and editing matching criteria for triggering extractors
17+
- Interface for editing metadata definitions
18+
- My dataset tab listing all the datasets created by the user
19+
- Drag-and-drop upload of multiple files
20+
- Footer with links to documentation, source code, and contact information
21+
- Documentation through MkDocs
22+
23+
### Changed
24+
25+
- Allow public datasets and files to be searchable
26+
- List all the extractors with the ability to enable/disable the extractors
27+
- Filter listeners based on their support for file or dataset
28+
- Helm chart updated to support a custom existing secret
29+
30+
### Fixed
31+
32+
- Clowder registration link on the top bar
33+
- Case-insensitive search
34+
- Download count immediately increments after download
35+
836
## [v2.0.0-beta.2] - 2024-02-16
937

1038
### Added

backend/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@ ENV PATH="/code/.venv/bin:$PATH"
2222
COPY ./app /code/app
2323

2424
# launch app using uvicorn
25-
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]
25+
# Recommended number of workers is (2 x number_of_cores) + 1
26+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "17"]

backend/app/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class Settings(BaseSettings):
99
API_V2_STR: str = "/api/v2"
1010
admin_email: str = "[email protected]"
1111
frontend_url: str = "http://localhost:3000"
12-
version: str = "2.0.0-beta.2"
12+
version: str = "2.0.0-beta.3"
1313

1414
# Unique secret for hashing API keys. Generate with `openssl rand -hex 32`
1515
local_auth_secret = "clowder_secret_key"

backend/app/main.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
description="A cloud native data management framework to support any research domain. Clowder was "
8787
"developed to help researchers and scientists in data intensive domains manage raw data, complex "
8888
"metadata, and automatic data pipelines. ",
89-
version="2.0.0-beta.2",
89+
version="2.0.0-beta.3",
9090
contact={"name": "Clowder", "url": "https://clowderframework.org/"},
9191
license_info={
9292
"name": "Apache 2.0",
@@ -316,7 +316,9 @@ async def startup_beanie():
316316
ThumbnailDBViewList,
317317
LicenseDB,
318318
],
319-
recreate_views=True,
319+
# If a view already exists, it will not be recreated
320+
# When a view's query changes, make sure to manually drop the view so that it is recreated
321+
recreate_views=False,
320322
)
321323

322324

backend/app/models/metadata.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -307,9 +307,11 @@ async def validate_context(
307307
detail="Context is required",
308308
)
309309
if context is not None:
310-
pass
310+
# TODO validate context
311+
return content
311312
if context_url is not None:
312-
pass
313+
# TODO validate context_url
314+
return content
313315
if definition is not None:
314316
if (
315317
md_def := await MetadataDefinitionDB.find_one(
@@ -322,7 +324,7 @@ async def validate_context(
322324
status_code=400,
323325
detail=f"{definition} is not valid metadata definition",
324326
)
325-
return content
327+
return content
326328

327329

328330
def deep_update(orig: dict, new: dict):

backend/app/routers/authentication.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
)
1010
from app.models.datasets import DatasetDBViewList
1111
from app.models.users import UserDB, UserIn, UserLogin, UserOut, UserUpdate
12+
from app.routers.utils import save_refresh_token
1213
from beanie import PydanticObjectId
1314
from fastapi import APIRouter, Depends, HTTPException
1415
from keycloak.exceptions import (
@@ -71,6 +72,7 @@ async def save_user(userIn: UserIn):
7172
async def login(userIn: UserLogin):
7273
try:
7374
token = keycloak_openid.token(userIn.email, userIn.password)
75+
await save_refresh_token(token["refresh_token"], userIn.email)
7476
return {"token": token["access_token"]}
7577
# bad credentials
7678
except KeycloakAuthenticationError as e:

backend/app/routers/datasets.py

Lines changed: 98 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
from pymongo import DESCENDING
7474
from rocrate.model.person import Person
7575
from rocrate.rocrate import ROCrate
76+
from starlette.concurrency import run_in_threadpool
7677

7778
router = APIRouter()
7879
security = HTTPBearer()
@@ -1192,16 +1193,26 @@ async def download_dataset(
11921193
bag_info_path = os.path.join(current_temp_dir, "bag-info.txt")
11931194
tagmanifest_path = os.path.join(current_temp_dir, "tagmanifest-md5.txt")
11941195

1195-
with open(manifest_path, "w") as f:
1196-
pass # Create empty file so no errors later if the dataset is empty
1197-
1198-
with open(bagit_path, "w") as f:
1199-
f.write("Bag-Software-Agent: clowder.ncsa.illinois.edu" + "\n")
1200-
f.write("Bagging-Date: " + str(datetime.datetime.now()) + "\n")
1196+
await run_in_threadpool(lambda: open(manifest_path, "w").close())
1197+
await run_in_threadpool(lambda: open(manifest_path, "w").close())
1198+
await run_in_threadpool(
1199+
lambda: open(bagit_path, "w").write(
1200+
"Bag-Software-Agent: clowder.ncsa.illinois.edu"
1201+
+ "\n"
1202+
+ "Bagging-Date: "
1203+
+ str(datetime.datetime.now())
1204+
+ "\n"
1205+
)
1206+
)
12011207

1202-
with open(bag_info_path, "w") as f:
1203-
f.write("BagIt-Version: 0.97" + "\n")
1204-
f.write("Tag-File-Character-Encoding: UTF-8" + "\n")
1208+
await run_in_threadpool(
1209+
lambda: open(bag_info_path, "w").write(
1210+
"BagIt-Version: 0.97"
1211+
+ "\n"
1212+
+ "Tag-File-Character-Encoding: UTF-8"
1213+
+ "\n"
1214+
)
1215+
)
12051216

12061217
# Write dataset metadata if found
12071218
metadata = await MetadataDB.find(
@@ -1214,6 +1225,10 @@ async def download_dataset(
12141225
metadata_content = json_util.dumps(metadata)
12151226
with open(datasetmetadata_path, "w") as f:
12161227
f.write(metadata_content)
1228+
await run_in_threadpool(
1229+
lambda: open(datasetmetadata_path, "w").write(metadata_content)
1230+
)
1231+
12171232
crate.add_file(
12181233
datasetmetadata_path,
12191234
dest_path="metadata/_dataset_metadata.json",
@@ -1236,16 +1251,20 @@ async def download_dataset(
12361251
hierarchy = await _get_folder_hierarchy(file.folder_id, "")
12371252
dest_folder = os.path.join(current_temp_dir, hierarchy.lstrip("/"))
12381253
if not os.path.isdir(dest_folder):
1239-
os.makedirs(dest_folder, exist_ok=True)
1254+
await run_in_threadpool(os.makedirs, dest_folder, exist_ok=True)
12401255
file_name = hierarchy + file_name
12411256
current_file_path = os.path.join(current_temp_dir, file_name.lstrip("/"))
12421257

12431258
content = fs.get_object(settings.MINIO_BUCKET_NAME, bytes_file_id)
12441259
file_md5_hash = hashlib.md5(content.data).hexdigest()
1245-
with open(current_file_path, "wb") as f1:
1246-
f1.write(content.data)
1247-
with open(manifest_path, "a") as mpf:
1248-
mpf.write(file_md5_hash + " " + file_name + "\n")
1260+
await run_in_threadpool(
1261+
lambda: open(current_file_path, "wb").write(content.data)
1262+
)
1263+
await run_in_threadpool(
1264+
lambda: open(manifest_path, "a").write(
1265+
file_md5_hash + " " + file_name + "\n"
1266+
)
1267+
)
12491268
crate.add_file(
12501269
current_file_path,
12511270
dest_path="data/" + file_name,
@@ -1266,23 +1285,43 @@ async def download_dataset(
12661285
current_temp_dir, metadata_filename
12671286
)
12681287
metadata_content = json_util.dumps(metadata)
1269-
with open(metadata_filename_temp_path, "w") as f:
1270-
f.write(metadata_content)
1288+
await run_in_threadpool(
1289+
lambda: open(metadata_filename_temp_path, "w").write(
1290+
metadata_content
1291+
)
1292+
)
12711293
crate.add_file(
12721294
metadata_filename_temp_path,
12731295
dest_path="metadata/" + metadata_filename,
12741296
properties={"name": metadata_filename},
12751297
)
12761298

12771299
bag_size_kb = bag_size / 1024
1278-
1279-
with open(bagit_path, "a") as f:
1280-
f.write("Bag-Size: " + str(bag_size_kb) + " kB" + "\n")
1281-
f.write("Payload-Oxum: " + str(bag_size) + "." + str(file_count) + "\n")
1282-
f.write("Internal-Sender-Identifier: " + dataset_id + "\n")
1283-
f.write("Internal-Sender-Description: " + dataset.description + "\n")
1284-
f.write("Contact-Name: " + user_full_name + "\n")
1285-
f.write("Contact-Email: " + user.email + "\n")
1300+
await run_in_threadpool(
1301+
lambda: open(bagit_path, "a").write(
1302+
"Bag-Size: "
1303+
+ str(bag_size_kb)
1304+
+ " kB"
1305+
+ "\n"
1306+
+ "Payload-Oxum: "
1307+
+ str(bag_size)
1308+
+ "."
1309+
+ str(file_count)
1310+
+ "\n"
1311+
+ "Internal-Sender-Identifier: "
1312+
+ dataset_id
1313+
+ "\n"
1314+
+ "Internal-Sender-Description: "
1315+
+ dataset.description
1316+
+ "\n"
1317+
+ "Contact-Name: "
1318+
+ user_full_name
1319+
+ "\n"
1320+
+ "Contact-Email: "
1321+
+ user.email
1322+
+ "\n"
1323+
)
1324+
)
12861325
crate.add_file(
12871326
bagit_path, dest_path="bagit.txt", properties={"name": "bagit.txt"}
12881327
)
@@ -1296,14 +1335,33 @@ async def download_dataset(
12961335
)
12971336

12981337
# Generate tag manifest file
1299-
manifest_md5_hash = hashlib.md5(open(manifest_path, "rb").read()).hexdigest()
1300-
bagit_md5_hash = hashlib.md5(open(bagit_path, "rb").read()).hexdigest()
1301-
bag_info_md5_hash = hashlib.md5(open(bag_info_path, "rb").read()).hexdigest()
1302-
1303-
with open(tagmanifest_path, "w") as f:
1304-
f.write(bagit_md5_hash + " " + "bagit.txt" + "\n")
1305-
f.write(manifest_md5_hash + " " + "manifest-md5.txt" + "\n")
1306-
f.write(bag_info_md5_hash + " " + "bag-info.txt" + "\n")
1338+
manifest_md5_hash = await run_in_threadpool(
1339+
lambda: hashlib.md5(open(manifest_path, "rb").read()).hexdigest()
1340+
)
1341+
bagit_md5_hash = await run_in_threadpool(
1342+
lambda: hashlib.md5(open(bagit_path, "rb").read()).hexdigest()
1343+
)
1344+
bag_info_md5_hash = await run_in_threadpool(
1345+
lambda: hashlib.md5(open(bag_info_path, "rb").read()).hexdigest()
1346+
)
1347+
1348+
await run_in_threadpool(
1349+
lambda: open(tagmanifest_path, "w").write(
1350+
bagit_md5_hash
1351+
+ " "
1352+
+ "bagit.txt"
1353+
+ "\n"
1354+
+ manifest_md5_hash
1355+
+ " "
1356+
+ "manifest-md5.txt"
1357+
+ "\n"
1358+
+ bag_info_md5_hash
1359+
+ " "
1360+
+ "bag-info.txt"
1361+
+ "\n"
1362+
)
1363+
)
1364+
13071365
crate.add_file(
13081366
tagmanifest_path,
13091367
dest_path="tagmanifest-md5.txt",
@@ -1317,13 +1375,16 @@ async def download_dataset(
13171375
)
13181376
zip_name = dataset.name + version_name + ".zip"
13191377
path_to_zip = os.path.join(current_temp_dir, zip_name)
1320-
crate.write_zip(path_to_zip)
1321-
f = open(path_to_zip, "rb", buffering=0)
1322-
zip_bytes = f.read()
1378+
1379+
await run_in_threadpool(crate.write_zip, path_to_zip) # takes the most time?
1380+
1381+
f = await run_in_threadpool(open, path_to_zip, "rb", 0)
1382+
zip_bytes = await run_in_threadpool(f.read)
13231383
stream = io.BytesIO(zip_bytes)
1324-
f.close()
1384+
await run_in_threadpool(f.close)
1385+
13251386
try:
1326-
shutil.rmtree(current_temp_dir)
1387+
await run_in_threadpool(shutil.rmtree, current_temp_dir)
13271388
except Exception as e:
13281389
print("could not delete file")
13291390
print(e)

backend/app/routers/keycloak.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
retreive_refresh_token,
1212
)
1313
from app.models.tokens import TokenDB
14-
from app.models.users import UserDB, UserIn
14+
from app.models.users import UserDB, UserLogin
15+
from app.routers.utils import save_refresh_token
1516
from fastapi import APIRouter, HTTPException, Security
1617
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
1718
from jose import ExpiredSignatureError, JWTError, jwt
@@ -80,10 +81,11 @@ async def logout(
8081

8182

8283
@router.post("/login")
83-
async def loginPost(userIn: UserIn):
84+
async def loginPost(userIn: UserLogin):
8485
"""Client can use this to login when redirect is not available."""
8586
try:
8687
token = keycloak_openid.token(userIn.email, userIn.password)
88+
await save_refresh_token(token["refresh_token"], userIn.email)
8789
return {"token": token["access_token"]}
8890
# bad credentials
8991
except KeycloakAuthenticationError as e:

0 commit comments

Comments
 (0)