Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ Before submitting this PR, please make sure that:

**Additional Information**

Add any other information or context that may be relevant to this PR. This can include potential impacts, known issues, or future work related to this change.
Add any other information or context that may be relevant to this PR. This can include potential impacts, known issues, or future work related to this change.
2 changes: 1 addition & 1 deletion api/.env.default
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ CILOGON_OAUTH_CLIENT_SECRET=example
MICROSOFT_OAUTH_CLIENT_ID=example
MICROSOFT_OAUTH_CLIENT_SECRET=example

DOWNLOAD_SERVER_BASE_URL=http://secure_download:3060
DOWNLOAD_SERVER_BASE_URL=http://localhost:3060

FILESYSTEM_SEARCH_SPACES=/opt/sca/data
FILESYSTEM_BASE_DIR_SCRATCH=/path/to/scratch/base/dir
Expand Down
4 changes: 2 additions & 2 deletions api/config/docker.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{

}
"mode": "docker"
}
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ services:
secure_download:
image: node:19
restart: unless-stopped
expose:
- 3060
ports:
- "3060:3060"
env_file:
- secure_download/.env.default # For all default settings
volumes:
Expand Down
27 changes: 18 additions & 9 deletions rhythm/bin/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,19 @@

set -e

api_env="api/.env"
# Always run from the app root so relative paths map to the mounted volumes
APP_ROOT="/app"
if [ -d "$APP_ROOT" ]; then
cd "$APP_ROOT"
fi

API_DIR="${APP_ROOT}/api"
if [ ! -d "$API_DIR" ]; then
echo "ERROR: Expected API directory at $API_DIR not found."
exit 1
fi

api_env="${API_DIR}/.env"

echo "Starting Rhythm API..."

Expand All @@ -11,10 +23,9 @@ if [ -f "$api_env" ]; then
echo ".env file exists in api directory."
else
echo "Creating .env file in api directory..."
touch $api_env
touch "$api_env"
fi


if [ -f "keys/auth.key" ] && [ -f "keys/auth.pub" ]; then
echo "Keys already exist. Skipping key generation."
else
Expand All @@ -23,8 +34,6 @@ else
cd ../
fi



# Check if the string exists in the file
api_token="WORKFLOW_AUTH_TOKEN"
echo "Checking if the string '${api_token}' exists in the file '$api_env'..."
Expand All @@ -34,16 +43,16 @@ if grep -q "^${api_token}=" "$api_env"; then
echo "The file contains the string '${api_token}' with a value: $value"
else
echo "The string '${api_token}' exists but has no value."
sed -i '/^WORKFLOW_AUTH_TOKEN/d' $api_env
echo "WORKFLOW_AUTH_TOKEN=$(python -m rhythm_api.scripts.issue_token --sub bioloop-dev.sca.iu.edu)" >> $api_env
sed -i '/^WORKFLOW_AUTH_TOKEN/d' "$api_env"
echo "WORKFLOW_AUTH_TOKEN=$(python -m rhythm_api.scripts.issue_token --sub bioloop-dev.sca.iu.edu)" >> "$api_env"
echo "Created new API token."
echo "INFO: You MUST RESTART THE API in order to use the new token."
fi
else
echo "The string '${api_token}' does not exist in the file."
echo "WORKFLOW_AUTH_TOKEN=$(python -m rhythm_api.scripts.issue_token --sub bioloop-dev.sca.iu.edu)" >> $api_env
echo "WORKFLOW_AUTH_TOKEN=$(python -m rhythm_api.scripts.issue_token --sub bioloop-dev.sca.iu.edu)" >> "$api_env"
echo "Created new API token."
echo "INFO: You MUST RESTART THE API in order to use the new token."
fi

$*
$*
2 changes: 2 additions & 0 deletions secure_download/.env.default
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
NODE_ENV=docker
JWKS_URI=http://signet:5050/oauth/jwks
UPLOAD_PATH_DATA_PRODUCTS=/opt/sca/data

EXPRESS_HOST=0.0.0.0
10 changes: 8 additions & 2 deletions secure_download/config/custom-environment-variables.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
{
"mode": "NODE_ENV",
"auth": {
"jwks_uri": "JWKS_URI"
}
}
},
"express": {
"port": "EXPRESS_PORT",
"host": "EXPRESS_HOST"
},
"download_path": "DOWNLOAD_PATH"
}
5 changes: 3 additions & 2 deletions secure_download/config/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
"jwks_uri": "http://127.0.0.1:5050/oauth/jwks"
},
"scope_prefix": "download_file:",
"upload_scope": "upload_file"
}
"upload_scope": "upload_file",
"download_path": "/opt/sca/data/downloads"
}
3 changes: 3 additions & 0 deletions secure_download/config/docker.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"mode": "docker"
}
3 changes: 3 additions & 0 deletions secure_download/config/local.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"mode": "local"
}
3 changes: 3 additions & 0 deletions secure_download/config/test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"mode": "test"
}
52 changes: 38 additions & 14 deletions secure_download/src/routes/download.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ const express = require('express');
const createError = require('http-errors');
const config = require('config');
const { param } = require('express-validator');
const path = require('path');
const fs = require('fs');

const { validate } = require('../middleware/validators');
const asyncHandler = require('../middleware/asyncHandler');
Expand All @@ -12,15 +14,30 @@ function remove_leading_slash(str) {
return str?.replace(/^\/+/, '');
}

// The trailing * ensures that this route can accept a file's path as a string after
// the dataset's bundle name, in case a specific file is being requested for download.
/**
 * Get a readable stream of the file at the given path, resolved against the
 * configured `download_path` base directory.
 * @param {string} filePath - Path of the file to stream, relative to `download_path`.
 * @returns {fs.ReadStream} - A stream of the file's contents.
 * @throws {HttpError} 403 Forbidden if the path resolves outside `download_path`
 *                     (path-traversal attempt), 404 NotFound if the file does not exist.
 */
function getFileStream(filePath) {
  const baseDir = path.resolve(config.get('download_path'));
  const resolvedPath = path.resolve(baseDir, filePath);
  // Security: filePath comes from an untrusted request. Reject anything that
  // resolves outside the download root (e.g. "../../etc/passwd").
  if (resolvedPath !== baseDir && !resolvedPath.startsWith(baseDir + path.sep)) {
    throw createError.Forbidden('Invalid path');
  }
  // NOTE(review): existsSync is a check-then-use race (TOCTOU); the 'error'
  // handler below also covers files removed between the check and the read.
  if (!fs.existsSync(resolvedPath)) {
    throw createError.NotFound('File not found');
  }
  const stream = fs.createReadStream(resolvedPath);
  stream.on('error', (err) => {
    // Do NOT throw here: an exception inside an event-emitter callback cannot
    // be caught by the Express route's asyncHandler and would become an
    // uncaught exception. Log and destroy the stream; a piped response is
    // torn down when the source stream is destroyed.
    console.error('Error streaming file:', err);
    if (!stream.destroyed) {
      stream.destroy();
    }
  });
  return stream;
}

router.get(
'/:bundle_name*',
'/:file_path',
validate([
param('bundle_name').escape().notEmpty(),
param('file_path').escape().notEmpty(),
]),
asyncHandler(async (req, res, next) => {
console.log('inside /download/:bundle_name');
const SCOPE_PREFIX = config.get('scope_prefix');

const scopes = (req.token?.scope || '').split(' ');
Expand All @@ -31,20 +48,27 @@ router.get(
}

const token_file_path = remove_leading_slash(download_scopes[0].slice(SCOPE_PREFIX.length));
const req_path = remove_leading_slash(req.path);
const req_path = remove_leading_slash(decodeURIComponent(req.path));

if (req_path === token_file_path) {
res.set('X-Accel-Redirect', `/data/${token_file_path}`);

// make browser download response instead of attempting to render it
res.set('content-type', 'application/octet-stream; charset=utf-8');

// makes nginx not cache the response file
// otherwise the response cuts off at 1GB as the max buffer size is reached
// and the file download fails
// https://stackoverflow.com/a/64282626
res.set('X-Accel-Buffering', 'no');
res.send('');
if (config.get('mode') !== 'production') {
// In docker/dev/local/test mode, directly stream the file using Express
const stream = getFileStream(token_file_path);
stream.pipe(res);
} else {
// In production mode, use nginx X-Accel-Redirect
res.set('X-Accel-Redirect', `/data/${token_file_path}`);

// makes nginx not cache the response file
// otherwise the response cuts off at 1GB as the max buffer size is reached
// and the file download fails
// https://stackoverflow.com/a/64282626
res.set('X-Accel-Buffering', 'no');
res.send('');
}
} else {
return next(createError.Forbidden('Invalid path'));
}
Expand Down
3 changes: 1 addition & 2 deletions ui/.env.default
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ VITE_GOOGLE_RETURN=https://localhost/auth/google
VITE_CILOGON_RETURN=https://localhost/auth/cil
VITE_MICROSOFT_RETURN=https://localhost/auth/microsoft

VITE_UPLOAD_API_BASE_PATH=https://localhost
VITE_UPLOAD_API_URL=http://secure_download:3060 # for if it's the same host as in dev
VITE_UPLOAD_API_BASE_PATH=http://localhost:3060 # for if it's the same host as in dev

VITE_SCRATCH_BASE_DIR=/path/to/scratch/base_dir
VITE_SCRATCH_MOUNT_DIR=/path/to/scratch/mount_dir
Expand Down
2 changes: 1 addition & 1 deletion ui/vite.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ export default defineConfig(({ command, mode }) => {
},
},
"/upload": {
target: env.VITE_UPLOAD_API_URL,
target: env.VITE_UPLOAD_API_BASE_PATH,
changeOrigin: true,
secure: false,
},
Expand Down
1 change: 1 addition & 0 deletions workers/workers/config/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
FIVE_MINUTES = 5 * 60

config = {
'mode': 'default',
'app_id': 'bioloop-dev.sca.iu.edu',
# cspell: disable-next-line
'genome_file_types': ['.cbcl', '.bcl', '.bcl.gz', '.bgzf', '.fastq.gz', '.bam', '.bam.bai', '.vcf.gz',
Expand Down
19 changes: 10 additions & 9 deletions workers/workers/tasks/archive.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import shutil
from pathlib import Path

from celery import Celery
from celery.utils.log import get_task_logger
from sca_rhythm import WorkflowTask
import json

import workers.api as api
import workers.cmd as cmd
Expand Down Expand Up @@ -61,26 +62,26 @@ def archive(celery_task: WorkflowTask, dataset: dict, delete_local_file: bool =
'md5': bundle_checksum,
}

sda_dir = wf_utils.get_archive_dir(dataset['type'])
sda_bundle_path = f'{sda_dir}/{bundle.name}'
dataset_type_archive_dir = wf_utils.get_archive_dir(dataset['type'])
dataset_bundle_path = f'{dataset_type_archive_dir}/{bundle.name}'

wf_utils.upload_file_to_sda(local_file_path=bundle,
sda_file_path=sda_bundle_path,
celery_task=celery_task)
wf_utils.archive(local_file_path=bundle,
archive_path=dataset_bundle_path,
celery_task=celery_task)

if delete_local_file:
# file successfully uploaded to SDA, delete the local copy
print("deleting local bundle")
bundle.unlink()

return sda_bundle_path, bundle_attrs
return dataset_bundle_path, bundle_attrs


def archive_dataset(celery_task, dataset_id, **kwargs):
dataset = api.get_dataset(dataset_id=dataset_id, bundle=True)
sda_bundle_path, bundle_attrs = archive(celery_task, dataset)
archived_bundle_path, bundle_attrs = archive(celery_task, dataset)
update_data = {
'archive_path': sda_bundle_path,
'archive_path': archived_bundle_path,
'bundle': bundle_attrs
}
api.update_dataset(dataset_id=dataset_id, update_data=update_data)
Expand Down
17 changes: 8 additions & 9 deletions workers/workers/tasks/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
from sca_rhythm import WorkflowTask

import workers.api as api
import workers.utils as utils
from workers.config import config
import workers.config.celeryconfig as celeryconfig
import workers.utils as utils
import workers.workflow_utils as wf_utils
from workers.dataset import compute_staging_path
from workers.dataset import get_bundle_staged_path
from workers import exceptions as exc
from workers.config import config
from workers.dataset import compute_staging_path, get_bundle_staged_path

app = Celery("tasks")
app.config_from_object(celeryconfig)
Expand Down Expand Up @@ -60,24 +59,24 @@ def extract_tarfile(tar_path: Path, target_dir: Path, override_arcname=False):

def stage(celery_task: WorkflowTask, dataset: dict) -> (str, str):
"""
gets the tar from SDA and extracts it
gets the tar from the archived location, and extracts it

input: dataset['name'], dataset['archive_path'] should exist
returns: stage_path
"""
staging_dir, alias = compute_staging_path(dataset)

sda_bundle_path = dataset['archive_path']
archived_bundle_path = dataset['archive_path']
alias_dir = staging_dir.parent
alias_dir.mkdir(parents=True, exist_ok=True)

bundle = dataset["bundle"]
bundle_md5 = bundle["md5"]
bundle_download_path = Path(get_bundle_staged_path(dataset=dataset))

wf_utils.download_file_from_sda(sda_file_path=sda_bundle_path,
local_file_path=bundle_download_path,
celery_task=celery_task)
wf_utils.stage(archive_path=archived_bundle_path,
local_file_path=bundle_download_path,
celery_task=celery_task)

evaluated_checksum = utils.checksum(bundle_download_path)
if evaluated_checksum != bundle_md5:
Expand Down
Loading