Commit 4459d0c

Merge pull request #185 from eth-cscs/Per-cluster-datatransfer-configuration
Per cluster datatransfer configuration
2 parents: 1760433 + 6e4fbe9

File tree: 22 files changed (+353 −354 lines)

CHANGELOG.md (3 additions, 0 deletions)

```diff
@@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
+- ***⚠️ Configuration Breaking*** `data_operation` and `data_transfer` settings are now configurable independently for each cluster.
+- ***⚠️ Configuration Breaking*** `datatransfer_jobs_directives` setting is now under `data_operation`.
+
 ### Fixed
 
 - ***⚠️ API Breaking*** Fix transfer directives serialization, now properties names are properly camelcased (see issue: #162).
```
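To make the two configuration-breaking entries concrete, here is a minimal before/after sketch of an API config file. The cluster name `my-cluster` and the values are illustrative; the key names and nesting follow the diffs below.

```yaml
# Before: one global data_operation block; scheduler directives attached
# directly to each cluster (illustrative values).
clusters:
  - name: "my-cluster"
    datatransfer_jobs_directives:
      - "#SBATCH --ntasks=1"
data_operation:
  max_ops_file_size: 1048576 # 1M
  data_transfer:
    service_type: "s3"
    name: "s3-storage"
---
# After: each cluster carries its own data_operation block, and
# datatransfer_jobs_directives moves inside it.
clusters:
  - name: "my-cluster"
    data_operation:
      max_ops_file_size: 5242880 # 5M
      datatransfer_jobs_directives:
        - "#SBATCH --ntasks=1"
      data_transfer:
        service_type: "s3"
        name: "s3-storage"
```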

build/demo-launcher/src/launcher/config.py (6 additions, 6 deletions)

```diff
@@ -45,11 +45,6 @@ class UnsafeServiceAccount(ServiceAccount):
     secret: str
 
 
-class UnsafeHPCCluster(HPCCluster):
-    service_account: UnsafeServiceAccount
-    ssh: UnsafeSSHClientPool
-
-
 class UnsafeS3DataTransfer(BaseDataTransfer):
     service_type: str
     name: str
@@ -65,7 +60,12 @@ class UnsafeDataOperation(DataOperation):
     data_transfer: UnsafeS3DataTransfer
 
 
+class UnsafeHPCCluster(HPCCluster):
+    service_account: UnsafeServiceAccount
+    ssh: UnsafeSSHClientPool
+    data_operation: UnsafeDataOperation
+
+
 class UnsafeSettings(Settings):
     ssh_credentials: UnsafeSSHUserKeys
     clusters: List[UnsafeHPCCluster] = []
-    data_operation: UnsafeDataOperation
```
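With `data_operation` moved from `UnsafeSettings` onto `UnsafeHPCCluster` (the class is also reordered below `UnsafeDataOperation`, consistent with its new field reference), the settings document changes shape. Here is a sketch of the YAML these models now parse, assuming pydantic-style parsing as the field-annotation syntax suggests; each key is annotated with the model that validates it, and empty mappings stand in for omitted required fields:

```yaml
# Sketch only: values omitted, annotations name the validating model.
ssh_credentials: {}      # UnsafeSSHUserKeys
clusters:                # List[UnsafeHPCCluster]
  - service_account: {}  # UnsafeServiceAccount
    ssh: {}              # UnsafeSSHClientPool
    data_operation:      # UnsafeDataOperation (previously a top-level key)
      data_transfer: {}  # UnsafeS3DataTransfer
```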

build/demo-launcher/src/launcher/f7t-api-config.demo-env.yaml (19 additions, 21 deletions)

```diff
@@ -30,31 +30,29 @@ clusters:
     type: "slurm"
     version: "24.11.0"
     timeout: 10
+    connection_mode: "ssh"
     service_account:
       client_id: "firecrest-health-check"
       secret: "demo-secret"
-    probing:
-      interval_check: 120
-      startup_grace_period: 300
     file_systems:
       - path: '/users'
         data_type: 'users'
         default_work_dir: true
-data_operation:
-  max_ops_file_size: 1048576 # 1M
-  data_transfer:
-    service_type: "s3"
-    name: "s3-storage"
-    private_url: "http://192.168.240.19:9000"
-    public_url: "http://localhost:9000"
-    access_key_id: "storage_access_key"
-    secret_access_key: "secret_access_key"
-    region: "us-east-1"
-    ttl: 604800
-    multipart:
-      use_split: false
-      max_part_size: 1073741824 # 1G
-      parallel_runs: 3
-      tmp_folder: "tmp"
-probing:
-  interval: 60
+    probing:
+      interval: 60
+    data_operation:
+      max_ops_file_size: 5242880 # 5M
+      data_transfer:
+        service_type: "s3"
+        name: "s3-storage"
+        private_url: "http://192.168.240.19:9000"
+        public_url: "http://localhost:9000"
+        access_key_id: "storage_access_key"
+        secret_access_key: "secret_access_key"
+        region: "us-east-1"
+        ttl: 604800
+        multipart:
+          use_split: false
+          max_part_size: 1073741824 # 1G
+          parallel_runs: 3
+          tmp_folder: "tmp"
```

build/helm/firecrest-api/values.yaml (41 additions, 41 deletions)

```diff
@@ -257,49 +257,49 @@ firecrest:
         ## default_work_dir: Mark this as the default working directory (ie, for default scheduler logs)
         ## it's a boolean option (true or false)
         default_work_dir: true
-
-    ## datatransfer_jobs_directives: Custom scheduler flags passed to data transfer jobs
-    ## (e.g. -pxfer for a dedicated partition).
-    ## example:
-    ## datatransfer_jobs_directives:
-    ##   - "#SBATCH --partition=xfer"
-    ##   - "#SBATCH --ntasks=1"
-    datatransfer_jobs_directives:
-      - "#SBATCH --ntasks=1"
 
-
-    ## Configuration for data transfers and small operations
-    data_operation:
-      ## max_ops_file_size: maximum file size (in bytes) allowed for direct upload and download.
-      max_ops_file_size: 5242880 # 5M
-      ## Data transfer service configuration
-      data_transfer:
-        ## service_type: Type of data transfer service (available option "s3")
-        service_type: "s3"
-        ## name: name identifier for the storage.
-        name: "s3-storage"
-        ## private_url: Private/internal endpoint URL for the storage.
-        private_url: ""
-        ## public_url: Public/external endpoint URL for the storage.
-        public_url: ""
-        ## access_key_id: access key ID for S3-compatible storage
-        access_key_id: ""
-        ## secret_access_key: Secret access key for storage. You can give directly the content or the file path using 'secret_file:/path/to/file'
+    ## Configuration for data transfers and small operations
+    data_operation:
+      ## max_ops_file_size: maximum file size (in bytes) allowed for direct upload and download.
+      max_ops_file_size: 5242880 # 5M
+      ## Data transfer service configuration
+
+      ## datatransfer_jobs_directives: Custom scheduler flags passed to data transfer jobs
+      ## (e.g. -pxfer for a dedicated partition).
       ## example:
-        ## secret_access_key: "secret_file:/app/secrets/s3_secret_access_key"
-        secret_access_key: ""
-        ## region: Region of the storage bucket.
-        ## example
-        ## region: "eu-central-1"
-        region: "eu-central-1"
-        ## tenant: Optional tenant identifier for multi-tenant setups.
-        ## example
-        ## tenant: "firecrest"
-        # tenant: ""
-        ## ttl: Time-to-live (in seconds) for generated URLs.
-        ## example for one day:
-        ## ttl: 86400
-        ttl: 86400
+      ## datatransfer_jobs_directives:
+      ##   - "#SBATCH --partition=xfer"
+      ##   - "#SBATCH --ntasks=1"
+      datatransfer_jobs_directives:
+        - "#SBATCH --ntasks=1"
+      data_transfer:
+        ## service_type: Type of data transfer service (available option "s3")
+        service_type: "s3"
+        ## name: name identifier for the storage.
+        name: "s3-storage"
+        ## private_url: Private/internal endpoint URL for the storage.
+        private_url: ""
+        ## public_url: Public/external endpoint URL for the storage.
+        public_url: ""
+        ## access_key_id: access key ID for S3-compatible storage
+        access_key_id: ""
+        ## secret_access_key: Secret access key for storage. You can give directly the content or the file path using 'secret_file:/path/to/file'
+        ## example:
+        ## secret_access_key: "secret_file:/app/secrets/s3_secret_access_key"
+        secret_access_key: ""
+        ## region: Region of the storage bucket.
+        ## example
+        ## region: "eu-central-1"
+        region: "eu-central-1"
+        ## tenant: Optional tenant identifier for multi-tenant setups.
+        ## example
+        ## tenant: "firecrest"
+        # tenant: ""
+        ## ttl: Time-to-live (in seconds) for generated URLs.
+        ## example for one day:
+        ## ttl: 86400
+        ttl: 86400
+
 
     ## Configuration for multipart upload behavior.
     # multipart:
```
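Following the chart's own commented example, a values override can now route a cluster's transfer jobs to a dedicated partition. A sketch only: the `xfer` partition name comes from the example comment above, the file name `my-values.yaml` is illustrative, and the exact nesting under `firecrest:` should mirror wherever `data_operation` sits in the default values file.

```yaml
# my-values.yaml (illustrative override; sibling keys omitted)
firecrest:
  # ... cluster entry as in the defaults above ...
  data_operation:
    datatransfer_jobs_directives:
      - "#SBATCH --partition=xfer"
      - "#SBATCH --ntasks=1"
```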

f7t-api-config.local-env-tests.yaml (71 additions, 29 deletions)

```diff
@@ -31,14 +31,32 @@ clusters:
         timeout: 5
       filesystems:
         timeout: 5
-    datatransfer_jobs_directives:
-      - "#SBATCH --nodes=1"
-      - "#SBATCH --time=0-00:15:00"
-      - "#SBATCH --account={account}"
+      storage:
+        timeout: 20
     file_systems:
       - path: '/home'
         data_type: 'users'
         default_work_dir: true
+    data_operation:
+      max_ops_file_size: 5242880 # 5M
+      datatransfer_jobs_directives:
+        - "#SBATCH --nodes=1"
+        - "#SBATCH --time=0-00:15:00"
+        - "#SBATCH --account={account}"
+      data_transfer:
+        name: "s3-storage"
+        service_type: "s3"
+        private_url: "http://192.168.240.19:9000"
+        public_url: "http://localhost:9000"
+        access_key_id: "storage_access_key"
+        secret_access_key: ""
+        region: "us-east-1"
+        ttl: 604800
+        multipart:
+          use_split: false
+          max_part_size: 1073741824 # 1G
+          parallel_runs: 3
+          tmp_folder: "tmp"
   - name: "cluster-SLURM-ssh"
     ssh:
       host: "192.168.240.2"
@@ -58,13 +76,32 @@ clusters:
         timeout: 10
       filesystems:
         timeout: 10
-    datatransfer_jobs_directives:
-      - "#SBATCH --nodes=1"
-      - "#SBATCH --time=0-00:15:00"
+      storage:
+        timeout: 20
     file_systems:
       - path: '/home'
         data_type: 'users'
         default_work_dir: true
+    data_operation:
+      max_ops_file_size: 5242880 # 5M
+      datatransfer_jobs_directives:
+        - "#SBATCH --nodes=1"
+        - "#SBATCH --time=0-00:15:00"
+        - "#SBATCH --account={account}"
+      data_transfer:
+        name: "s3-storage"
+        service_type: "s3"
+        private_url: "http://192.168.240.19:9000"
+        public_url: "http://localhost:9000"
+        access_key_id: "storage_access_key"
+        secret_access_key: ""
+        region: "us-east-1"
+        ttl: 604800
+        multipart:
+          use_split: false
+          max_part_size: 1073741824 # 1G
+          parallel_runs: 3
+          tmp_folder: "tmp"
   - name: "cluster-pbs"
     ssh:
       host: "192.168.240.2"
@@ -79,29 +116,34 @@ clusters:
       secret: ""
     probing:
       interval_check: 60
-    datatransfer_jobs_directives:
-      - "#PBS -l nodes=1:ppn=1"
-      - "#PBS -l walltime=00:15:00"
+    services:
+      ssh:
+        timeout: 10
+      filesystems:
+        timeout: 10
+      storage:
+        timeout: 20
     file_systems:
       - path: '/home'
         data_type: 'users'
         default_work_dir: true
-data_operation:
-  max_ops_file_size: 1048576 # 1M
-  data_transfer:
-    name: "s3-storage"
-    service_type: "s3"
-    private_url: "http://192.168.240.19:9000"
-    public_url: "http://localhost:9000"
-    access_key_id: "storage_access_key"
-    secret_access_key: ""
-    region: "us-east-1"
-    ttl: 604800
-    multipart:
-      use_split: false
-      max_part_size: 1073741824 # 1G
-      parallel_runs: 3
-      tmp_folder: "tmp"
-probing:
-  interval: 60
-timeout: 10
+    data_operation:
+      max_ops_file_size: 5242880 # 5M
+      datatransfer_jobs_directives:
+        - "#PBS -l nodes=1:ppn=1"
+        - "#PBS -l walltime=00:15:00"
+        - "#PBS -A {account}"
+      data_transfer:
+        name: "s3-storage"
+        service_type: "s3"
+        private_url: "http://192.168.240.19:9000"
+        public_url: "http://localhost:9000"
+        access_key_id: "storage_access_key"
+        secret_access_key: ""
+        region: "us-east-1"
+        ttl: 604800
+        multipart:
+          use_split: false
+          max_part_size: 1073741824 # 1G
+          parallel_runs: 3
+          tmp_folder: "tmp"
```
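The net effect in this test config is that each cluster now carries scheduler-appropriate transfer directives, which is the point of the per-cluster change. Distilled from the diff above (all other required keys omitted, so this fragment alone is not a complete config):

```yaml
# SLURM-style directives on one cluster, PBS-style on another.
clusters:
  - name: "cluster-SLURM-ssh"
    data_operation:
      datatransfer_jobs_directives:
        - "#SBATCH --nodes=1"
        - "#SBATCH --time=0-00:15:00"
        - "#SBATCH --account={account}"
  - name: "cluster-pbs"
    data_operation:
      datatransfer_jobs_directives:
        - "#PBS -l nodes=1:ppn=1"
        - "#PBS -l walltime=00:15:00"
        - "#PBS -A {account}"
```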
