Skip to content

Commit 0baf5df

Browse files
Compute Transfer examples for transfers that require a flow
1 parent 36b5afc commit 0baf5df

16 files changed

+927
-143
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# configuration for conversion to docs.globus.org
2+
title: 'Tar and Transfer for collections with an associated flow policy'
3+
short_description: |
4+
Use Globus Compute to bundle files into a tarball, which you then transfer
5+
using Globus Transfer.
6+
7+
Two examples are included here, one in which the files are located on the
8+
server which runs Globus Compute, and one in which the files are on a user's
9+
machine and must be moved to the Compute host.
10+
11+
These two examples are modified versions of the original tar and transfer examples.
12+
They are expected to be invoked from the Globus webapp when initiating a transfer
13+
where the source / destination collections have an `associated_flow_policy`
14+
with this flow.
15+
16+
example_dir: 'collection_transfer_requires_flow'
17+
append_source_blocks: false
18+
index_source:
19+
concat:
20+
files:
21+
- 'README.adoc'
22+
- 'register_function.adoc'
23+
- 'example_flow1.adoc'
24+
- 'example_flow2.adoc'
25+
include_files:
26+
- 'compute_transfer_example_1_definition.json'
27+
- 'compute_transfer_example_1_schema.json'
28+
- 'compute_transfer_example_2_definition.json'
29+
- 'compute_transfer_example_2_schema.json'
30+
- 'register_compute_function.py'
31+
32+
menu_weight: 400
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
= Tar and Transfer for collections with an associated flow policy
2+
3+
These examples demonstrate how to build **flow**s that are meant to be used as the `associated_flow_policy` for GCS collections.
4+
They are variations of the Tar and Transfer examples in which the source paths, rather than being supplied by user input, are parsed from the incoming `globus-transfer-transfer#0.10` transfer data.
5+
6+
Each of these examples creates an archive file from the user's files and transfers that archive to a destination.
7+
In one case the source data is already on the server running Globus Connect Server and Globus Compute, and in the other it is on a source **collection** owned by the end user.
8+
9+
== Prerequisites
10+
11+
To run these examples, you must have a properly configured server and some local software installed.
12+
13+
You must have a co-located Globus Connect Server Collection and Globus Compute **endpoint**, either hosted on the same server or at least with access to a shared filesystem.
14+
15+
Globus Connect Server Collection::
16+
+
17+
You can follow
18+
link:https://docs.globus.org/globus-connect-server/v5.4/[this guide for setting up a Globus Connect Server Collection]
19+
to install Globus Connect Server and configure a **collection**.
20+
+
21+
For ease of use, we recommend using a Guest Collection.
22+
23+
Globus Compute Endpoint::
24+
+
25+
link:https://globus-compute.readthedocs.io/en/latest/endpoints/installation.html[This guide for setting up a Globus Compute Endpoint]
26+
covers installation of the Globus Compute software.
27+
+
28+
This Compute **endpoint** must have read/write permissions on the same storage location where the Globus Connect Server **collection** is hosted.
29+
30+
Globus CLI::
31+
+
32+
You will also need the Globus CLI installed (link:https://docs.globus.org/cli/#installation[CLI installation docs]).
33+
+
34+
Globus CLI documentation recommends installation with `pipx`, as in `pipx install globus-cli`.
35+
36+
Globus Compute SDK::
37+
+
38+
You must have the `globus-compute-sdk` Python package available.
39+
We strongly recommend using a virtual environment for this installation; install the package with `pip install globus-compute-sdk`.
40+
+
41+
You can follow
42+
link:https://globus-compute.readthedocs.io/en/stable/quickstart.html#installation[the Globus Compute install documentation]
43+
to install the Compute SDK client package in a virtualenv.
44+
45+
ifdef::env-github[]
46+
== Next: Learn About the `do_tar` Compute **Function**
47+
48+
link:../register_function.adoc[Register the `do_tar` Compute **Function**.]
49+
endif::[]
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
{
2+
"StartAt": "SetRunVariables",
3+
"States": {
4+
"SetRunVariables": {
5+
"Type": "ExpressionEval",
6+
"Parameters": {
7+
"gcs_base_path": "/",
8+
"compute_endpoint_id": "<INSERT YOUR COMPUTE ENDPOINT ID HERE>",
9+
"compute_function_id": "<INSERT YOUR COMPUTE FUNCTION ID HERE>",
10+
"compute_output_directory.=": "'/' + _context.run_id + '/'",
11+
"input_data_valid.=": "['~' in transfer_data.source_path or transfer_data.source_path == '/' for transfer_data in transfer_settings.DATA] == [False] * len(transfer_settings.DATA)"
12+
},
13+
"ResultPath": "$.run_vars",
14+
"Next": "ValidateTransferData"
15+
},
16+
"ValidateTransferData": {
17+
"Comment": "Validate that none of the input source paths are the path '/' or contain the character '~'.",
18+
"Type": "Choice",
19+
"Choices": [
20+
{
21+
"Variable": "$.run_vars.input_data_valid",
22+
"BooleanEquals": false,
23+
"Next": "InvalidTransferData"
24+
}
25+
],
26+
"Default": "CollectSourcePaths"
27+
},
28+
"InvalidTransferData": {
29+
"Type": "Fail",
30+
"Error": "InvalidTransferData",
31+
"Cause": "Invalid source path found in input transfer data."
32+
},
33+
"CollectSourcePaths": {
34+
"Type": "ExpressionEval",
35+
"Parameters": {
36+
"paths.=": "[item.source_path for item in transfer_settings.DATA]"
37+
},
38+
"ResultPath": "$.src",
39+
"Next": "MakeComputeWorkingDir"
40+
},
41+
"MakeComputeWorkingDir": {
42+
"Type": "Action",
43+
"ActionUrl": "https://transfer.actions.globus.org/mkdir",
44+
"Parameters": {
45+
"endpoint_id.$": "$.transfer_settings.source_endpoint",
46+
"path.$": "$.run_vars.compute_output_directory"
47+
},
48+
"ResultPath": "$.mkdir_result",
49+
"Next": "RunComputeFunction"
50+
},
51+
"RunComputeFunction": {
52+
"Type": "Action",
53+
"ActionUrl": "https://compute.actions.globus.org/v3",
54+
"Parameters": {
55+
"endpoint_id.$": "$.run_vars.compute_endpoint_id",
56+
"tasks": [
57+
{
58+
"function_id.$": "$.run_vars.compute_function_id",
59+
"args": [],
60+
"kwargs": {
61+
"src_paths.$": "$.src.paths",
62+
"dest_path.$": "$.run_vars.compute_output_directory",
63+
"gcs_base_path.$": "$.run_vars.gcs_base_path"
64+
}
65+
}
66+
]
67+
},
68+
"ResultPath": "$.compute_func_result",
69+
"Next": "GetDestinationPath"
70+
},
71+
"GetDestinationPath": {
72+
"Comment": "To get the dest path, check if the variable 'destination_path' exists and if not, default to the filename returned by the compute function.",
73+
"Type": "ExpressionEval",
74+
"Parameters": {
75+
"path.=": "getattr('destination_path', '/~/' + pathsplit(compute_func_result.details.result[0])[1])"
76+
},
77+
"ResultPath": "$.destination",
78+
"Next": "TransferFromComputeEndpoint"
79+
},
80+
"TransferFromComputeEndpoint": {
81+
"Type": "Action",
82+
"ActionUrl": "https://transfer.actions.globus.org/transfer",
83+
"Parameters": {
84+
"source_endpoint.$": "$.transfer_settings.source_endpoint",
85+
"destination_endpoint.$": "$.transfer_settings.destination_endpoint",
86+
"DATA": [
87+
{
88+
"source_path.=": "compute_func_result.details.result[0]",
89+
"destination_path.$": "$.destination.path"
90+
}
91+
]
92+
},
93+
"ResultPath": "$.transfer_result",
94+
"Next": "CleanupComputeEndpoint"
95+
},
96+
"CleanupComputeEndpoint": {
97+
"Type": "Action",
98+
"ActionUrl": "https://transfer.actions.globus.org/delete",
99+
"Parameters": {
100+
"endpoint.$": "$.transfer_settings.source_endpoint",
101+
"recursive": true,
102+
"DATA": [
103+
{
104+
"path.$": "$.run_vars.compute_output_directory"
105+
}
106+
]
107+
},
108+
"ResultPath": "$.delete_result",
109+
"End": true
110+
}
111+
}
112+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"type": "object",
3+
"required": [
4+
"transfer_settings"
5+
],
6+
"properties": {
7+
"transfer_settings": {
8+
"type": "object",
9+
"format": "globus-transfer-transfer#0.10"
10+
},
11+
"destination_path": {
12+
"type": "string",
13+
"title": "Destination Collection Path",
14+
"description": "The path on the destination collection for the tarball file"
15+
}
16+
},
17+
"additionalProperties": false
18+
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
{
2+
"StartAt": "SetRunVariables",
3+
"States": {
4+
"SetRunVariables": {
5+
"Type": "ExpressionEval",
6+
"Parameters": {
7+
"gcs_endpoint_id": "<INSERT YOUR GCS ENDPOINT ID HERE>",
8+
"gcs_base_path": "/",
9+
"compute_endpoint_id": "<INSERT YOUR COMPUTE ENDPOINT ID HERE>",
10+
"compute_function_id": "<INSERT YOUR COMPUTE FUNCTION ID HERE>",
11+
"compute_output_directory.=": "'/' + _context.run_id + '/'",
12+
"input_data_valid.=": "['~' in transfer_data.source_path or transfer_data.source_path == '/' for transfer_data in transfer_settings.DATA] == [False] * len(transfer_settings.DATA)"
13+
},
14+
"ResultPath": "$.run_vars",
15+
"Next": "ValidateTransferData"
16+
},
17+
"ValidateTransferData": {
18+
"Comment": "Validate that none of the input source paths are the path '/' or contain the character '~'.",
19+
"Type": "Choice",
20+
"Choices": [
21+
{
22+
"Variable": "$.run_vars.input_data_valid",
23+
"BooleanEquals": true,
24+
"Next": "CollectTransferData"
25+
}
26+
],
27+
"Default": "InvalidTransferData"
28+
},
29+
"InvalidTransferData": {
30+
"Type": "Fail",
31+
"Error": "InvalidTransferData",
32+
"Cause": "Invalid source path in input transfer data."
33+
},
34+
"CollectTransferData": {
35+
"Comment": "Construct a list of source paths to provide to the compute function and the transfer data to move the source paths to the intermediate collection.",
36+
"Type": "ExpressionEval",
37+
"Parameters": {
38+
"src_paths.=": "[run_vars.compute_output_directory + pathsplit(transfer_data.source_path.rstrip('/'))[1] + '/' if transfer_data.source_path.endswith('/') else run_vars.compute_output_directory + pathsplit(transfer_data.source_path)[1] for transfer_data in transfer_settings.DATA]",
39+
"src_to_intermidate_transfer_data.=": "[{'source_path': transfer_data.source_path, 'destination_path': run_vars.compute_output_directory + pathsplit(transfer_data.source_path.rstrip('/'))[1] + '/' if transfer_data.source_path.endswith('/') else run_vars.compute_output_directory + pathsplit(transfer_data.source_path)[1], 'DATA_TYPE': transfer_data.DATA_TYPE, 'recursive': transfer_data.recursive} for transfer_data in transfer_settings.DATA]"
40+
},
41+
"ResultPath": "$.transfer_data",
42+
"Next": "MakeComputeWorkingDir"
43+
},
44+
"MakeComputeWorkingDir": {
45+
"Type": "Action",
46+
"ActionUrl": "https://transfer.actions.globus.org/mkdir",
47+
"Parameters": {
48+
"endpoint_id.$": "$.run_vars.gcs_endpoint_id",
49+
"path.$": "$.run_vars.compute_output_directory"
50+
},
51+
"ResultPath": "$.mkdir_result",
52+
"Next": "TransferToComputeEndpoint"
53+
},
54+
"TransferToComputeEndpoint": {
55+
"Type": "Action",
56+
"ActionUrl": "https://transfer.actions.globus.org/transfer",
57+
"Parameters": {
58+
"source_endpoint.$": "$.transfer_settings.source_endpoint",
59+
"destination_endpoint.$": "$.run_vars.gcs_endpoint_id",
60+
"DATA.$": "$.transfer_data.src_to_intermidate_transfer_data"
61+
},
62+
"ResultPath": "$.transfer_from_src_result",
63+
"Next": "RunComputeFunction"
64+
},
65+
"RunComputeFunction": {
66+
"Type": "Action",
67+
"ActionUrl": "https://compute.actions.globus.org/v3",
68+
"Parameters": {
69+
"endpoint_id.$": "$.run_vars.compute_endpoint_id",
70+
"tasks": [
71+
{
72+
"function_id.$": "$.run_vars.compute_function_id",
73+
"args": [],
74+
"kwargs": {
75+
"src_paths.$": "$.transfer_data.src_paths",
76+
"dest_path.$": "$.run_vars.compute_output_directory",
77+
"gcs_base_path.$": "$.run_vars.gcs_base_path"
78+
}
79+
}
80+
]
81+
},
82+
"ResultPath": "$.compute_func_result",
83+
"Next": "GetDestinationPath"
84+
},
85+
"GetDestinationPath": {
86+
"Comment": "To get the dest path, check if the variable 'destination_path' exists and if not, default to the filename returned by the compute function.",
87+
"Type": "ExpressionEval",
88+
"Parameters": {
89+
"path.=": "getattr('destination_path', '/~/' + pathsplit(compute_func_result.details.result[0])[1])"
90+
},
91+
"ResultPath": "$.destination",
92+
"Next": "TransferFromComputeEndpoint"
93+
},
94+
"TransferFromComputeEndpoint": {
95+
"Type": "Action",
96+
"ActionUrl": "https://transfer.actions.globus.org/transfer",
97+
"Parameters": {
98+
"source_endpoint.$": "$.run_vars.gcs_endpoint_id",
99+
"destination_endpoint.$": "$.transfer_settings.destination_endpoint",
100+
"DATA": [
101+
{
102+
"source_path.=": "compute_func_result.details.result[0]",
103+
"destination_path.$": "$.destination.path"
104+
}
105+
]
106+
},
107+
"ResultPath": "$.transfer_to_dest_result",
108+
"Next": "CleanupComputeEndpoint"
109+
},
110+
"CleanupComputeEndpoint": {
111+
"Type": "Action",
112+
"ActionUrl": "https://transfer.actions.globus.org/delete",
113+
"Parameters": {
114+
"endpoint.$": "$.run_vars.gcs_endpoint_id",
115+
"recursive": true,
116+
"DATA": [
117+
{
118+
"path.$": "$.run_vars.compute_output_directory"
119+
}
120+
]
121+
},
122+
"ResultPath": "$.delete_compute_output_result",
123+
"End": true
124+
}
125+
}
126+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"type": "object",
3+
"required": [
4+
"transfer_settings"
5+
],
6+
"properties": {
7+
"transfer_settings": {
8+
"type": "object",
9+
"format": "globus-transfer-transfer#0.10"
10+
},
11+
"destination_path": {
12+
"type": "string",
13+
"title": "Destination Collection Path",
14+
"description": "The path on the destination collection for the tarball file"
15+
}
16+
},
17+
"additionalProperties": false
18+
}

0 commit comments

Comments
 (0)