Skip to content

Commit 9df5873

Browse files
authored
optimize(sdk): Rewrite data passing script into more optimized code (kubeflow#1029)
1 parent 71f1cc1 commit 9df5873

File tree

8 files changed

+190
-192
lines changed

8 files changed

+190
-192
lines changed

sdk/python/kfp_tekton/compiler/_data_passing_rewriter.py

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,7 @@ def append_taskrun_params(task_name_append: str, task_path_name: str):
658658
copy_results_artifact_step = _get_base_step('copy-results-artifacts')
659659
copy_results_artifact_step['onError'] = 'continue' # supported by v0.27+ of tekton.
660660
script = "set -exo pipefail\nTOTAL_SIZE=0\n"
661+
injected_script = False
661662
for result in task_spec['results']:
662663
if task['name'] in artifact_items:
663664
artifact_i = artifact_items[task['name']]
@@ -666,30 +667,37 @@ def append_taskrun_params(task_name_append: str, task_path_name: str):
666667
src = artifact
667668
dst = '$(results.%s.path)' % sanitize_k8s_name(result['name'], allow_capital=True)
668669
if artifact_name == result['name'] and src != dst:
669-
add_copy_results_artifacts_step = True
670-
total_size_command = 'ARTIFACT_SIZE=`wc -c %s${SUFFIX} | awk \'{print $1}\'`\n' % src + \
671-
'TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)\n'
672-
copy_command = ' cp ' + src + ' ' + dst + '\n'
673-
if env.get('OUTPUT_PREVIEW', 'false').lower() == 'true':
674-
preview_size = env.get('OUTPUT_PREVIEW_SIZE', '100')
675-
total_size_command = 'TOTAL_SIZE=$( expr $TOTAL_SIZE + %s)\n' % preview_size
676-
copy_command = ' dd if=' + src + ' of=' + \
677-
dst + ' bs=' + preview_size + ' count=1\n'
678-
script += (
679-
'if [ -d ' + src + ' ]; then\n' +
680-
' tar -czvf ' + src + '.tar.gz ' + src + '\n' +
681-
' SUFFIX=".tar.gz"\n' +
682-
'fi\n' +
683-
total_size_command +
684-
'touch ' + dst + '\n' + # create an empty file by default.
685-
'if [[ $TOTAL_SIZE -lt 3072 ]]; then\n' +
686-
' if [ -d ' + src + ' ]; then\n' +
687-
' tar -tzf ' + src + '.tar.gz > ' + dst + '\n' +
688-
' elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" %s; then\n' % src +
689-
copy_command +
690-
' fi\n' +
691-
'fi\n'
692-
)
670+
if not injected_script:
671+
add_copy_results_artifacts_step = True
672+
src_arg = '"$1"'
673+
dst_arg = '"$2"'
674+
total_size_command = 'ARTIFACT_SIZE=`wc -c %s${SUFFIX} | awk \'{print $1}\'`\n' % src_arg + \
675+
'TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)\n'
676+
copy_command = ' cp ' + src_arg + ' ' + dst_arg + '\n'
677+
if env.get('OUTPUT_PREVIEW', 'false').lower() == 'true':
678+
preview_size = env.get('OUTPUT_PREVIEW_SIZE', '100')
679+
total_size_command = 'TOTAL_SIZE=$( expr $TOTAL_SIZE + %s)\n' % preview_size
680+
copy_command = ' dd if=' + src_arg + ' of=' + \
681+
dst_arg + ' bs=' + preview_size + ' count=1\n'
682+
script += (
683+
'copy_artifact() {\n'
684+
'if [ -d ' + src_arg + ' ]; then\n' +
685+
' tar -czvf ' + src_arg + '.tar.gz ' + src_arg + '\n' +
686+
' SUFFIX=".tar.gz"\n' +
687+
'fi\n' +
688+
total_size_command +
689+
'touch ' + dst_arg + '\n' + # create an empty file by default.
690+
'if [[ $TOTAL_SIZE -lt 3072 ]]; then\n' +
691+
' if [ -d ' + src_arg + ' ]; then\n' +
692+
' tar -tzf ' + src_arg + '.tar.gz > ' + dst_arg + '\n' +
693+
' elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" %s; then\n' % src_arg +
694+
copy_command +
695+
' fi\n' +
696+
'fi\n' +
697+
'}\n'
698+
)
699+
injected_script = True
700+
script += 'copy_artifact %s %s\n' % (src, dst)
693701
copy_results_artifact_step['command'].append(script)
694702
_append_original_pr_name_env_to_step(copy_results_artifact_step)
695703
if add_copy_results_artifacts_step:

sdk/python/tests/compiler/testdata/artifact_outputs.yaml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,20 +79,23 @@ spec:
7979
- |
8080
set -exo pipefail
8181
TOTAL_SIZE=0
82-
if [ -d $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data ]; then
83-
tar -czvf $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data.tar.gz $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data
82+
copy_artifact() {
83+
if [ -d "$1" ]; then
84+
tar -czvf "$1".tar.gz "$1"
8485
SUFFIX=".tar.gz"
8586
fi
86-
ARTIFACT_SIZE=`wc -c $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data${SUFFIX} | awk '{print $1}'`
87+
ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'`
8788
TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)
88-
touch $(results.data.path)
89+
touch "$2"
8990
if [[ $TOTAL_SIZE -lt 3072 ]]; then
90-
if [ -d $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data ]; then
91-
tar -tzf $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data.tar.gz > $(results.data.path)
92-
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data; then
93-
cp $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data $(results.data.path)
91+
if [ -d "$1" ]; then
92+
tar -tzf "$1".tar.gz > "$2"
93+
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then
94+
cp "$1" "$2"
9495
fi
9596
fi
97+
}
98+
copy_artifact $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data $(results.data.path)
9699
onError: continue
97100
env:
98101
- name: ORIG_PR_NAME

sdk/python/tests/compiler/testdata/artifact_passing_using_volume.yaml

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,23 @@ spec:
8282
- |
8383
set -exo pipefail
8484
TOTAL_SIZE=0
85-
if [ -d $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
86-
tar -czvf $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2
85+
copy_artifact() {
86+
if [ -d "$1" ]; then
87+
tar -czvf "$1".tar.gz "$1"
8788
SUFFIX=".tar.gz"
8889
fi
89-
ARTIFACT_SIZE=`wc -c $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2${SUFFIX} | awk '{print $1}'`
90+
ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'`
9091
TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)
91-
touch $(results.Output-2.path)
92+
touch "$2"
9293
if [[ $TOTAL_SIZE -lt 3072 ]]; then
93-
if [ -d $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
94-
tar -tzf $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz > $(results.Output-2.path)
95-
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2; then
96-
cp $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
94+
if [ -d "$1" ]; then
95+
tar -tzf "$1".tar.gz > "$2"
96+
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then
97+
cp "$1" "$2"
9798
fi
9899
fi
100+
}
101+
copy_artifact $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
99102
onError: continue
100103
env:
101104
- name: ORIG_PR_NAME
@@ -166,20 +169,23 @@ spec:
166169
- |
167170
set -exo pipefail
168171
TOTAL_SIZE=0
169-
if [ -d $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
170-
tar -czvf $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2
172+
copy_artifact() {
173+
if [ -d "$1" ]; then
174+
tar -czvf "$1".tar.gz "$1"
171175
SUFFIX=".tar.gz"
172176
fi
173-
ARTIFACT_SIZE=`wc -c $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2${SUFFIX} | awk '{print $1}'`
177+
ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'`
174178
TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)
175-
touch $(results.Output-2.path)
179+
touch "$2"
176180
if [[ $TOTAL_SIZE -lt 3072 ]]; then
177-
if [ -d $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
178-
tar -tzf $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz > $(results.Output-2.path)
179-
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2; then
180-
cp $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
181+
if [ -d "$1" ]; then
182+
tar -tzf "$1".tar.gz > "$2"
183+
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then
184+
cp "$1" "$2"
181185
fi
182186
fi
187+
}
188+
copy_artifact $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
183189
onError: continue
184190
env:
185191
- name: ORIG_PR_NAME

0 commit comments

Comments
 (0)