Skip to content

Commit 1ca4ff8

Browse files
committed
fix(aws): Implement label sanitization for AWS Batch tags in AwsBatchTaskHandler.
Added methods to sanitize individual labels and a map of labels to comply with AWS constraints. Updated tests to verify sanitization functionality. Signed-off-by: Edmund Miller <[email protected]>
1 parent ae07c96 commit 1ca4ff8

File tree

2 files changed

+229
-9
lines changed

2 files changed

+229
-9
lines changed

plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
574574

575575
if( opts.executionRole )
576576
container.setExecutionRoleArn(opts.executionRole)
577-
577+
578578
final logsGroup = opts.getLogsGroup()
579579
if( logsGroup )
580580
container.setLogConfiguration(getLogConfiguration(logsGroup, opts.getRegion()))
@@ -623,7 +623,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
623623
return result
624624
}
625625

626-
@Memoized
626+
@Memoized
627627
LogConfiguration getLogConfiguration(String name, String region) {
628628
new LogConfiguration()
629629
.withLogDriver('awslogs')
@@ -765,7 +765,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
765765
result.setJobQueue(getJobQueue(task))
766766
result.setJobDefinition(getJobDefinition(task))
767767
if( labels ) {
768-
result.setTags(labels)
768+
result.setTags(sanitizeAwsBatchLabels(labels))
769769
result.setPropagateTags(true)
770770
}
771771
// set the share identifier
@@ -849,6 +849,70 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
849849
return result
850850
}
851851

852+
/**
853+
* Sanitize resource labels to comply with AWS Batch tag requirements.
854+
* AWS Batch tags have specific constraints:
855+
* - Keys and values can contain: letters, numbers, spaces, and characters: _ . : / = + - @
856+
* - Maximum key length: 128 characters
857+
* - Maximum value length: 256 characters
858+
*
859+
* @param labels The original resource labels map
860+
* @return A new map with sanitized labels suitable for AWS Batch tags
861+
*/
862+
protected Map<String, String> sanitizeAwsBatchLabels(Map<String, String> labels) {
863+
if (!labels) return labels
864+
865+
final result = new LinkedHashMap<String, String>()
866+
867+
for (Map.Entry<String, String> entry : labels.entrySet()) {
868+
final key = entry.getKey()
869+
final value = entry.getValue()
870+
871+
if (key != null && value != null) {
872+
final sanitizedKey = sanitizeAwsBatchLabel(key.toString(), 128)
873+
final sanitizedValue = sanitizeAwsBatchLabel(value.toString(), 256)
874+
875+
// Only add non-empty keys and values
876+
if (sanitizedKey && sanitizedValue) {
877+
result.put(sanitizedKey, sanitizedValue)
878+
}
879+
}
880+
}
881+
882+
return result
883+
}
884+
885+
/**
886+
* Sanitize a single label key or value for AWS Batch tags.
887+
* Replaces invalid characters with underscores and truncates if necessary.
888+
*
889+
* @param input The input string to sanitize
890+
* @param maxLength The maximum allowed length
891+
* @return The sanitized string
892+
*/
893+
protected String sanitizeAwsBatchLabel(String input, int maxLength) {
894+
if (!input) return input
895+
896+
// Replace invalid characters with underscores
897+
// AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @
898+
String sanitized = input.replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_')
899+
900+
// Replace multiple consecutive underscores/spaces with single underscore
901+
sanitized = sanitized.replaceAll(/[_\s]{2,}/, '_')
902+
903+
// Remove leading/trailing underscores and spaces
904+
sanitized = sanitized.replaceAll(/^[_\s]+|[_\s]+$/, '')
905+
906+
// Truncate if too long
907+
if (sanitized.length() > maxLength) {
908+
sanitized = sanitized.substring(0, maxLength)
909+
// Remove trailing underscore/space after truncation
910+
sanitized = sanitized.replaceAll(/[_\s]+$/, '')
911+
}
912+
913+
return sanitized ?: null
914+
}
915+
852916
/**
853917
* @return The list of environment variables to be defined in the Batch job execution context
854918
*/

plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy

Lines changed: 162 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ class AwsBatchTaskHandlerTest extends Specification {
530530
vol2: '/here:/there:ro',
531531
vol3: '/this:/that:rw',
532532
]
533-
533+
534534
when:
535535
handler.addVolumeMountsToContainer(mounts, container)
536536
then:
@@ -585,7 +585,7 @@ class AwsBatchTaskHandlerTest extends Specification {
585585
!result.containerProperties.logConfiguration
586586
!result.containerProperties.mountPoints
587587
!result.containerProperties.privileged
588-
588+
589589
when:
590590
result = handler.makeJobDefRequest(task)
591591
then:
@@ -928,7 +928,7 @@ class AwsBatchTaskHandlerTest extends Specification {
928928
then:
929929
1 * handler.isCompleted() >> false
930930
1 * handler.getMachineInfo() >> new CloudMachineInfo('x1.large', 'us-east-1b', PriceModel.spot)
931-
931+
932932
and:
933933
trace.native_id == 'xyz-123'
934934
trace.executorName == 'awsbatch'
@@ -1099,7 +1099,7 @@ class AwsBatchTaskHandlerTest extends Specification {
10991099

11001100
expect:
11011101
handler.normaliseJobId(JOB_ID) == EXPECTED
1102-
1102+
11031103
where:
11041104
JOB_ID | EXPECTED
11051105
null | null
@@ -1118,7 +1118,7 @@ class AwsBatchTaskHandlerTest extends Specification {
11181118
task.getName() >> NAME
11191119
and:
11201120
result == EXPECTED
1121-
1121+
11221122
where:
11231123
ENV | NAME | EXPECTED
11241124
[:] | 'foo' | 'foo'
@@ -1155,8 +1155,164 @@ class AwsBatchTaskHandlerTest extends Specification {
11551155
2 | true | false | 2
11561156
and:
11571157
null | true | true | 5 // <-- default to 5
1158-
0 | true | true | 5 // <-- default to 5
1158+
0 | true | true | 5 // <-- default to 5
11591159
1 | true | true | 1
11601160
2 | true | true | 2
11611161
}
1162+
1163+
@Unroll
1164+
def 'should sanitize AWS Batch label' () {
1165+
given:
1166+
def handler = Spy(AwsBatchTaskHandler)
1167+
1168+
expect:
1169+
handler.sanitizeAwsBatchLabel(INPUT, MAX_LENGTH) == EXPECTED
1170+
1171+
where:
1172+
INPUT | MAX_LENGTH | EXPECTED
1173+
// Valid labels that don't need sanitization
1174+
'validLabel' | 50 | 'validLabel'
1175+
'valid-label_123' | 50 | 'valid-label_123'
1176+
'valid.label:test/path=value+more' | 50 | 'valid.label:test/path=value+more'
1177+
'label with spaces' | 50 | 'label with spaces'
1178+
'label-with@symbol' | 50 | 'label-with@symbol'
1179+
and:
1180+
// Labels with invalid characters
1181+
'label#with#hash' | 50 | 'label_with_hash'
1182+
'label$with%special&chars' | 50 | 'label_with_special_chars'
1183+
'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces__'
1184+
'label*with?wildcards' | 50 | 'label_with_wildcards'
1185+
'unicode_λαβελ_test' | 50 | 'unicode____abel_test'
1186+
and:
1187+
// Multiple consecutive invalid characters
1188+
'label###multiple###hashes' | 50 | 'label_multiple_hashes'
1189+
'label multiple spaces' | 50 | 'label_multiple_spaces'
1190+
'label___multiple___underscores' | 50 | 'label_multiple_underscores'
1191+
'label$%^&*special*&^%$chars' | 50 | 'label_special_chars'
1192+
and:
1193+
// Leading/trailing invalid characters
1194+
'###leading-hashes' | 50 | 'leading-hashes'
1195+
'trailing-hashes###' | 50 | 'trailing-hashes'
1196+
' leading-spaces' | 50 | 'leading-spaces'
1197+
'trailing-spaces ' | 50 | 'trailing-spaces'
1198+
'___leading-underscores' | 50 | 'leading-underscores'
1199+
'trailing-underscores___' | 50 | 'trailing-underscores'
1200+
and:
1201+
// Length truncation
1202+
'very-long-label-that-exceeds-max' | 10 | 'very-long-'
1203+
'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-w'
1204+
'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-w'
1205+
and:
1206+
// Edge cases
1207+
null | 50 | null
1208+
'' | 50 | ''
1209+
' ' | 50 | null
1210+
'___' | 50 | null
1211+
'###' | 50 | null
1212+
'_' | 50 | null
1213+
' ' | 50 | null
1214+
'#' | 50 | null
1215+
and:
1216+
// Complex real-world scenarios
1217+
1218+
'workflow-run-2024/01/15' | 50 | 'workflow-run-2024/01/15'
1219+
'task.hash.0x1234abcd' | 50 | 'task.hash.0x1234abcd'
1220+
'pipeline#name%with&special*chars' | 50 | 'pipeline_name_with_special_chars'
1221+
'session-id:abc123#$%' | 50 | 'session-id:abc123'
1222+
}
1223+
1224+
@Unroll
1225+
def 'should sanitize AWS Batch labels map' () {
1226+
given:
1227+
def handler = Spy(AwsBatchTaskHandler)
1228+
1229+
expect:
1230+
handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED
1231+
1232+
where:
1233+
INPUT | EXPECTED
1234+
// Null/empty input
1235+
null | null
1236+
[:] | [:]
1237+
and:
1238+
// Valid labels
1239+
[validKey: 'validValue'] | [validKey: 'validValue']
1240+
['valid-key_123': 'valid-value_456'] | ['valid-key_123': 'valid-value_456']
1241+
['key.with:path/chars=test+more@symbol': 'value with spaces'] | ['key.with:path/chars=test+more@symbol': 'value with spaces']
1242+
and:
1243+
// Invalid characters in keys and values
1244+
['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars']
1245+
['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_']
1246+
['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value']
1247+
and:
1248+
// Multiple entries with mixed validity
1249+
['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] |
1250+
['validKey': 'validValue', 'invalid_key': 'invalid_value', 'another.valid:key': 'another+valid@value']
1251+
and:
1252+
// Entries that should be filtered out (null/empty after sanitization)
1253+
['validKey': 'validValue', '###': '$$$', ' ': '%%%', 'goodKey': 'goodValue'] |
1254+
['validKey': 'validValue', 'goodKey': 'goodValue']
1255+
and:
1256+
// Null keys or values
1257+
['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] |
1258+
['goodKey': 'goodValue']
1259+
and:
1260+
// Real-world example with Nextflow resource labels
1261+
[
1262+
'uniqueRunId': 'tw-12345-workflow-run',
1263+
'taskHash': 'task.hash.0x1a2b3c4d#special',
1264+
'pipelineUser': '[email protected]',
1265+
'pipelineRunName': 'my-pipeline-run(2024)',
1266+
'pipelineSessionId': 'session#id$with%special&chars',
1267+
'pipelineResume': 'false',
1268+
'pipelineName': 'my_pipeline/name:version+tag'
1269+
] |
1270+
[
1271+
'uniqueRunId': 'tw-12345-workflow-run',
1272+
'taskHash': 'task.hash.0x1a2b3c4d_special',
1273+
'pipelineUser': '[email protected]',
1274+
'pipelineRunName': 'my-pipeline-run_2024_',
1275+
'pipelineSessionId': 'session_id_with_special_chars',
1276+
'pipelineResume': 'false',
1277+
'pipelineName': 'my_pipeline/name:version+tag'
1278+
]
1279+
}
1280+
1281+
def 'should apply label sanitization in submit request' () {
1282+
given:
1283+
def task = Mock(TaskRun)
1284+
task.getName() >> 'batch-task'
1285+
task.getConfig() >> new TaskConfig(
1286+
memory: '8GB',
1287+
cpus: 4,
1288+
resourceLabels: [
1289+
'validLabel': 'validValue',
1290+
'invalid#key': 'invalid$value',
1291+
'long-key-that-might-be-truncated-if-very-very-long': 'long-value-that-should-be-truncated-because-it-exceeds-the-maximum-allowed-length-for-aws-batch-tags-which-is-256-characters-and-this-string-is-definitely-longer-than-that-limit-so-it-will-be-cut-off-at-the-appropriate-length-and-cleaned-up'
1292+
]
1293+
)
1294+
1295+
def handler = Spy(AwsBatchTaskHandler)
1296+
1297+
when:
1298+
def req = handler.newSubmitRequest(task)
1299+
then:
1300+
1 * handler.getSubmitCommand() >> ['bash', '-c', 'test']
1301+
1 * handler.maxSpotAttempts() >> 0
1302+
1 * handler.getAwsOptions() >> new AwsOptions()
1303+
1 * handler.getJobQueue(task) >> 'test-queue'
1304+
1 * handler.getJobDefinition(task) >> 'test-job-def'
1305+
1 * handler.getEnvironmentVars() >> []
1306+
1307+
and:
1308+
def tags = req.getTags()
1309+
tags.size() == 3
1310+
tags['validLabel'] == 'validValue'
1311+
tags['invalid_key'] == 'invalid_value'
1312+
// Check that long value was truncated
1313+
tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256
1314+
tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated')
1315+
!tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_')
1316+
req.getPropagateTags() == true
1317+
}
11621318
}

0 commit comments

Comments
 (0)