Skip to content

Commit 1be6ed8

Browse files
committed
fix(aws): Implement label sanitization for AWS Batch tags in AwsBatchTaskHandler.
Added methods to sanitize individual labels and a map of labels to comply with AWS constraints. Updated tests to verify sanitization functionality. Signed-off-by: Edmund Miller <[email protected]>
1 parent 163cd37 commit 1be6ed8

File tree

2 files changed

+228
-10
lines changed

2 files changed

+228
-10
lines changed

plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
632632
return result
633633
}
634634

635-
@Memoized
635+
@Memoized
636636
LogConfiguration getLogConfiguration(String name, String region) {
637637
LogConfiguration.builder()
638638
.logDriver('awslogs')
@@ -779,7 +779,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
779779
builder.jobQueue(getJobQueue(task))
780780
builder.jobDefinition(getJobDefinition(task))
781781
if( labels ) {
782-
builder.tags(labels)
782+
builder.tags(sanitizeAwsBatchLabels(labels))
783783
builder.propagateTags(true)
784784
}
785785
// set the share identifier
@@ -864,6 +864,70 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler<String,Job
864864
return builder.build()
865865
}
866866

867+
/**
868+
* Sanitize resource labels to comply with AWS Batch tag requirements.
869+
* AWS Batch tags have specific constraints:
870+
* - Keys and values can contain: letters, numbers, spaces, and characters: _ . : / = + - @
871+
* - Maximum key length: 128 characters
872+
* - Maximum value length: 256 characters
873+
*
874+
* @param labels The original resource labels map
875+
* @return A new map with sanitized labels suitable for AWS Batch tags
876+
*/
877+
protected Map<String, String> sanitizeAwsBatchLabels(Map<String, String> labels) {
878+
if (!labels) return labels
879+
880+
final result = new LinkedHashMap<String, String>()
881+
882+
for (Map.Entry<String, String> entry : labels.entrySet()) {
883+
final key = entry.getKey()
884+
final value = entry.getValue()
885+
886+
if (key != null && value != null) {
887+
final sanitizedKey = sanitizeAwsBatchLabel(key.toString(), 128)
888+
final sanitizedValue = sanitizeAwsBatchLabel(value.toString(), 256)
889+
890+
// Only add non-empty keys and values
891+
if (sanitizedKey && sanitizedValue) {
892+
result.put(sanitizedKey, sanitizedValue)
893+
}
894+
}
895+
}
896+
897+
return result
898+
}
899+
900+
/**
901+
* Sanitize a single label key or value for AWS Batch tags.
902+
* Replaces invalid characters with underscores and truncates if necessary.
903+
*
904+
* @param input The input string to sanitize
905+
* @param maxLength The maximum allowed length
906+
* @return The sanitized string
907+
*/
908+
protected String sanitizeAwsBatchLabel(String input, int maxLength) {
909+
if (!input) return input
910+
911+
// Replace invalid characters with underscores
912+
// AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @
913+
String sanitized = input.replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_')
914+
915+
// Replace multiple consecutive underscores/spaces with single underscore
916+
sanitized = sanitized.replaceAll(/[_\s]{2,}/, '_')
917+
918+
// Remove leading/trailing underscores and spaces
919+
sanitized = sanitized.replaceAll(/^[_\s]+|[_\s]+$/, '')
920+
921+
// Truncate if too long
922+
if (sanitized.length() > maxLength) {
923+
sanitized = sanitized.substring(0, maxLength)
924+
// Remove trailing underscore/space after truncation
925+
sanitized = sanitized.replaceAll(/[_\s]+$/, '')
926+
}
927+
928+
return sanitized ?: null
929+
}
930+
867931
/**
868932
* @return The list of environment variables to be defined in the Batch job execution context
869933
*/

plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy

Lines changed: 162 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -522,10 +522,9 @@ class AwsBatchTaskHandlerTest extends Specification {
522522
vol2: '/here:/there:ro',
523523
vol3: '/this:/that:rw',
524524
]
525-
and:
526-
handler.addVolumeMountsToContainer(mounts, containerModel)
527-
525+
528526
when:
527+
handler.addVolumeMountsToContainer(mounts, containerModel)
529528
def container = containerModel.toBatchContainerProperties()
530529
then:
531530
container.volumes().size() == 4
@@ -578,7 +577,6 @@ class AwsBatchTaskHandlerTest extends Specification {
578577
result.containerProperties.logConfiguration == null
579578
result.containerProperties.mountPoints == null
580579
result.containerProperties.privileged == false
581-
582580
when:
583581
result = handler.makeJobDefRequest(task)
584582
then:
@@ -907,7 +905,7 @@ class AwsBatchTaskHandlerTest extends Specification {
907905
then:
908906
1 * handler.isCompleted() >> false
909907
1 * handler.getMachineInfo() >> new CloudMachineInfo('x1.large', 'us-east-1b', PriceModel.spot)
910-
908+
911909
and:
912910
trace.native_id == 'xyz-123'
913911
trace.executorName == 'awsbatch'
@@ -1078,7 +1076,7 @@ class AwsBatchTaskHandlerTest extends Specification {
10781076

10791077
expect:
10801078
handler.normaliseJobId(JOB_ID) == EXPECTED
1081-
1079+
10821080
where:
10831081
JOB_ID | EXPECTED
10841082
null | null
@@ -1097,7 +1095,7 @@ class AwsBatchTaskHandlerTest extends Specification {
10971095
task.getName() >> NAME
10981096
and:
10991097
result == EXPECTED
1100-
1098+
11011099
where:
11021100
ENV | NAME | EXPECTED
11031101
[:] | 'foo' | 'foo'
@@ -1134,8 +1132,164 @@ class AwsBatchTaskHandlerTest extends Specification {
11341132
2 | true | false | 2
11351133
and:
11361134
null | true | true | 5 // <-- default to 5
1137-
0 | true | true | 5 // <-- default to 5
1135+
0 | true | true | 5 // <-- default to 5
11381136
1 | true | true | 1
11391137
2 | true | true | 2
11401138
}
1139+
1140+
@Unroll
1141+
def 'should sanitize AWS Batch label' () {
1142+
given:
1143+
def handler = Spy(AwsBatchTaskHandler)
1144+
1145+
expect:
1146+
handler.sanitizeAwsBatchLabel(INPUT, MAX_LENGTH) == EXPECTED
1147+
1148+
where:
1149+
INPUT | MAX_LENGTH | EXPECTED
1150+
// Valid labels that don't need sanitization
1151+
'validLabel' | 50 | 'validLabel'
1152+
'valid-label_123' | 50 | 'valid-label_123'
1153+
'valid.label:test/path=value+more' | 50 | 'valid.label:test/path=value+more'
1154+
'label with spaces' | 50 | 'label with spaces'
1155+
'label-with@symbol' | 50 | 'label-with@symbol'
1156+
and:
1157+
// Labels with invalid characters
1158+
'label#with#hash' | 50 | 'label_with_hash'
1159+
'label$with%special&chars' | 50 | 'label_with_special_chars'
1160+
'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces__'
1161+
'label*with?wildcards' | 50 | 'label_with_wildcards'
1162+
'unicode_λαβελ_test' | 50 | 'unicode____abel_test'
1163+
and:
1164+
// Multiple consecutive invalid characters
1165+
'label###multiple###hashes' | 50 | 'label_multiple_hashes'
1166+
'label multiple spaces' | 50 | 'label_multiple_spaces'
1167+
'label___multiple___underscores' | 50 | 'label_multiple_underscores'
1168+
'label$%^&*special*&^%$chars' | 50 | 'label_special_chars'
1169+
and:
1170+
// Leading/trailing invalid characters
1171+
'###leading-hashes' | 50 | 'leading-hashes'
1172+
'trailing-hashes###' | 50 | 'trailing-hashes'
1173+
' leading-spaces' | 50 | 'leading-spaces'
1174+
'trailing-spaces ' | 50 | 'trailing-spaces'
1175+
'___leading-underscores' | 50 | 'leading-underscores'
1176+
'trailing-underscores___' | 50 | 'trailing-underscores'
1177+
and:
1178+
// Length truncation
1179+
'very-long-label-that-exceeds-max' | 10 | 'very-long-'
1180+
'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-w'
1181+
'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-w'
1182+
and:
1183+
// Edge cases
1184+
null | 50 | null
1185+
'' | 50 | ''
1186+
' ' | 50 | null
1187+
'___' | 50 | null
1188+
'###' | 50 | null
1189+
'_' | 50 | null
1190+
' ' | 50 | null
1191+
'#' | 50 | null
1192+
and:
1193+
// Complex real-world scenarios
1194+
1195+
'workflow-run-2024/01/15' | 50 | 'workflow-run-2024/01/15'
1196+
'task.hash.0x1234abcd' | 50 | 'task.hash.0x1234abcd'
1197+
'pipeline#name%with&special*chars' | 50 | 'pipeline_name_with_special_chars'
1198+
'session-id:abc123#$%' | 50 | 'session-id:abc123'
1199+
}
1200+
1201+
@Unroll
1202+
def 'should sanitize AWS Batch labels map' () {
1203+
given:
1204+
def handler = Spy(AwsBatchTaskHandler)
1205+
1206+
expect:
1207+
handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED
1208+
1209+
where:
1210+
INPUT | EXPECTED
1211+
// Null/empty input
1212+
null | null
1213+
[:] | [:]
1214+
and:
1215+
// Valid labels
1216+
[validKey: 'validValue'] | [validKey: 'validValue']
1217+
['valid-key_123': 'valid-value_456'] | ['valid-key_123': 'valid-value_456']
1218+
['key.with:path/chars=test+more@symbol': 'value with spaces'] | ['key.with:path/chars=test+more@symbol': 'value with spaces']
1219+
and:
1220+
// Invalid characters in keys and values
1221+
['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars']
1222+
['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_']
1223+
['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value']
1224+
and:
1225+
// Multiple entries with mixed validity
1226+
['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] |
1227+
['validKey': 'validValue', 'invalid_key': 'invalid_value', 'another.valid:key': 'another+valid@value']
1228+
and:
1229+
// Entries that should be filtered out (null/empty after sanitization)
1230+
['validKey': 'validValue', '###': '$$$', ' ': '%%%', 'goodKey': 'goodValue'] |
1231+
['validKey': 'validValue', 'goodKey': 'goodValue']
1232+
and:
1233+
// Null keys or values
1234+
['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] |
1235+
['goodKey': 'goodValue']
1236+
and:
1237+
// Real-world example with Nextflow resource labels
1238+
[
1239+
'uniqueRunId': 'tw-12345-workflow-run',
1240+
'taskHash': 'task.hash.0x1a2b3c4d#special',
1241+
'pipelineUser': '[email protected]',
1242+
'pipelineRunName': 'my-pipeline-run(2024)',
1243+
'pipelineSessionId': 'session#id$with%special&chars',
1244+
'pipelineResume': 'false',
1245+
'pipelineName': 'my_pipeline/name:version+tag'
1246+
] |
1247+
[
1248+
'uniqueRunId': 'tw-12345-workflow-run',
1249+
'taskHash': 'task.hash.0x1a2b3c4d_special',
1250+
'pipelineUser': '[email protected]',
1251+
'pipelineRunName': 'my-pipeline-run_2024_',
1252+
'pipelineSessionId': 'session_id_with_special_chars',
1253+
'pipelineResume': 'false',
1254+
'pipelineName': 'my_pipeline/name:version+tag'
1255+
]
1256+
}
1257+
1258+
def 'should apply label sanitization in submit request' () {
1259+
given:
1260+
def task = Mock(TaskRun)
1261+
task.getName() >> 'batch-task'
1262+
task.getConfig() >> new TaskConfig(
1263+
memory: '8GB',
1264+
cpus: 4,
1265+
resourceLabels: [
1266+
'validLabel': 'validValue',
1267+
'invalid#key': 'invalid$value',
1268+
'long-key-that-might-be-truncated-if-very-very-long': 'long-value-that-should-be-truncated-because-it-exceeds-the-maximum-allowed-length-for-aws-batch-tags-which-is-256-characters-and-this-string-is-definitely-longer-than-that-limit-so-it-will-be-cut-off-at-the-appropriate-length-and-cleaned-up'
1269+
]
1270+
)
1271+
1272+
def handler = Spy(AwsBatchTaskHandler)
1273+
1274+
when:
1275+
def req = handler.newSubmitRequest(task)
1276+
then:
1277+
1 * handler.getSubmitCommand() >> ['bash', '-c', 'test']
1278+
1 * handler.maxSpotAttempts() >> 0
1279+
1 * handler.getAwsOptions() >> new AwsOptions()
1280+
1 * handler.getJobQueue(task) >> 'test-queue'
1281+
1 * handler.getJobDefinition(task) >> 'test-job-def'
1282+
1 * handler.getEnvironmentVars() >> []
1283+
1284+
and:
1285+
def tags = req.getTags()
1286+
tags.size() == 3
1287+
tags['validLabel'] == 'validValue'
1288+
tags['invalid_key'] == 'invalid_value'
1289+
// Check that long value was truncated
1290+
tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256
1291+
tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated')
1292+
!tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_')
1293+
req.getPropagateTags() == true
1294+
}
11411295
}

0 commit comments

Comments
 (0)