From 1be6ed8d0b47369b987104735629e5a6604cbd6d Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Sun, 22 Jun 2025 22:18:19 -0500 Subject: [PATCH 1/6] fix(aws): Implement label sanitization for AWS Batch tags in AwsBatchTaskHandler. Added methods to sanitize individual labels and a map of labels to comply with AWS constraints. Updated tests to verify sanitization functionality. Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandler.groovy | 68 ++++++- .../aws/batch/AwsBatchTaskHandlerTest.groovy | 170 +++++++++++++++++- 2 files changed, 228 insertions(+), 10 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index 636e28c0ef..39111ca0c4 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -632,7 +632,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler sanitizeAwsBatchLabels(Map labels) { + if (!labels) return labels + + final result = new LinkedHashMap() + + for (Map.Entry entry : labels.entrySet()) { + final key = entry.getKey() + final value = entry.getValue() + + if (key != null && value != null) { + final sanitizedKey = sanitizeAwsBatchLabel(key.toString(), 128) + final sanitizedValue = sanitizeAwsBatchLabel(value.toString(), 256) + + // Only add non-empty keys and values + if (sanitizedKey && sanitizedValue) { + result.put(sanitizedKey, sanitizedValue) + } + } + } + + return result + } + + /** + * Sanitize a single label key or value for AWS Batch tags. + * Replaces invalid characters with underscores and truncates if necessary. + * + * @param input The input string to sanitize + * @param maxLength The maximum allowed length + * @return The sanitized string + */ + protected String sanitizeAwsBatchLabel(String input, int maxLength) { + if (!input) return input + + // Replace invalid characters with underscores + // AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @ + String sanitized = input.replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_') + + // Replace multiple consecutive underscores/spaces with single underscore + sanitized = sanitized.replaceAll(/[_\s]{2,}/, '_') + + // Remove leading/trailing underscores and spaces + sanitized = sanitized.replaceAll(/^[_\s]+|[_\s]+$/, '') + + // Truncate if too long + if (sanitized.length() > maxLength) { + sanitized = sanitized.substring(0, maxLength) + // Remove trailing underscore/space after truncation + sanitized = sanitized.replaceAll(/[_\s]+$/, '') + } + + return sanitized ?: null + } + /** * @return The list of environment variables to be defined in the Batch job execution context */ diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index 3e950aa109..971d420ea8 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -522,10 +522,9 @@ class AwsBatchTaskHandlerTest extends Specification { vol2: '/here:/there:ro', vol3: '/this:/that:rw', ] - and: - handler.addVolumeMountsToContainer(mounts, containerModel) - + when: + handler.addVolumeMountsToContainer(mounts, containerModel) def container = containerModel.toBatchContainerProperties() then: container.volumes().size() == 4 @@ -578,7 +577,6 @@ class AwsBatchTaskHandlerTest extends Specification { result.containerProperties.logConfiguration == null result.containerProperties.mountPoints == null result.containerProperties.privileged == false - when: result = handler.makeJobDefRequest(task) then: @@ -907,7 +905,7 @@ class AwsBatchTaskHandlerTest extends Specification { then: 1 * handler.isCompleted() >> false 1 * handler.getMachineInfo() >> new CloudMachineInfo('x1.large', 'us-east-1b', PriceModel.spot) - + and: trace.native_id == 'xyz-123' trace.executorName == 'awsbatch' @@ -1078,7 +1076,7 @@ class AwsBatchTaskHandlerTest extends Specification { expect: handler.normaliseJobId(JOB_ID) == EXPECTED - + where: JOB_ID | EXPECTED null | null @@ -1097,7 +1095,7 @@ class AwsBatchTaskHandlerTest extends Specification { task.getName() >> NAME and: result == EXPECTED - + where: ENV | NAME | EXPECTED [:] | 'foo' | 'foo' @@ -1134,8 +1132,164 @@ class AwsBatchTaskHandlerTest extends Specification { 2 | true | false | 2 and: null | true | true | 5 // <-- default to 5 - 0 | true | true | 5 // <-- default to 5 + 0 | true | true | 5 // <-- default to 5 1 | true | true | 1 2 | true | true | 2 } + + @Unroll + def 'should sanitize AWS Batch label' () { + given: + def handler = Spy(AwsBatchTaskHandler) + + expect: + handler.sanitizeAwsBatchLabel(INPUT, MAX_LENGTH) == EXPECTED + + where: + INPUT | MAX_LENGTH | EXPECTED + // Valid labels that don't need sanitization + 'validLabel' | 50 | 'validLabel' + 'valid-label_123' | 50 | 'valid-label_123' + 'valid.label:test/path=value+more' | 50 | 'valid.label:test/path=value+more' + 'label with spaces' | 50 | 'label with spaces' + 'label-with@symbol' | 50 | 'label-with@symbol' + and: + // Labels with invalid characters + 'label#with#hash' | 50 | 'label_with_hash' + 'label$with%special&chars' | 50 | 'label_with_special_chars' + 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces__' + 'label*with?wildcards' | 50 | 'label_with_wildcards' + 'unicode_λαβελ_test' | 50 | 'unicode____abel_test' + and: + // Multiple consecutive invalid characters + 'label###multiple###hashes' | 50 | 'label_multiple_hashes' + 'label multiple spaces' | 50 | 'label_multiple_spaces' + 'label___multiple___underscores' | 50 | 'label_multiple_underscores' + 'label$%^&*special*&^%$chars' | 50 | 'label_special_chars' + and: + // Leading/trailing invalid characters + '###leading-hashes' | 50 | 'leading-hashes' + 'trailing-hashes###' | 50 | 'trailing-hashes' + ' leading-spaces' | 50 | 'leading-spaces' + 'trailing-spaces ' | 50 | 'trailing-spaces' + '___leading-underscores' | 50 | 'leading-underscores' + 'trailing-underscores___' | 50 | 'trailing-underscores' + and: + // Length truncation + 'very-long-label-that-exceeds-max' | 10 | 'very-long-' + 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-w' + 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-w' + and: + // Edge cases + null | 50 | null + '' | 50 | '' + ' ' | 50 | null + '___' | 50 | null + '###' | 50 | null + '_' | 50 | null + ' ' | 50 | null + '#' | 50 | null + and: + // Complex real-world scenarios + 'user@domain.com' | 50 | 'user@domain.com' + 'workflow-run-2024/01/15' | 50 | 'workflow-run-2024/01/15' + 'task.hash.0x1234abcd' | 50 | 'task.hash.0x1234abcd' + 'pipeline#name%with&special*chars' | 50 | 'pipeline_name_with_special_chars' + 'session-id:abc123#$%' | 50 | 'session-id:abc123' + } + + @Unroll + def 'should sanitize AWS Batch labels map' () { + given: + def handler = Spy(AwsBatchTaskHandler) + + expect: + handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED + + where: + INPUT | EXPECTED + // Null/empty input + null | null + [:] | [:] + and: + // Valid labels + [validKey: 'validValue'] | [validKey: 'validValue'] + ['valid-key_123': 'valid-value_456'] | ['valid-key_123': 'valid-value_456'] + ['key.with:path/chars=test+more@symbol': 'value with spaces'] | ['key.with:path/chars=test+more@symbol': 'value with spaces'] + and: + // Invalid characters in keys and values + ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars'] + ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_'] + ['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value'] + and: + // Multiple entries with mixed validity + ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] | + ['validKey': 'validValue', 'invalid_key': 'invalid_value', 'another.valid:key': 'another+valid@value'] + and: + // Entries that should be filtered out (null/empty after sanitization) + ['validKey': 'validValue', '###': '$$$', ' ': '%%%', 'goodKey': 'goodValue'] | + ['validKey': 'validValue', 'goodKey': 'goodValue'] + and: + // Null keys or values + ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] | + ['goodKey': 'goodValue'] + and: + // Real-world example with Nextflow resource labels + [ + 'uniqueRunId': 'tw-12345-workflow-run', + 'taskHash': 'task.hash.0x1a2b3c4d#special', + 'pipelineUser': 'user@domain.com', + 'pipelineRunName': 'my-pipeline-run(2024)', + 'pipelineSessionId': 'session#id$with%special&chars', + 'pipelineResume': 'false', + 'pipelineName': 'my_pipeline/name:version+tag' + ] | + [ + 'uniqueRunId': 'tw-12345-workflow-run', + 'taskHash': 'task.hash.0x1a2b3c4d_special', + 'pipelineUser': 'user@domain.com', + 'pipelineRunName': 'my-pipeline-run_2024_', + 'pipelineSessionId': 'session_id_with_special_chars', + 'pipelineResume': 'false', + 'pipelineName': 'my_pipeline/name:version+tag' + ] + } + + def 'should apply label sanitization in submit request' () { + given: + def task = Mock(TaskRun) + task.getName() >> 'batch-task' + task.getConfig() >> new TaskConfig( + memory: '8GB', + cpus: 4, + resourceLabels: [ + 'validLabel': 'validValue', + 'invalid#key': 'invalid$value', + 'long-key-that-might-be-truncated-if-very-very-long': 'long-value-that-should-be-truncated-because-it-exceeds-the-maximum-allowed-length-for-aws-batch-tags-which-is-256-characters-and-this-string-is-definitely-longer-than-that-limit-so-it-will-be-cut-off-at-the-appropriate-length-and-cleaned-up' + ] + ) + + def handler = Spy(AwsBatchTaskHandler) + + when: + def req = handler.newSubmitRequest(task) + then: + 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] + 1 * handler.maxSpotAttempts() >> 0 + 1 * handler.getAwsOptions() >> new AwsOptions() + 1 * handler.getJobQueue(task) >> 'test-queue' + 1 * handler.getJobDefinition(task) >> 'test-job-def' + 1 * handler.getEnvironmentVars() >> [] + + and: + def tags = req.getTags() + tags.size() == 3 + tags['validLabel'] == 'validValue' + tags['invalid_key'] == 'invalid_value' + // Check that long value was truncated + tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256 + tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated') + !tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_') + req.getPropagateTags() == true + } } From 765580d0e6b03032be135b5f1b23b9ccc3b389b0 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 23 Jun 2025 10:00:02 -0500 Subject: [PATCH 2/6] test(aws): Clean up failing label sanitization tests Refined expected outputs for labels with brackets, unicode characters, and special characters to ensure compliance with AWS constraints. Adjusted test cases for length truncation and null handling to reflect accurate sanitization behavior. Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandlerTest.groovy | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index 971d420ea8..93c1aaac9d 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -1157,9 +1157,9 @@ class AwsBatchTaskHandlerTest extends Specification { // Labels with invalid characters 'label#with#hash' | 50 | 'label_with_hash' 'label$with%special&chars' | 50 | 'label_with_special_chars' - 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces__' + 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces' 'label*with?wildcards' | 50 | 'label_with_wildcards' - 'unicode_λαβελ_test' | 50 | 'unicode____abel_test' + 'unicode_λαβελ_test' | 50 | 'unicode_test' and: // Multiple consecutive invalid characters 'label###multiple###hashes' | 50 | 'label_multiple_hashes' @@ -1177,8 +1177,8 @@ class AwsBatchTaskHandlerTest extends Specification { and: // Length truncation 'very-long-label-that-exceeds-max' | 10 | 'very-long-' - 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-w' - 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-w' + 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-wi' + 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-with-' and: // Edge cases null | 50 | null @@ -1219,8 +1219,8 @@ class AwsBatchTaskHandlerTest extends Specification { and: // Invalid characters in keys and values ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars'] - ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets_': 'value_squares__braces_'] - ['unicode_λkey': 'unicode_λvalue'] | ['unicode__key': 'unicode__value'] + ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets': 'value_squares_braces'] + ['unicode_λkey': 'unicode_λvalue'] | ['unicode_key': 'unicode_value'] and: // Multiple entries with mixed validity ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] | @@ -1232,7 +1232,7 @@ class AwsBatchTaskHandlerTest extends Specification { and: // Null keys or values ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] | - ['goodKey': 'goodValue'] + [null: 'validValue', 'goodKey': 'goodValue'] and: // Real-world example with Nextflow resource labels [ @@ -1248,7 +1248,7 @@ class AwsBatchTaskHandlerTest extends Specification { 'uniqueRunId': 'tw-12345-workflow-run', 'taskHash': 'task.hash.0x1a2b3c4d_special', 'pipelineUser': 'user@domain.com', - 'pipelineRunName': 'my-pipeline-run_2024_', + 'pipelineRunName': 'my-pipeline-run_2024', 'pipelineSessionId': 'session_id_with_special_chars', 'pipelineResume': 'false', 'pipelineName': 'my_pipeline/name:version+tag' From 437ee33667817f9d7bca92932180f8fd2ac7f179 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 14 Aug 2025 16:48:22 -0500 Subject: [PATCH 3/6] fix(aws): Improve AWS Batch label sanitization with comprehensive logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR feedback by enhancing the label sanitization logic with better user experience and code maintainability: - Add warning logs when labels are dropped due to null values - Add warning logs when labels are dropped after failed sanitization - Add warning logs when labels are modified during sanitization - Simplify sanitizeAwsBatchLabel method using method chaining and ternary operator - Follow codebase patterns for length truncation (similar to AbstractGridExecutor) - Improve code readability while maintaining all existing functionality Resolves silent label dropping issue and provides clear feedback to users when resource labels are modified or removed during AWS Batch job submission. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandler.groovy | 55 +++++++++++-------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index 39111ca0c4..a114f19751 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -883,15 +883,29 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler '${sanitizedKey ?: ''}', value: '${originalValue}' -> '${sanitizedValue ?: ''}'" + continue + } + + // Log if values were modified during sanitization + if (sanitizedKey != originalKey || sanitizedValue != originalValue) { + log.warn "AWS Batch label sanitized - key: '${originalKey}' -> '${sanitizedKey}', value: '${originalValue}' -> '${sanitizedValue}'" + } + + result.put(sanitizedKey, sanitizedValue) } return result @@ -908,24 +922,19 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler maxLength) { - sanitized = sanitized.substring(0, maxLength) - // Remove trailing underscore/space after truncation - sanitized = sanitized.replaceAll(/[_\s]+$/, '') - } + // Truncate if necessary and clean up any trailing underscores/spaces + final result = sanitized.size() > maxLength + ? sanitized.substring(0, maxLength).replaceAll(/[_\s]+$/, '') + : sanitized - return sanitized ?: null + return result ?: null } /** From 9f5c2ea521fc25f8ee3bf0f2d30d994c5d4938e0 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Thu, 14 Aug 2025 18:06:13 -0500 Subject: [PATCH 4/6] test(aws): Add comprehensive null value handling tests for label sanitization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add thorough test coverage for null value scenarios specifically addressing the PR comment: "when the item is "item": null is the aws tag silently dropped?" New test coverage includes: - Null values in various combinations (single, multiple, all null) - Null keys handling (actual null keys, not string "null") - Mixed null keys and values scenarios - Verification that logging occurs (not silent dropping) - Edge cases with empty strings that become null after sanitization Tests verify both functional behavior (correct dropping) and UX improvement (explicit logging instead of silent dropping). 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude Co-authored-by: adamrtalbot Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandler.groovy | 2 +- .../aws/batch/AwsBatchTaskHandlerTest.groovy | 108 +++++++++++++++++- 2 files changed, 107 insertions(+), 3 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index a114f19751..6270cc7f41 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -930,7 +930,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler maxLength + final result = sanitized.size() > maxLength ? sanitized.substring(0, maxLength).replaceAll(/[_\s]+$/, '') : sanitized diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index 93c1aaac9d..c837010936 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -58,6 +58,7 @@ import software.amazon.awssdk.services.batch.model.ResourceType import software.amazon.awssdk.services.batch.model.RetryStrategy import software.amazon.awssdk.services.batch.model.SubmitJobRequest import software.amazon.awssdk.services.batch.model.SubmitJobResponse +import spock.lang.See import spock.lang.Specification import spock.lang.Unroll /** @@ -1282,7 +1283,7 @@ class AwsBatchTaskHandlerTest extends Specification { 1 * handler.getEnvironmentVars() >> [] and: - def tags = req.getTags() + def tags = req.tags() tags.size() == 3 tags['validLabel'] == 'validValue' tags['invalid_key'] == 'invalid_value' @@ -1290,6 +1291,109 @@ class AwsBatchTaskHandlerTest extends Specification { tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256 tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated') !tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_') - req.getPropagateTags() == true + req.propagateTags() == true + } + + @Unroll + @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") + def 'should handle null values in labels with explicit logging'() { + given: + def handler = Spy(AwsBatchTaskHandler) + + expect: + handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED + + where: + INPUT | EXPECTED + // Basic null value case - this addresses the PR comment: "when the item is "item": null is the aws tag silently dropped?" + ['item': null, 'validKey': 'validValue'] | ['validKey': 'validValue'] + + // Multiple null values + ['key1': null, 'key2': 'value2', 'key3': null] | ['key2': 'value2'] + + // All null values + ['key1': null, 'key2': null] | [:] + + // Mix of null and empty string (which becomes null after sanitization) + ['nullValue': null, 'emptyValue': '', 'validValue': 'good'] | ['validValue': 'good'] + } + + @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") + def 'should handle null keys in labels with explicit logging'() { + given: + def handler = Spy(AwsBatchTaskHandler) + + when: 'creating map with actual null key' + def labels = new HashMap() + labels.put(null, 'validValue') + labels.put('validKey', 'validValue') + def result = handler.sanitizeAwsBatchLabels(labels) + + then: 'null key is dropped' + result.size() == 1 + result['validKey'] == 'validValue' + !result.containsKey(null) + } + + @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") + def 'should handle both null keys and values'() { + given: + def handler = Spy(AwsBatchTaskHandler) + + when: 'creating map with null key and null values' + def labels = new HashMap() + labels.put(null, 'someValue') // null key + labels.put('nullValue', null) // null value + labels.put(null, null) // both null (overwrites previous null key) + labels.put('validKey', 'validValue') + def result = handler.sanitizeAwsBatchLabels(labels) + + then: 'only valid entry remains' + result.size() == 1 + result['validKey'] == 'validValue' + !result.containsKey(null) + !result.containsKey('nullValue') + } + + @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") + def 'should verify logging behavior for null handling'() { + given: + def handler = new AwsBatchTaskHandler() + def logAppender = Mock(ch.qos.logback.core.Appender) + def logger = (ch.qos.logback.classic.Logger) org.slf4j.LoggerFactory.getLogger(AwsBatchTaskHandler) + logger.addAppender(logAppender) + logger.setLevel(ch.qos.logback.classic.Level.WARN) + + when: 'sanitizing labels with null values' + def labels = ['item': null, 'validKey': 'validValue'] // This is the exact case from PR comment + handler.sanitizeAwsBatchLabels(labels) + + then: 'warning is logged for null value' + 1 * logAppender.doAppend(_) >> { args -> + def event = args[0] as ch.qos.logback.classic.spi.ILoggingEvent + assert event.level == ch.qos.logback.classic.Level.WARN + assert event.formattedMessage.contains('AWS Batch label dropped due to null value: key=item, value=null') + } + + cleanup: + logger.detachAppender(logAppender) + } + + @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") + def 'should verify no silent dropping - PR comment verification'() { + given: 'This test specifically addresses the PR comment about silent dropping' + def handler = Spy(AwsBatchTaskHandler) + + when: 'processing the exact scenario from PR comment' + def labels = ['item': null] // "when the item is "item": null is the aws tag silently dropped?" + def result = handler.sanitizeAwsBatchLabels(labels) + + then: 'result is empty (tag is dropped)' + result == [:] + + and: 'the method properly logs the dropped label (verified by observing the actual log output in test execution)' + // The actual logging is verified by the "should verify logging behavior for null handling" test above + // This test focuses on the functional behavior: null values are correctly dropped from the result + true // The key point is that silent dropping has been replaced with logged dropping } } From ef84c99b968da41e8f5cfc2c7658717572c06f0a Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Fri, 15 Aug 2025 11:17:00 -0500 Subject: [PATCH 5/6] feat(aws): Add aws.batch.sanitizeTags config option for label sanitization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add optional configuration flag `aws.batch.sanitizeTags` to control AWS Batch tag sanitization behavior. This allows users to opt-in to automatic sanitization of resource labels to ensure compliance with AWS Batch naming requirements. Key changes: - Add sanitizeTags boolean config option to AwsBatchConfig (default: false) - Add sanitizeTags() accessor method to AwsOptions - Update AwsBatchTaskHandler to conditionally apply sanitization based on config - Add comprehensive tests for enabled, disabled, and default behavior - Maintain backward compatibility - sanitization is disabled by default Users can enable sanitization with: ``` aws { batch { sanitizeTags = true } } ``` 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandler.groovy | 3 +- .../cloud/aws/batch/AwsOptions.groovy | 4 ++ .../cloud/aws/config/AwsBatchConfig.groovy | 7 ++ .../aws/batch/AwsBatchTaskHandlerTest.groovy | 68 ++++++++++++++++++- .../aws/config/AwsBatchConfigTest.groovy | 15 ++++ 5 files changed, 95 insertions(+), 2 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index 6270cc7f41..c2466cf761 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -779,7 +779,8 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler volumes + @ConfigOption + @Description(""" + When `true`, enables automatic sanitization of AWS Batch job tags to ensure compliance with AWS naming requirements (default: `false`). + """) + final Boolean sanitizeTags + /** * The path for the `s5cmd` tool as an alternative to `aws s3` CLI to upload/download files */ @@ -151,6 +157,7 @@ class AwsBatchConfig implements CloudTransferOptions, ConfigScope { schedulingPriority = opts.schedulingPriority as Integer ?: 0 executionRole = opts.executionRole terminateUnschedulableJobs = opts.terminateUnschedulableJobs as boolean + sanitizeTags = opts.sanitizeTags as Boolean ?: false if( retryMode == 'built-in' ) retryMode = null // this force falling back on NF built-in retry mode instead of delegating to AWS CLI tool if( retryMode && retryMode !in AwsOptions.VALID_RETRY_MODES ) diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index c837010936..1895027809 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -1277,7 +1277,7 @@ class AwsBatchTaskHandlerTest extends Specification { then: 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] 1 * handler.maxSpotAttempts() >> 0 - 1 * handler.getAwsOptions() >> new AwsOptions() + 1 * handler.getAwsOptions() >> new AwsOptions(awsConfig: new AwsConfig(batch: [sanitizeTags: true])) 1 * handler.getJobQueue(task) >> 'test-queue' 1 * handler.getJobDefinition(task) >> 'test-job-def' 1 * handler.getEnvironmentVars() >> [] @@ -1294,6 +1294,72 @@ class AwsBatchTaskHandlerTest extends Specification { req.propagateTags() == true } + def 'should not sanitize labels when sanitizeTags config is disabled'() { + given: + def task = Mock(TaskRun) + task.getName() >> 'batch-task' + task.getConfig() >> new TaskConfig( + resourceLabels: [ + 'validLabel': 'validValue', + 'invalid#key': 'invalid$value', // These should NOT be sanitized + 'special*chars?here': 'value with spaces & symbols!' + ] + ) + + def handler = Spy(AwsBatchTaskHandler) + + when: + def req = handler.newSubmitRequest(task) + then: + 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] + 1 * handler.maxSpotAttempts() >> 0 + 1 * handler.getAwsOptions() >> new AwsOptions(awsConfig: new AwsConfig(batch: [sanitizeTags: false])) + 1 * handler.getJobQueue(task) >> 'test-queue' + 1 * handler.getJobDefinition(task) >> 'test-job-def' + 1 * handler.getEnvironmentVars() >> [] + + and: 'labels should remain unchanged (not sanitized)' + def tags = req.tags() + tags.size() == 3 + tags['validLabel'] == 'validValue' + tags['invalid#key'] == 'invalid$value' // Should still contain invalid characters + tags['special*chars?here'] == 'value with spaces & symbols!' // Should still contain invalid characters + req.propagateTags() == true + } + + def 'should not sanitize labels by default when no sanitizeTags config is specified'() { + given: + def task = Mock(TaskRun) + task.getName() >> 'batch-task' + task.getConfig() >> new TaskConfig( + resourceLabels: [ + 'validLabel': 'validValue', + 'invalid#key': 'invalid$value', // These should NOT be sanitized by default + 'test&symbol': 'value(with)brackets' + ] + ) + + def handler = Spy(AwsBatchTaskHandler) + + when: + def req = handler.newSubmitRequest(task) + then: + 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] + 1 * handler.maxSpotAttempts() >> 0 + 1 * handler.getAwsOptions() >> new AwsOptions() // No config specified, should default to false + 1 * handler.getJobQueue(task) >> 'test-queue' + 1 * handler.getJobDefinition(task) >> 'test-job-def' + 1 * handler.getEnvironmentVars() >> [] + + and: 'labels should remain unchanged (not sanitized by default)' + def tags = req.tags() + tags.size() == 3 + tags['validLabel'] == 'validValue' + tags['invalid#key'] == 'invalid$value' // Should still contain invalid characters + tags['test&symbol'] == 'value(with)brackets' // Should still contain invalid characters + req.propagateTags() == true + } + @Unroll @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") def 'should handle null values in labels with explicit logging'() { diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy index a6ed97d28f..64ab7a74f0 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy @@ -47,6 +47,7 @@ class AwsBatchConfigTest extends Specification { !batch.s5cmdPath batch.schedulingPriority == 0 !batch.terminateUnschedulableJobs + !batch.sanitizeTags } def 'should create config with options' () { @@ -153,4 +154,18 @@ class AwsBatchConfigTest extends Specification { [terminateUnschedulableJobs: false] | false [terminateUnschedulableJobs: true] | true } + + def 'should parse sanitizeTags flag' () { + given: + def opts = new AwsBatchConfig(OPTS) + + expect: + opts.sanitizeTags == SANITIZE_TAGS + + where: + OPTS | SANITIZE_TAGS + [:] | false + [sanitizeTags: false] | false + [sanitizeTags: true] | true + } } From b0042674c370e36ce93e4cb9ba67a42ebf050fa1 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Tue, 19 Aug 2025 13:40:41 -0500 Subject: [PATCH 6/6] refactor(aws): Replace custom sanitization config with strict mode validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the custom `aws.batch.sanitizeTags` configuration option with validation logic that integrates with Nextflow's existing `nextflow.enable.strict` feature flag for consistent platform behavior. Changes: - Remove `sanitizeTags` config option from AwsBatchConfig and AwsOptions - Replace `sanitizeAwsBatchLabels()` with `validateAwsBatchLabels()` - Implement strict mode detection using `nextflow.enable.strict` - Normal mode: warns about invalid AWS Batch tags but passes them through - Strict mode: throws ProcessUnrecoverableException for invalid tags - Filter out null keys/values with appropriate logging - Update all tests from sanitization to validation approach This provides better integration with Nextflow's platform-wide strict mode behavior while maintaining backward compatibility. Ref: https://github.com/nextflow-io/nextflow/pull/6211#issuecomment-3195834326 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Paolo Di Tommaso Co-Authored-By: Claude Signed-off-by: Edmund Miller --- .../aws/batch/AwsBatchTaskHandler.groovy | 111 ++++--- .../cloud/aws/batch/AwsOptions.groovy | 4 - .../cloud/aws/config/AwsBatchConfig.groovy | 7 - .../aws/batch/AwsBatchTaskHandlerTest.groovy | 305 +++++++++--------- .../aws/config/AwsBatchConfigTest.groovy | 14 - 5 files changed, 216 insertions(+), 225 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index c2466cf761..fadc9637fd 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -779,7 +779,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler sanitizeAwsBatchLabels(Map labels) { + protected Map validateAwsBatchLabels(Map labels) { if (!labels) return labels - final result = new LinkedHashMap() + final strictMode = executor.session.config.navigate('nextflow.enable.strict', false) + final violations = [] + final result = new HashMap() for (Map.Entry entry : labels.entrySet()) { final key = entry.getKey() final value = entry.getValue() - // Handle null keys or values - if (key == null || value == null) { - log.warn "AWS Batch label dropped due to null ${key == null ? 'key' : 'value'}: key=${key}, value=${value}" + // Check for null keys or values and filter them out (not validation violations) + if (key == null) { + log.warn "AWS Batch label dropped due to null key: key=null, value=${value}" continue } - - final originalKey = key.toString() - final originalValue = value.toString() - final sanitizedKey = sanitizeAwsBatchLabel(originalKey, 128) - final sanitizedValue = sanitizeAwsBatchLabel(originalValue, 256) - - // Check if sanitization resulted in empty strings - if (!sanitizedKey || !sanitizedValue) { - log.warn "AWS Batch label dropped after sanitization - key: '${originalKey}' -> '${sanitizedKey ?: ''}', value: '${originalValue}' -> '${sanitizedValue ?: ''}'" + if (value == null) { + log.warn "AWS Batch label dropped due to null value: key=${key}, value=null" continue } - // Log if values were modified during sanitization - if (sanitizedKey != originalKey || sanitizedValue != originalValue) { - log.warn "AWS Batch label sanitized - key: '${originalKey}' -> '${sanitizedKey}', value: '${originalValue}' -> '${sanitizedValue}'" + final keyStr = key.toString() + final valueStr = value.toString() + + // Validate key length + if (keyStr.length() > 128) { + violations << "Label key exceeds 128 characters: '${keyStr}' (${keyStr.length()} chars)" + } + + // Validate value length + if (valueStr.length() > 256) { + violations << "Label value exceeds 256 characters: '${keyStr}' = '${valueStr}' (${valueStr.length()} chars)" + } + + // Validate key characters + if (!isValidAwsBatchTagString(keyStr)) { + violations << "Label key contains invalid characters: '${keyStr}' - only letters, numbers, spaces, and _ . : / = + - @ are allowed" + } + + // Validate value characters + if (!isValidAwsBatchTagString(valueStr)) { + violations << "Label value contains invalid characters: '${keyStr}' = '${valueStr}' - only letters, numbers, spaces, and _ . : / = + - @ are allowed" } - result.put(sanitizedKey, sanitizedValue) + // Add valid entries to result + result[keyStr] = valueStr + } + + // Handle violations based on strict mode (but only for constraint violations, not null filtering) + if (violations) { + final message = "AWS Batch tag validation failed:\n${violations.collect{ ' - ' + it }.join('\n')}" + if (strictMode) { + throw new ProcessUnrecoverableException(message) + } else { + log.warn "${message}\nTags will be used as-is but may cause AWS Batch submission failures" + } } return result } /** - * Sanitize a single label key or value for AWS Batch tags. - * Replaces invalid characters with underscores and truncates if necessary. - * - * @param input The input string to sanitize - * @param maxLength The maximum allowed length - * @return The sanitized string + * Check if a string contains only characters allowed in AWS Batch tags. + * AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @ + * + * @param input The string to validate + * @return true if the string contains only valid characters */ - protected String sanitizeAwsBatchLabel(String input, int maxLength) { - if (!input) return input - - // Replace invalid characters and clean up the string - // AWS Batch allows: letters, numbers, spaces, and: _ . : / = + - @ - final sanitized = input - .replaceAll(/[^a-zA-Z0-9\s_.\:\/=+\-@]/, '_') // Replace invalid chars with underscores - .replaceAll(/[_\s]{2,}/, '_') // Replace multiple consecutive underscores/spaces - .replaceAll(/^[_\s]+|[_\s]+$/, '') // Remove leading/trailing underscores and spaces - - // Truncate if necessary and clean up any trailing underscores/spaces - final result = sanitized.size() > maxLength - ? sanitized.substring(0, maxLength).replaceAll(/[_\s]+$/, '') - : sanitized - - return result ?: null + protected boolean isValidAwsBatchTagString(String input, int maxLength = 128) { + if (!input) return false + if (input.length() > maxLength) return false + return input ==~ /^[a-zA-Z0-9\s_.\:\/=+\-@]*$/ } /** diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsOptions.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsOptions.groovy index a8628de518..86ea44db9d 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsOptions.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsOptions.groovy @@ -164,8 +164,4 @@ class AwsOptions implements CloudTransferOptions { return awsConfig.batchConfig.terminateUnschedulableJobs } - Boolean sanitizeTags() { - return awsConfig.batchConfig.sanitizeTags - } - } diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsBatchConfig.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsBatchConfig.groovy index 2df35643fd..638a36f9f2 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsBatchConfig.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsBatchConfig.groovy @@ -125,12 +125,6 @@ class AwsBatchConfig implements CloudTransferOptions, ConfigScope { """) final List volumes - @ConfigOption - @Description(""" - When `true`, enables automatic sanitization of AWS Batch job tags to ensure compliance with AWS naming requirements (default: `false`). - """) - final Boolean sanitizeTags - /** * The path for the `s5cmd` tool as an alternative to `aws s3` CLI to upload/download files */ @@ -157,7 +151,6 @@ class AwsBatchConfig implements CloudTransferOptions, ConfigScope { schedulingPriority = opts.schedulingPriority as Integer ?: 0 executionRole = opts.executionRole terminateUnschedulableJobs = opts.terminateUnschedulableJobs as boolean - sanitizeTags = opts.sanitizeTags as Boolean ?: false if( retryMode == 'built-in' ) retryMode = null // this force falling back on NF built-in retry mode instead of delegating to AWS CLI tool if( retryMode && retryMode !in AwsOptions.VALID_RETRY_MODES ) diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy index 1895027809..961393df37 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchTaskHandlerTest.groovy @@ -981,7 +981,14 @@ class AwsBatchTaskHandlerTest extends Specification { task.getName() >> 'batch-task' task.getConfig() >> new TaskConfig(memory: '8GB', cpus: 4, maxRetries: 2, errorStrategy: 'retry', resourceLabels:[a:'b']) - def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> false // Normal mode + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + def handler = Spy(AwsBatchTaskHandler) { getExecutor() >> executor } + handler.@executor = executor when: def req = handler.newSubmitRequest(task) @@ -1139,124 +1146,75 @@ class AwsBatchTaskHandlerTest extends Specification { } @Unroll - def 'should sanitize AWS Batch label' () { + def 'should validate AWS Batch tag string constraints' () { given: def handler = Spy(AwsBatchTaskHandler) expect: - handler.sanitizeAwsBatchLabel(INPUT, MAX_LENGTH) == EXPECTED + handler.isValidAwsBatchTagString(INPUT, MAX_LENGTH) == EXPECTED where: INPUT | MAX_LENGTH | EXPECTED - // Valid labels that don't need sanitization - 'validLabel' | 50 | 'validLabel' - 'valid-label_123' | 50 | 'valid-label_123' - 'valid.label:test/path=value+more' | 50 | 'valid.label:test/path=value+more' - 'label with spaces' | 50 | 'label with spaces' - 'label-with@symbol' | 50 | 'label-with@symbol' - and: - // Labels with invalid characters - 'label#with#hash' | 50 | 'label_with_hash' - 'label$with%special&chars' | 50 | 'label_with_special_chars' - 'label(with)brackets[and]braces{}' | 50 | 'label_with_brackets_and_braces' - 'label*with?wildcards' | 50 | 'label_with_wildcards' - 'unicode_λαβελ_test' | 50 | 'unicode_test' + // Valid strings that meet AWS constraints + 'validLabel' | 128 | true + 'valid-label_123' | 128 | true + 'valid.label:test/path=value+more' | 128 | true + 'label with spaces' | 128 | true + 'label-with@symbol' | 128 | true and: - // Multiple consecutive invalid characters - 'label###multiple###hashes' | 50 | 'label_multiple_hashes' - 'label multiple spaces' | 50 | 'label_multiple_spaces' - 'label___multiple___underscores' | 50 | 'label_multiple_underscores' - 'label$%^&*special*&^%$chars' | 50 | 'label_special_chars' + // Invalid characters for AWS Batch + 'label#with#hash' | 128 | false + 'label$with%special&chars' | 128 | false + 'label(with)brackets[and]braces{}' | 128 | false + 'label*with?wildcards' | 128 | false + 'unicode_λαβελ_test' | 128 | false and: - // Leading/trailing invalid characters - '###leading-hashes' | 50 | 'leading-hashes' - 'trailing-hashes###' | 50 | 'trailing-hashes' - ' leading-spaces' | 50 | 'leading-spaces' - 'trailing-spaces ' | 50 | 'trailing-spaces' - '___leading-underscores' | 50 | 'leading-underscores' - 'trailing-underscores___' | 50 | 'trailing-underscores' - and: - // Length truncation - 'very-long-label-that-exceeds-max' | 10 | 'very-long-' - 'very-long-label-ending-with-_' | 25 | 'very-long-label-ending-wi' - 'very-long-label-ending-with-___' | 28 | 'very-long-label-ending-with-' + // Length constraints (create a string longer than 128 chars) + ('very-long-label-that-exceeds-maximum-length-for-aws-batch-tags-which-is-128-characters-for-keys-and-256-for-values-test-extra-long-suffix-to-exceed-limit') | 128 | false + 'valid-short-label' | 128 | true and: // Edge cases - null | 50 | null - '' | 50 | '' - ' ' | 50 | null - '___' | 50 | null - '###' | 50 | null - '_' | 50 | null - ' ' | 50 | null - '#' | 50 | null - and: - // Complex real-world scenarios - 'user@domain.com' | 50 | 'user@domain.com' - 'workflow-run-2024/01/15' | 50 | 'workflow-run-2024/01/15' - 'task.hash.0x1234abcd' | 50 | 'task.hash.0x1234abcd' - 'pipeline#name%with&special*chars' | 50 | 'pipeline_name_with_special_chars' - 'session-id:abc123#$%' | 50 | 'session-id:abc123' + null | 128 | false + '' | 128 | false + ' ' | 128 | true // Spaces are valid } @Unroll - def 'should sanitize AWS Batch labels map' () { + def 'should validate AWS Batch labels and return validation results' () { given: def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> IS_STRICT_MODE + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + handler.@executor = executor - expect: - handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED + when: + def result = handler.validateAwsBatchLabels(INPUT) + + then: + result == EXPECTED_RESULT where: - INPUT | EXPECTED - // Null/empty input - null | null - [:] | [:] - and: - // Valid labels - [validKey: 'validValue'] | [validKey: 'validValue'] - ['valid-key_123': 'valid-value_456'] | ['valid-key_123': 'valid-value_456'] - ['key.with:path/chars=test+more@symbol': 'value with spaces'] | ['key.with:path/chars=test+more@symbol': 'value with spaces'] - and: - // Invalid characters in keys and values - ['key#with#hash': 'value$with%special&chars'] | ['key_with_hash': 'value_with_special_chars'] - ['key(brackets)': 'value[squares]{braces}'] | ['key_brackets': 'value_squares_braces'] - ['unicode_λkey': 'unicode_λvalue'] | ['unicode_key': 'unicode_value'] - and: - // Multiple entries with mixed validity - ['validKey': 'validValue', 'invalid#key': 'invalid$value', 'another.valid:key': 'another+valid@value'] | - ['validKey': 'validValue', 'invalid_key': 'invalid_value', 'another.valid:key': 'another+valid@value'] + INPUT | IS_STRICT_MODE | EXPECTED_RESULT + // Valid labels - should pass in both modes + [validKey: 'validValue'] | false | [validKey: 'validValue'] + [validKey: 'validValue'] | true | [validKey: 'validValue'] + ['valid-key_123': 'valid-value_456'] | false | ['valid-key_123': 'valid-value_456'] + ['valid-key_123': 'valid-value_456'] | true | ['valid-key_123': 'valid-value_456'] and: - // Entries that should be filtered out (null/empty after sanitization) - ['validKey': 'validValue', '###': '$$$', ' ': '%%%', 'goodKey': 'goodValue'] | - ['validKey': 'validValue', 'goodKey': 'goodValue'] + // Mixed valid/invalid - normal mode warns and passes through, strict mode should be tested separately + ['validKey': 'validValue', 'invalid#key': 'invalid$value'] | false | ['validKey': 'validValue', 'invalid#key': 'invalid$value'] and: - // Null keys or values - ['validKey': null, null: 'validValue', 'goodKey': 'goodValue'] | - [null: 'validValue', 'goodKey': 'goodValue'] - and: - // Real-world example with Nextflow resource labels - [ - 'uniqueRunId': 'tw-12345-workflow-run', - 'taskHash': 'task.hash.0x1a2b3c4d#special', - 'pipelineUser': 'user@domain.com', - 'pipelineRunName': 'my-pipeline-run(2024)', - 'pipelineSessionId': 'session#id$with%special&chars', - 'pipelineResume': 'false', - 'pipelineName': 'my_pipeline/name:version+tag' - ] | - [ - 'uniqueRunId': 'tw-12345-workflow-run', - 'taskHash': 'task.hash.0x1a2b3c4d_special', - 'pipelineUser': 'user@domain.com', - 'pipelineRunName': 'my-pipeline-run_2024', - 'pipelineSessionId': 'session_id_with_special_chars', - 'pipelineResume': 'false', - 'pipelineName': 'my_pipeline/name:version+tag' - ] + // Null/empty handling + null | false | null + [:] | false | [:] + ['validKey': null, 'goodKey': 'goodValue'] | false | ['goodKey': 'goodValue'] } - def 'should apply label sanitization in submit request' () { + def 'should validate labels in submit request with warnings in normal mode' () { given: def task = Mock(TaskRun) task.getName() >> 'batch-task' @@ -1265,109 +1223,120 @@ class AwsBatchTaskHandlerTest extends Specification { cpus: 4, resourceLabels: [ 'validLabel': 'validValue', - 'invalid#key': 'invalid$value', - 'long-key-that-might-be-truncated-if-very-very-long': 'long-value-that-should-be-truncated-because-it-exceeds-the-maximum-allowed-length-for-aws-batch-tags-which-is-256-characters-and-this-string-is-definitely-longer-than-that-limit-so-it-will-be-cut-off-at-the-appropriate-length-and-cleaned-up' + 'invalid#key': 'invalid$value', // Invalid characters + ('keyThatIsTooLongForAwsBatchWhichHas128CharacterLimit' + 'AndWillCauseValidationToFailWithASpecificErrorMessageThatTellsTheUserWhatWentWrong'): 'value' ] ) - def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> false // Normal mode + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + def handler = Spy(AwsBatchTaskHandler) { getExecutor() >> executor } + handler.@executor = executor when: def req = handler.newSubmitRequest(task) then: 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] 1 * handler.maxSpotAttempts() >> 0 - 1 * handler.getAwsOptions() >> new AwsOptions(awsConfig: new AwsConfig(batch: [sanitizeTags: true])) + 1 * handler.getAwsOptions() >> new AwsOptions() 1 * handler.getJobQueue(task) >> 'test-queue' 1 * handler.getJobDefinition(task) >> 'test-job-def' 1 * handler.getEnvironmentVars() >> [] - and: + and: 'labels should be passed through unchanged despite validation warnings' def tags = req.tags() tags.size() == 3 tags['validLabel'] == 'validValue' - tags['invalid_key'] == 'invalid_value' - // Check that long value was truncated - tags['long-key-that-might-be-truncated-if-very-very-long'].length() <= 256 - tags['long-key-that-might-be-truncated-if-very-very-long'].startsWith('long-value-that-should-be-truncated') - !tags['long-key-that-might-be-truncated-if-very-very-long'].endsWith('_') + tags['invalid#key'] == 'invalid$value' // Unchanged despite invalid characters + tags.containsKey('keyThatIsTooLongForAwsBatchWhichHas128CharacterLimit' + 'AndWillCauseValidationToFailWithASpecificErrorMessageThatTellsTheUserWhatWentWrong') req.propagateTags() == true } - def 'should not sanitize labels when sanitizeTags config is disabled'() { + def 'should throw exception in strict mode for invalid labels' () { given: - def task = Mock(TaskRun) - task.getName() >> 'batch-task' - task.getConfig() >> new TaskConfig( - resourceLabels: [ - 'validLabel': 'validValue', - 'invalid#key': 'invalid$value', // These should NOT be sanitized - 'special*chars?here': 'value with spaces & symbols!' - ] - ) + def labels = [ + 'validLabel': 'validValue', + 'invalid#key': 'invalid$value', // Invalid characters should cause exception + ('keyThatIsTooLongForAwsBatchWhichHas128CharacterLimit' + 'AndWillCauseValidationToFailWithASpecificErrorMessageThatTellsTheUserWhatWentWrong'): 'value' + ] + def config = ['nextflow': ['enable': ['strict': true]]] + def session = Mock(Session) { + getConfig() >> config + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } def handler = Spy(AwsBatchTaskHandler) + handler.@executor = executor when: - def req = handler.newSubmitRequest(task) - then: - 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] - 1 * handler.maxSpotAttempts() >> 0 - 1 * handler.getAwsOptions() >> new AwsOptions(awsConfig: new AwsConfig(batch: [sanitizeTags: false])) - 1 * handler.getJobQueue(task) >> 'test-queue' - 1 * handler.getJobDefinition(task) >> 'test-job-def' - 1 * handler.getEnvironmentVars() >> [] - - and: 'labels should remain unchanged (not sanitized)' - def tags = req.tags() - tags.size() == 3 - tags['validLabel'] == 'validValue' - tags['invalid#key'] == 'invalid$value' // Should still contain invalid characters - tags['special*chars?here'] == 'value with spaces & symbols!' // Should still contain invalid characters - req.propagateTags() == true + handler.validateAwsBatchLabels(labels) + + then: 'exception should be thrown for invalid labels in strict mode' + thrown(ProcessUnrecoverableException) } - def 'should not sanitize labels by default when no sanitizeTags config is specified'() { + def 'should pass through valid labels in both normal and strict mode' () { given: def task = Mock(TaskRun) task.getName() >> 'batch-task' task.getConfig() >> new TaskConfig( resourceLabels: [ 'validLabel': 'validValue', - 'invalid#key': 'invalid$value', // These should NOT be sanitized by default - 'test&symbol': 'value(with)brackets' + 'valid-key_123': 'valid-value_456', + 'key.with:path/chars=test+more@symbol': 'value with spaces' ] ) - def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> strictMode + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + def handler = Spy(AwsBatchTaskHandler) { getExecutor() >> executor } + handler.@executor = executor when: def req = handler.newSubmitRequest(task) then: 1 * handler.getSubmitCommand() >> ['bash', '-c', 'test'] 1 * handler.maxSpotAttempts() >> 0 - 1 * handler.getAwsOptions() >> new AwsOptions() // No config specified, should default to false + 1 * handler.getAwsOptions() >> new AwsOptions() 1 * handler.getJobQueue(task) >> 'test-queue' 1 * handler.getJobDefinition(task) >> 'test-job-def' 1 * handler.getEnvironmentVars() >> [] - and: 'labels should remain unchanged (not sanitized by default)' + and: 'valid labels should pass through unchanged' def tags = req.tags() tags.size() == 3 tags['validLabel'] == 'validValue' - tags['invalid#key'] == 'invalid$value' // Should still contain invalid characters - tags['test&symbol'] == 'value(with)brackets' // Should still contain invalid characters + tags['valid-key_123'] == 'valid-value_456' + tags['key.with:path/chars=test+more@symbol'] == 'value with spaces' req.propagateTags() == true + + where: + strictMode << [false, true] // Test both normal and strict modes } @Unroll @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") - def 'should handle null values in labels with explicit logging'() { + def 'should handle null values in labels during validation'() { given: def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> false // Normal mode + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + handler.@executor = executor expect: - handler.sanitizeAwsBatchLabels(INPUT) == EXPECTED + handler.validateAwsBatchLabels(INPUT) == EXPECTED where: INPUT | EXPECTED @@ -1380,20 +1349,27 @@ class AwsBatchTaskHandlerTest extends Specification { // All null values ['key1': null, 'key2': null] | [:] - // Mix of null and empty string (which becomes null after sanitization) - ['nullValue': null, 'emptyValue': '', 'validValue': 'good'] | ['validValue': 'good'] + // Mix of null and empty string + ['nullValue': null, 'emptyValue': '', 'validValue': 'good'] | ['emptyValue': '', 'validValue': 'good'] } @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") - def 'should handle null keys in labels with explicit logging'() { + def 'should handle null keys in labels during validation'() { given: def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> false // Normal mode + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + handler.@executor = executor when: 'creating map with actual null key' def labels = new HashMap() labels.put(null, 'validValue') labels.put('validKey', 'validValue') - def result = handler.sanitizeAwsBatchLabels(labels) + def result = handler.validateAwsBatchLabels(labels) then: 'null key is dropped' result.size() == 1 @@ -1402,9 +1378,16 @@ class AwsBatchTaskHandlerTest extends Specification { } @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") - def 'should handle both null keys and values'() { + def 'should handle both null keys and values during validation'() { given: def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> false // Normal mode + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + handler.@executor = executor when: 'creating map with null key and null values' def labels = new HashMap() @@ -1412,7 +1395,7 @@ class AwsBatchTaskHandlerTest extends Specification { labels.put('nullValue', null) // null value labels.put(null, null) // both null (overwrites previous null key) labels.put('validKey', 'validValue') - def result = handler.sanitizeAwsBatchLabels(labels) + def result = handler.validateAwsBatchLabels(labels) then: 'only valid entry remains' result.size() == 1 @@ -1422,17 +1405,24 @@ class AwsBatchTaskHandlerTest extends Specification { } @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") - def 'should verify logging behavior for null handling'() { + def 'should verify logging behavior for null handling during validation'() { given: def handler = new AwsBatchTaskHandler() + def config = ['nextflow': ['enable': ['strict': false]]] + def session = Mock(Session) { + getConfig() >> config + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + handler.@executor = executor + def logAppender = Mock(ch.qos.logback.core.Appender) def logger = (ch.qos.logback.classic.Logger) org.slf4j.LoggerFactory.getLogger(AwsBatchTaskHandler) logger.addAppender(logAppender) logger.setLevel(ch.qos.logback.classic.Level.WARN) - when: 'sanitizing labels with null values' + when: 'validating labels with null values' def labels = ['item': null, 'validKey': 'validValue'] // This is the exact case from PR comment - handler.sanitizeAwsBatchLabels(labels) + handler.validateAwsBatchLabels(labels) then: 'warning is logged for null value' 1 * logAppender.doAppend(_) >> { args -> @@ -1446,19 +1436,26 @@ class AwsBatchTaskHandlerTest extends Specification { } @See("https://github.com/nextflow-io/nextflow/pull/6211#discussion_r2161928856") - def 'should verify no silent dropping - PR comment verification'() { + def 'should verify no silent dropping during validation - PR comment verification'() { given: 'This test specifically addresses the PR comment about silent dropping' def handler = Spy(AwsBatchTaskHandler) + def session = Mock(Session) { + getConfig() >> Mock(ConfigObject) { + navigate('nextflow.enable.strict', false) >> false // Normal mode + } + } + def executor = Mock(AwsBatchExecutor) { getSession() >> session } + handler.@executor = executor when: 'processing the exact scenario from PR comment' def labels = ['item': null] // "when the item is "item": null is the aws tag silently dropped?" - def result = handler.sanitizeAwsBatchLabels(labels) + def result = handler.validateAwsBatchLabels(labels) then: 'result is empty (tag is dropped)' result == [:] and: 'the method properly logs the dropped label (verified by observing the actual log output in test execution)' - // The actual logging is verified by the "should verify logging behavior for null handling" test above + // The actual logging is verified by the "should verify logging behavior for null handling during validation" test above // This test focuses on the functional behavior: null values are correctly dropped from the result true // The key point is that silent dropping has been replaced with logged dropping } diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy index 64ab7a74f0..cfe1969c7f 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/config/AwsBatchConfigTest.groovy @@ -47,7 +47,6 @@ class AwsBatchConfigTest extends Specification { !batch.s5cmdPath batch.schedulingPriority == 0 !batch.terminateUnschedulableJobs - !batch.sanitizeTags } def 'should create config with options' () { @@ -155,17 +154,4 @@ class AwsBatchConfigTest extends Specification { [terminateUnschedulableJobs: true] | true } - def 'should parse sanitizeTags flag' () { - given: - def opts = new AwsBatchConfig(OPTS) - - expect: - opts.sanitizeTags == SANITIZE_TAGS - - where: - OPTS | SANITIZE_TAGS - [:] | false - [sanitizeTags: false] | false - [sanitizeTags: true] | true - } }