From 89b92b99c8ab93d9895be1d27aa4d40540dbbc2b Mon Sep 17 00:00:00 2001
From: Jiangzhou He
Date: Wed, 16 Jul 2025 19:55:44 -0700
Subject: [PATCH] fix(s3): properly decode special chars in filename from change event

---
 src/ops/sources/amazon_s3.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/ops/sources/amazon_s3.rs b/src/ops/sources/amazon_s3.rs
index fdd0a70b1..260546c1f 100644
--- a/src/ops/sources/amazon_s3.rs
+++ b/src/ops/sources/amazon_s3.rs
@@ -4,10 +4,21 @@ use aws_config::BehaviorVersion;
 use aws_sdk_s3::Client;
 use globset::{Glob, GlobSet, GlobSetBuilder};
 use std::sync::Arc;
+use urlencoding;
 
 use crate::base::field_attrs;
 use crate::ops::sdk::*;
 
+/// Decode a form-encoded URL string, treating '+' as spaces
+fn decode_form_encoded_url(input: &str) -> Result<Arc<str>> {
+    // Replace '+' with spaces (form encoding convention), then decode
+    // This handles both cases correctly:
+    // - Literal '+' would be encoded as '%2B' and remain unchanged after replacement
+    // - Space would be encoded as '+' and become ' ' after replacement
+    let with_spaces = input.replace("+", " ");
+    Ok(urlencoding::decode(&with_spaces)?.into())
+}
+
 #[derive(Debug, Deserialize)]
 pub struct Spec {
     bucket_name: String,
@@ -258,8 +269,9 @@ impl Executor {
                     if record.event_name.starts_with("ObjectCreated:")
                         || record.event_name.starts_with("ObjectRemoved:")
                     {
+                        let decoded_key = decode_form_encoded_url(&s3.object.key)?;
                         changes.push(SourceChange {
-                            key: KeyValue::Str(s3.object.key.into()),
+                            key: KeyValue::Str(decoded_key),
                             data: None,
                         });
                     }