diff --git a/CHANGELOG.md b/CHANGELOG.md index b74299719..e8b2059d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ # Change Log +## 2025-06-23 - Runtime 0.18.0-alpha.12 + +- fix: ensure valid UTF8 byte sequence in HTTP response [#904](https://github.com/hypermodeinc/modus/pull/904) + ## 2025-06-23 - Runtime 0.18.0-alpha.11 - fix: adjust cluster sync settings and delays [#902](https://github.com/hypermodeinc/modus/pull/902) diff --git a/runtime/utils/http.go b/runtime/utils/http.go index fe0afb031..897f10766 100644 --- a/runtime/utils/http.go +++ b/runtime/utils/http.go @@ -114,8 +114,10 @@ func PostHttp[TResult any](ctx context.Context, url string, payload any, beforeS case []byte: result = any(content).(TResult) case string: + content = SanitizeUTF8(content) result = any(string(content)).(TResult) default: + content = SanitizeUTF8(content) if err := JsonDeserialize(content, &result); err != nil { return nil, fmt.Errorf("error deserializing response: %w", err) } diff --git a/runtime/utils/strings.go b/runtime/utils/strings.go index f84d30ac2..0f82e20fd 100644 --- a/runtime/utils/strings.go +++ b/runtime/utils/strings.go @@ -11,6 +11,7 @@ package utils import ( "unicode/utf16" + "unicode/utf8" "unsafe" ) @@ -44,3 +45,32 @@ func EncodeUTF16(str string) []byte { bytes := unsafe.Slice((*byte)(ptr), len(words)*2) return bytes } + +// SanitizeUTF8 removes null bytes and invalid UTF-8 bytes from a byte slice. +// ASCII bytes other than 0 are copied as-is; for bytes >= utf8.RuneSelf, a +// width-1 result from utf8.DecodeRune marks an invalid byte, which is dropped. +// It returns a new byte slice (s is not written to) holding the kept bytes. +func SanitizeUTF8(s []byte) []byte { + // This is adapted from bytes.ToValidUTF8 + b := make([]byte, 0, len(s)) + for i := 0; i < len(s); { + c := s[i] + if c == 0 { + i++ + continue + } + if c < utf8.RuneSelf { + i++ + b = append(b, c) + continue + } + _, wid := utf8.DecodeRune(s[i:]) + if wid == 1 { + i++ + continue + } + b = append(b, s[i:i+wid]...) 
+ i += wid + } + return b +} diff --git a/runtime/utils/strings_test.go b/runtime/utils/strings_test.go index c38cb755f..414d8676f 100644 --- a/runtime/utils/strings_test.go +++ b/runtime/utils/strings_test.go @@ -43,3 +43,35 @@ func Test_DecodeUTF16(t *testing.T) { t.Errorf("expected %s, got %s", testString, str) } } + +func Test_SanitizeUTF8(t *testing.T) { + // Test with a valid UTF-8 string + validUTF8 := []byte("Hello, 世界") + sanitized := utils.SanitizeUTF8(validUTF8) + if !bytes.Equal(sanitized, validUTF8) { + t.Errorf("expected %s, got %s", validUTF8, sanitized) + } + + // Test with an invalid UTF-8 sequence + invalidUTF8 := []byte{0xff, 0xfe, 0xfd} + sanitized = utils.SanitizeUTF8(invalidUTF8) + if len(sanitized) != 0 { + t.Errorf("expected empty slice for invalid UTF-8, got %s", sanitized) + } + + // Test with a mix of valid and invalid UTF-8 + mixedUTF8 := []byte("Hello\xffWorld") + sanitized = utils.SanitizeUTF8(mixedUTF8) + expected := []byte("HelloWorld") + if !bytes.Equal(sanitized, expected) { + t.Errorf("expected %s, got %s", expected, sanitized) + } + + // Test with some null bytes + nullBytes := []byte("Hello\x00World") + sanitized = utils.SanitizeUTF8(nullBytes) + expected = []byte("HelloWorld") + if !bytes.Equal(sanitized, expected) { + t.Errorf("expected %s, got %s", expected, sanitized) + } +}