Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions data/codings.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,42 @@ func (*iso88591) Decode(data []byte) (string, error) {

// DataCoding returns the data-coding identifier of this encoding, which is
// the LATIN1Coding constant.
func (*iso88591) DataCoding() byte {
	return LATIN1Coding
}

// ShouldSplit reports whether text contains more characters (runes) than
// octetLimit, i.e. whether it does not fit into a single segment of that
// size. The comparison is done on the rune count, not on the UTF-8 byte
// length of text.
func (*iso88591) ShouldSplit(text string, octetLimit uint) bool {
	var runeCount uint
	// Ranging over a string advances one rune per iteration.
	for range text {
		runeCount++
	}
	return runeCount > octetLimit
}

// EncodeSplit encodes text with c.Encode and cuts it into consecutive
// segments, returned in message order.
//
// Each segment initially takes up to octetLimit runes; if its encoded form
// is longer than octetLimit bytes, runes are dropped from its end until it
// fits or only one rune is left. A segment therefore always carries at least
// one rune, so the loop is guaranteed to make progress even when octetLimit
// is 0 (every rune then becomes its own segment).
//
// An empty text yields a nil slice and a nil error. The first error reported
// by c.Encode is returned as-is and no segments are returned with it.
func (c *iso88591) EncodeSplit(text string, octetLimit uint) ([][]byte, error) {
	runes := []rune(text)
	if len(runes) == 0 {
		return nil, nil
	}

	// Rune budget per segment. The comparison happens in uint space and the
	// result is clamped to [1, len(runes)]: a limit above the int range must
	// not wrap to a negative slice bound, and a limit of 0 must not produce
	// an endless stream of empty segments.
	step := len(runes)
	if octetLimit < uint(step) {
		step = int(octetLimit)
	}
	if step < 1 {
		step = 1
	}

	segments := make([][]byte, 0, (len(runes)+step-1)/step)
	for start := 0; start < len(runes); {
		end := len(runes)
		if step < end-start {
			end = start + step
		}

		segment, err := c.Encode(string(runes[start:end]))
		if err != nil {
			return nil, err
		}

		// If the encoded segment exceeds the limit, shrink it rune by rune,
		// but never below a single rune.
		for uint(len(segment)) > octetLimit && end > start+1 {
			end--
			segment, err = c.Encode(string(runes[start:end]))
			if err != nil {
				return nil, err
			}
		}

		segments = append(segments, segment)
		start = end
	}

	return segments, nil
}

// binary8bit1 is an empty struct with no state of its own; its behavior is
// provided entirely by its methods (such as Encode).
// NOTE(review): the name suggests an 8-bit binary data coding — confirm
// against the rest of the file.
type binary8bit1 struct{}

func (*binary8bit1) Encode(_ string) ([]byte, error) {
Expand Down
84 changes: 83 additions & 1 deletion data/codings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func shiftBitsOneRight(input []byte) []byte {
}

func TestCoding(t *testing.T) {
require.Nil(t, FromDataCoding(12))
require.Equal(t, NewCustomEncoding(12, GSM7BIT), FromDataCoding(12)) // GSM7BIT is default when encoding is reserved
require.Equal(t, GSM7BIT, FromDataCoding(0))
require.Equal(t, ASCII, FromDataCoding(1))
require.Equal(t, UCS2, FromDataCoding(8))
Expand All @@ -79,6 +79,23 @@ func TestGSM7Bit(t *testing.T) {
}

func TestShouldSplit(t *testing.T) {
t.Run("testShouldSplit_LATIN1", func(t *testing.T) {
octetLim := uint(140)
expect := map[string]bool{
"": false,
"1": false,
"12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890": false, // exactly 140 chars
"123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901": true, // 141 chars
}

splitter, ok := LATIN1.(Splitter)
require.True(t, ok, "LATIN1 must implement Splitter interface")
for k, v := range expect {
ok := splitter.ShouldSplit(k, octetLim)
require.Equalf(t, v, ok, "Test case len=%d", len(k))
}
})

t.Run("testShouldSplit_GSM7BIT", func(t *testing.T) {
octetLim := uint(140)
expect := map[string]bool{
Expand Down Expand Up @@ -134,6 +151,71 @@ func TestShouldSplit(t *testing.T) {
func TestSplit(t *testing.T) {
require.EqualValues(t, 0o0, GSM7BITPACKED.DataCoding())

t.Run("testSplitLATIN1Empty", func(t *testing.T) {
testEncodingSplit(t, LATIN1,
134,
"",
[]string{
"",
},
[]string{
"",
})
})

t.Run("testSplitLATIN1", func(t *testing.T) {
// 213 'a' characters - should split into 2 segments: 134 + 79
msg := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
splitter, ok := LATIN1.(Splitter)
require.True(t, ok)

segments, err := splitter.EncodeSplit(msg, 134)
require.Nil(t, err)
require.Equal(t, 2, len(segments))
require.Equal(t, 134, len(segments[0]))
require.Equal(t, 79, len(segments[1]))

// Verify decoded content
decoded1, err := LATIN1.Decode(segments[0])
require.Nil(t, err)
decoded2, err := LATIN1.Decode(segments[1])
require.Nil(t, err)
require.Equal(t, msg, decoded1+decoded2)
})

t.Run("testSplitLATIN1WithSpecialChars", func(t *testing.T) {
// Test with LATIN1 special characters (é, ñ, ü, etc.)
// 150 chars total - should split into 2 segments: 134 + 16
msg := "Héllo Wörld! Thís ís á tëst mëssägé wíth spëcíäl chäräctërs. Lét's sëé höw ít splíts. Möré téxt tö réäch thé límít. Änd ëvén möré tëxt tö mäké ít längér."
splitter, ok := LATIN1.(Splitter)
require.True(t, ok)

segments, err := splitter.EncodeSplit(msg, 134)
require.Nil(t, err)
require.Equal(t, 2, len(segments))
require.LessOrEqual(t, len(segments[0]), 134)
require.LessOrEqual(t, len(segments[1]), 134)

// Verify we can decode each segment
decoded1, err := LATIN1.Decode(segments[0])
require.Nil(t, err)
decoded2, err := LATIN1.Decode(segments[1])
require.Nil(t, err)
require.Equal(t, msg, decoded1+decoded2)
})

t.Run("testSplitLATIN1NoSplitNeeded", func(t *testing.T) {
// 100 chars - no split needed
msg := "This is a short message that does not need to be split because it is under the 134 octet limit here."
splitter, ok := LATIN1.(Splitter)
require.True(t, ok)

segments, err := splitter.EncodeSplit(msg, 134)
require.Nil(t, err)
require.Equal(t, 1, len(segments))
require.Equal(t, len(msg), len(segments[0]))
})

t.Run("testSplitGSM7Empty", func(t *testing.T) {
testEncodingSplit(t, GSM7BIT,
134,
Expand Down