Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions data/codings.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,32 @@ func (*ascii) Decode(data []byte) (string, error) {

// DataCoding returns the data-coding identifier of this coder, which is
// always ASCIICoding.
func (*ascii) DataCoding() byte {
	return ASCIICoding
}

// ShouldSplit reports whether text is too long for a single segment: it is
// true when the byte length of text is strictly greater than octetLimit.
func (*ascii) ShouldSplit(text string, octetLimit uint) bool {
	size := uint(len(text))
	return octetLimit < size
}

// EncodeSplit cuts text into consecutive chunks of at most octetLimit bytes
// and encodes each chunk with c.Encode, returning the encoded segments in
// their original order.
//
// Cut points are byte offsets into text, the same unit ShouldSplit measures.
// An octetLimit of 0 could never consume any input (the loop would spin
// forever), so it is replaced by 134 octets: a 140-octet short message minus
// a 6-octet concatenation header. Empty text yields a nil slice and a nil
// error. The first error reported by c.Encode aborts the split and is
// returned unchanged together with a nil slice.
func (c *ascii) EncodeSplit(text string, octetLimit uint) ([][]byte, error) {
	if len(text) == 0 {
		return nil, nil
	}
	if octetLimit == 0 {
		octetLimit = 134
	}

	// Clamp in unsigned space before converting to int: a limit above the int
	// range would otherwise become negative and break the slice bounds below.
	limit := len(text)
	if octetLimit < uint(limit) {
		limit = int(octetLimit)
	}

	segments := make([][]byte, 0, (len(text)+limit-1)/limit)
	for start := 0; start < len(text); start += limit {
		end := start + limit
		if end > len(text) {
			end = len(text)
		}

		segment, err := c.Encode(text[start:end])
		if err != nil {
			return nil, err
		}
		segments = append(segments, segment)
	}

	return segments, nil
}

// iso88591 is a field-less type used purely as a method receiver; an Encode
// method is declared on it directly below.
// NOTE(review): presumably the ISO-8859-1 coder — confirm against its methods.
type iso88591 struct{}

func (*iso88591) Encode(str string) ([]byte, error) {
Expand Down Expand Up @@ -296,6 +322,42 @@ func (*iso88595) Decode(data []byte) (string, error) {

// DataCoding returns the data-coding identifier of this coder, which is
// always CYRILLICCoding.
func (*iso88595) DataCoding() byte {
	return CYRILLICCoding
}

// ShouldSplit reports whether text is too long for a single segment: it is
// true when text holds strictly more runes than octetLimit. Length is counted
// in runes, not UTF-8 bytes.
// NOTE(review): this presumes the coding emits one octet per rune — confirm
// against Encode.
func (*iso88595) ShouldSplit(text string, octetLimit uint) bool {
	var runes uint
	for range text {
		runes++
	}
	return runes > octetLimit
}

// EncodeSplit cuts text into consecutive rune-aligned chunks and encodes each
// one with c.Encode, returning the encoded segments in their original order.
//
// A chunk starts out with up to octetLimit runes. If its encoded form is
// longer than octetLimit bytes, runes are dropped from its end and it is
// re-encoded until it fits or only a single rune is left, so every segment
// carries at least one rune and the split always advances.
//
// An octetLimit of 0 could never consume any input (the loop would spin
// forever), so it is replaced by 134 octets: a 140-octet short message minus
// a 6-octet concatenation header. Empty text yields a nil slice and a nil
// error. The first error reported by c.Encode aborts the split and is
// returned unchanged together with a nil slice.
func (c *iso88595) EncodeSplit(text string, octetLimit uint) ([][]byte, error) {
	runeSlice := []rune(text)
	if len(runeSlice) == 0 {
		return nil, nil
	}
	if octetLimit == 0 {
		octetLimit = 134
	}

	// step is the number of runes tried per chunk. Clamp it in unsigned space
	// before converting to int: a limit above the int range would otherwise
	// become negative and break the slice bounds below.
	step := len(runeSlice)
	if octetLimit < uint(step) {
		step = int(octetLimit)
	}

	// Capacity is only an estimate; shrinking chunks may add more segments.
	segments := make([][]byte, 0, (len(runeSlice)+step-1)/step)
	for start := 0; start < len(runeSlice); {
		end := start + step
		if end > len(runeSlice) {
			end = len(runeSlice)
		}

		segment, err := c.Encode(string(runeSlice[start:end]))
		if err != nil {
			return nil, err
		}

		// Compare against the original unsigned limit, not the clamped step,
		// so a short text with a generous limit is never shrunk needlessly.
		for uint(len(segment)) > octetLimit && end > start+1 {
			end--
			segment, err = c.Encode(string(runeSlice[start:end]))
			if err != nil {
				return nil, err
			}
		}

		segments = append(segments, segment)
		start = end
	}

	return segments, nil
}

// iso88598 is a field-less type used purely as the receiver for the coder
// methods declared below; its DataCoding method returns HEBREWCoding.
type iso88598 struct{}

func (*iso88598) Encode(str string) ([]byte, error) {
Expand All @@ -308,6 +370,42 @@ func (*iso88598) Decode(data []byte) (string, error) {

// DataCoding returns the data-coding identifier of this coder, which is
// always HEBREWCoding.
func (*iso88598) DataCoding() byte {
	return HEBREWCoding
}

// ShouldSplit reports whether text is too long for a single segment: it is
// true when text holds strictly more runes than octetLimit. Length is counted
// in runes, not UTF-8 bytes.
// NOTE(review): this presumes the coding emits one octet per rune — confirm
// against Encode.
func (*iso88598) ShouldSplit(text string, octetLimit uint) bool {
	var runes uint
	for range text {
		runes++
	}
	return runes > octetLimit
}

// EncodeSplit cuts text into consecutive rune-aligned chunks and encodes each
// one with c.Encode, returning the encoded segments in their original order.
//
// A chunk starts out with up to octetLimit runes. If its encoded form is
// longer than octetLimit bytes, runes are dropped from its end and it is
// re-encoded until it fits or only a single rune is left, so every segment
// carries at least one rune and the split always advances.
//
// An octetLimit of 0 could never consume any input (the loop would spin
// forever), so it is replaced by 134 octets: a 140-octet short message minus
// a 6-octet concatenation header. Empty text yields a nil slice and a nil
// error. The first error reported by c.Encode aborts the split and is
// returned unchanged together with a nil slice.
func (c *iso88598) EncodeSplit(text string, octetLimit uint) ([][]byte, error) {
	runeSlice := []rune(text)
	if len(runeSlice) == 0 {
		return nil, nil
	}
	if octetLimit == 0 {
		octetLimit = 134
	}

	// step is the number of runes tried per chunk. Clamp it in unsigned space
	// before converting to int: a limit above the int range would otherwise
	// become negative and break the slice bounds below.
	step := len(runeSlice)
	if octetLimit < uint(step) {
		step = int(octetLimit)
	}

	// Capacity is only an estimate; shrinking chunks may add more segments.
	segments := make([][]byte, 0, (len(runeSlice)+step-1)/step)
	for start := 0; start < len(runeSlice); {
		end := start + step
		if end > len(runeSlice) {
			end = len(runeSlice)
		}

		segment, err := c.Encode(string(runeSlice[start:end]))
		if err != nil {
			return nil, err
		}

		// Compare against the original unsigned limit, not the clamped step,
		// so a short text with a generous limit is never shrunk needlessly.
		for uint(len(segment)) > octetLimit && end > start+1 {
			end--
			segment, err = c.Encode(string(runeSlice[start:end]))
			if err != nil {
				return nil, err
			}
		}

		segments = append(segments, segment)
		start = end
	}

	return segments, nil
}

// ucs2 is a field-less type used purely as a method receiver; an Encode
// method is declared on it directly below.
// NOTE(review): presumably the UCS-2 coder — confirm against its methods.
type ucs2 struct{}

func (*ucs2) Encode(str string) ([]byte, error) {
Expand Down
105 changes: 105 additions & 0 deletions data/codings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,51 @@ func TestGSM7Bit(t *testing.T) {
}

func TestShouldSplit(t *testing.T) {
t.Run("testShouldSplit_ASCII", func(t *testing.T) {
octetLim := uint(140)
expect := map[string]bool{
"": false,
"1": false,
"12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890": false, // exactly 140 chars
"123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901": true, // 141 chars
}

splitter, ok := ASCII.(Splitter)
require.True(t, ok, "ASCII must implement Splitter interface")
for k, v := range expect {
ok := splitter.ShouldSplit(k, octetLim)
require.Equalf(t, v, ok, "Test case len=%d", len(k))
}
})

t.Run("testShouldSplit_CYRILLIC", func(t *testing.T) {
octetLim := uint(140)
splitter, ok := CYRILLIC.(Splitter)
require.True(t, ok, "CYRILLIC must implement Splitter interface")

// 140 Cyrillic chars
msg140 := "аааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааа"
require.False(t, splitter.ShouldSplit(msg140, octetLim), "140 chars should not split")

// 141 Cyrillic chars
msg141 := "ааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааа"
require.True(t, splitter.ShouldSplit(msg141, octetLim), "141 chars should split")
})

t.Run("testShouldSplit_HEBREW", func(t *testing.T) {
octetLim := uint(140)
splitter, ok := HEBREW.(Splitter)
require.True(t, ok, "HEBREW must implement Splitter interface")

// 140 Hebrew chars
msg140 := "אאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאא"
require.False(t, splitter.ShouldSplit(msg140, octetLim), "140 chars should not split")

// 141 Hebrew chars
msg141 := "אאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאא"
require.True(t, splitter.ShouldSplit(msg141, octetLim), "141 chars should split")
})

t.Run("testShouldSplit_GSM7BIT", func(t *testing.T) {
octetLim := uint(140)
expect := map[string]bool{
Expand Down Expand Up @@ -134,6 +179,66 @@ func TestShouldSplit(t *testing.T) {
func TestSplit(t *testing.T) {
require.EqualValues(t, 0o0, GSM7BITPACKED.DataCoding())

t.Run("testSplitASCII", func(t *testing.T) {
// 212 ASCII chars - should split into 2 segments: 134 + 78
msg := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
splitter, ok := ASCII.(Splitter)
require.True(t, ok)

segments, err := splitter.EncodeSplit(msg, 134)
require.Nil(t, err)
require.Equal(t, 2, len(segments))
require.Equal(t, 134, len(segments[0]))
require.Equal(t, 78, len(segments[1]))

// Verify decoded content
decoded1, err := ASCII.Decode(segments[0])
require.Nil(t, err)
decoded2, err := ASCII.Decode(segments[1])
require.Nil(t, err)
require.Equal(t, msg, decoded1+decoded2)
})

t.Run("testSplitCYRILLIC", func(t *testing.T) {
// 200 Cyrillic chars - should split into 2 segments
msg := "аааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааааа"
splitter, ok := CYRILLIC.(Splitter)
require.True(t, ok)

segments, err := splitter.EncodeSplit(msg, 134)
require.Nil(t, err)
require.Equal(t, 2, len(segments))
require.LessOrEqual(t, len(segments[0]), 134)
require.LessOrEqual(t, len(segments[1]), 134)

// Verify decoded content
decoded1, err := CYRILLIC.Decode(segments[0])
require.Nil(t, err)
decoded2, err := CYRILLIC.Decode(segments[1])
require.Nil(t, err)
require.Equal(t, msg, decoded1+decoded2)
})

t.Run("testSplitHEBREW", func(t *testing.T) {
// 200 Hebrew chars - should split into 2 segments
msg := "אאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאאא"
splitter, ok := HEBREW.(Splitter)
require.True(t, ok)

segments, err := splitter.EncodeSplit(msg, 134)
require.Nil(t, err)
require.Equal(t, 2, len(segments))
require.LessOrEqual(t, len(segments[0]), 134)
require.LessOrEqual(t, len(segments[1]), 134)

// Verify decoded content
decoded1, err := HEBREW.Decode(segments[0])
require.Nil(t, err)
decoded2, err := HEBREW.Decode(segments[1])
require.Nil(t, err)
require.Equal(t, msg, decoded1+decoded2)
})

t.Run("testSplitGSM7Empty", func(t *testing.T) {
testEncodingSplit(t, GSM7BIT,
134,
Expand Down