Skip to content

Commit 82f911a

Browse files
committed
fix: implement Splitter interface for LATIN1 encoding
1 parent cd5e24c commit 82f911a

File tree

2 files changed

+119
-1
lines changed

2 files changed

+119
-1
lines changed

data/codings.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,42 @@ func (*iso88591) Decode(data []byte) (string, error) {
260260

261261
// DataCoding returns LATIN1Coding, the data-coding byte associated with this
// encoding.
func (*iso88591) DataCoding() byte {
	return LATIN1Coding
}
262262

263+
func (*iso88591) ShouldSplit(text string, octetLimit uint) bool {
264+
return uint(len(text)) > octetLimit
265+
}
266+
267+
func (c *iso88591) EncodeSplit(text string, octetLimit uint) ([][]byte, error) {
268+
var segments [][]byte
269+
runeSlice := []rune(text)
270+
limit := int(octetLimit)
271+
272+
for i := 0; i < len(runeSlice); {
273+
end := i + limit
274+
if end > len(runeSlice) {
275+
end = len(runeSlice)
276+
}
277+
278+
segment, err := c.Encode(string(runeSlice[i:end]))
279+
if err != nil {
280+
return nil, err
281+
}
282+
283+
// If encoded segment exceeds limit, reduce character count
284+
for len(segment) > limit && end > i+1 {
285+
end--
286+
segment, err = c.Encode(string(runeSlice[i:end]))
287+
if err != nil {
288+
return nil, err
289+
}
290+
}
291+
292+
segments = append(segments, segment)
293+
i = end
294+
}
295+
296+
return segments, nil
297+
}
298+
263299
// binary8bit1 is an empty struct type carrying no state.
// NOTE(review): presumably one of the 8-bit binary data codings — confirm
// against its DataCoding method.
type binary8bit1 struct{}
264300

265301
func (*binary8bit1) Encode(_ string) ([]byte, error) {

data/codings_test.go

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func shiftBitsOneRight(input []byte) []byte {
6464
}
6565

6666
func TestCoding(t *testing.T) {
67-
require.Nil(t, FromDataCoding(12))
67+
require.Equal(t, NewCustomEncoding(12, GSM7BIT), FromDataCoding(12)) // GSM7BIT is default when encoding is reserved
6868
require.Equal(t, GSM7BIT, FromDataCoding(0))
6969
require.Equal(t, ASCII, FromDataCoding(1))
7070
require.Equal(t, UCS2, FromDataCoding(8))
@@ -79,6 +79,23 @@ func TestGSM7Bit(t *testing.T) {
7979
}
8080

8181
func TestShouldSplit(t *testing.T) {
82+
t.Run("testShouldSplit_LATIN1", func(t *testing.T) {
83+
octetLim := uint(140)
84+
expect := map[string]bool{
85+
"": false,
86+
"1": false,
87+
"12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890": false, // exactly 140 chars
88+
"123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901": true, // 141 chars
89+
}
90+
91+
splitter, ok := LATIN1.(Splitter)
92+
require.True(t, ok, "LATIN1 must implement Splitter interface")
93+
for k, v := range expect {
94+
ok := splitter.ShouldSplit(k, octetLim)
95+
require.Equalf(t, v, ok, "Test case len=%d", len(k))
96+
}
97+
})
98+
8299
t.Run("testShouldSplit_GSM7BIT", func(t *testing.T) {
83100
octetLim := uint(140)
84101
expect := map[string]bool{
@@ -134,6 +151,71 @@ func TestShouldSplit(t *testing.T) {
134151
func TestSplit(t *testing.T) {
135152
require.EqualValues(t, 0o0, GSM7BITPACKED.DataCoding())
136153

154+
t.Run("testSplitLATIN1Empty", func(t *testing.T) {
155+
testEncodingSplit(t, LATIN1,
156+
134,
157+
"",
158+
[]string{
159+
"",
160+
},
161+
[]string{
162+
"",
163+
})
164+
})
165+
166+
t.Run("testSplitLATIN1", func(t *testing.T) {
167+
// 213 'a' characters - should split into 2 segments: 134 + 79
168+
msg := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
169+
splitter, ok := LATIN1.(Splitter)
170+
require.True(t, ok)
171+
172+
segments, err := splitter.EncodeSplit(msg, 134)
173+
require.Nil(t, err)
174+
require.Equal(t, 2, len(segments))
175+
require.Equal(t, 134, len(segments[0]))
176+
require.Equal(t, 79, len(segments[1]))
177+
178+
// Verify decoded content
179+
decoded1, err := LATIN1.Decode(segments[0])
180+
require.Nil(t, err)
181+
decoded2, err := LATIN1.Decode(segments[1])
182+
require.Nil(t, err)
183+
require.Equal(t, msg, decoded1+decoded2)
184+
})
185+
186+
t.Run("testSplitLATIN1WithSpecialChars", func(t *testing.T) {
187+
// Test with LATIN1 special characters (é, ñ, ü, etc.)
188+
// 153 chars total - should split into 2 segments: 134 + 19
189+
msg := "Héllo Wörld! Thís ís á tëst mëssägé wíth spëcíäl chäräctërs. Lét's sëé höw ít splíts. Möré téxt tö réäch thé límít. Änd ëvén möré tëxt tö mäké ít längér."
190+
splitter, ok := LATIN1.(Splitter)
191+
require.True(t, ok)
192+
193+
segments, err := splitter.EncodeSplit(msg, 134)
194+
require.Nil(t, err)
195+
require.Equal(t, 2, len(segments))
196+
require.LessOrEqual(t, len(segments[0]), 134)
197+
require.LessOrEqual(t, len(segments[1]), 134)
198+
199+
// Verify we can decode each segment
200+
decoded1, err := LATIN1.Decode(segments[0])
201+
require.Nil(t, err)
202+
decoded2, err := LATIN1.Decode(segments[1])
203+
require.Nil(t, err)
204+
require.Equal(t, msg, decoded1+decoded2)
205+
})
206+
207+
t.Run("testSplitLATIN1NoSplitNeeded", func(t *testing.T) {
208+
// 100 chars - no split needed
209+
msg := "This is a short message that does not need to be split because it is under the 134 octet limit here."
210+
splitter, ok := LATIN1.(Splitter)
211+
require.True(t, ok)
212+
213+
segments, err := splitter.EncodeSplit(msg, 134)
214+
require.Nil(t, err)
215+
require.Equal(t, 1, len(segments))
216+
require.Equal(t, len(msg), len(segments[0]))
217+
})
218+
137219
t.Run("testSplitGSM7Empty", func(t *testing.T) {
138220
testEncodingSplit(t, GSM7BIT,
139221
134,

0 commit comments

Comments
 (0)