Skip to content

Commit a6cda1a

Browse files
rpuneetclaude
andauthored
fix(id3): properly parse COMM and TXXX frames (#30)
Fixes #20 COMM (Comment) frames were displaying as "Binary data" instead of the actual comment text due to improper handling of the frame structure. Changes: - Fix decodeCommentFrame to properly handle description/comment separator - Add decodeUserTextFrame for TXXX frames - Add support for ID3v2.2 COM frames (in addition to COMM) - Handle edge case where description is missing (backward compat) COMM frame structure: encoding + language + description\0 + comment TXXX frame structure: encoding + description\0 + value Co-authored-by: Claude Opus 4.5 <[email protected]>
1 parent 02f42c3 commit a6cda1a

File tree

2 files changed

+280
-12
lines changed

2 files changed

+280
-12
lines changed

internal/parser/id3/id3.go

Lines changed: 100 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,12 @@ func parseFrame(r io.ReaderAt, pos int64, version byte) (*parser.Tag, int64, err
242242
// Attached picture - store metadata about it
243243
tag.Value = fmt.Sprintf("Picture (%d bytes)", frameSize)
244244
tag.DataType = "binary"
245-
} else if frameID == frameComment {
246-
// Comment frame
245+
} else if frameID == frameComment || frameID == frameV2Comment {
246+
// Comment frame (COMM for v2.3/2.4, COM for v2.2)
247247
tag.Value = decodeCommentFrame(frameData)
248+
} else if frameID == frameUserText {
249+
// User defined text frame (TXXX)
250+
tag.Value = decodeUserTextFrame(frameData)
248251
} else {
249252
// Generic binary frame
250253
tag.Value = fmt.Sprintf("Binary data (%d bytes)", frameSize)
@@ -298,29 +301,114 @@ func decodeTextFrame(data []byte) string {
298301
}
299302
}
300303

301-
// decodeCommentFrame decodes comment frame (COMM)
304+
// decodeCommentFrame decodes comment frame (COMM/COM)
305+
// Structure: encoding (1 byte) + language (3 bytes) + description (null-terminated) + comment
302306
func decodeCommentFrame(data []byte) string {
303-
if len(data) < 4 {
307+
if len(data) < 5 { // Minimum: encoding + language + null terminator
304308
return ""
305309
}
306310

307311
encoding := data[0]
308-
// Skip language (3 bytes) and short description
309-
// For simplicity, just decode the entire content
310-
text := data[4:]
312+
// Skip language (3 bytes at bytes 1-3)
313+
content := data[4:]
314+
315+
// Find the null terminator that separates description from comment
316+
// Null terminator size depends on encoding
317+
var commentStart int
318+
if encoding == encodingUTF16BOM || encoding == encodingUTF16BE {
319+
// UTF-16 uses double-null terminator
320+
nullPos := findDoubleNull(content)
321+
if nullPos >= 0 && nullPos+2 < len(content) {
322+
// There's content after the double-null, that's the comment
323+
commentStart = nullPos + 2
324+
} else {
325+
// No proper separator found, use entire content as comment
326+
commentStart = 0
327+
}
328+
} else {
329+
// ISO-8859-1 and UTF-8 use single null
330+
nullPos := findNull(content)
331+
if nullPos >= 0 && nullPos+1 < len(content) {
332+
// There's content after the null, that's the comment
333+
commentStart = nullPos + 1
334+
} else {
335+
// No proper separator found, use entire content as comment
336+
commentStart = 0
337+
}
338+
}
339+
340+
text := content[commentStart:]
341+
return decodeEncodedText(text, encoding)
342+
}
343+
344+
// decodeUserTextFrame decodes user defined text frame (TXXX)
345+
// Structure: encoding (1 byte) + description (null-terminated) + value
346+
func decodeUserTextFrame(data []byte) string {
347+
if len(data) < 2 {
348+
return ""
349+
}
350+
351+
encoding := data[0]
352+
content := data[1:]
353+
354+
// Find the null terminator that separates description from value
355+
var valueStart int
356+
if encoding == encodingUTF16BOM || encoding == encodingUTF16BE {
357+
nullPos := findDoubleNull(content)
358+
if nullPos >= 0 && nullPos+2 < len(content) {
359+
valueStart = nullPos + 2
360+
} else {
361+
// No proper separator, use entire content
362+
valueStart = 0
363+
}
364+
} else {
365+
nullPos := findNull(content)
366+
if nullPos >= 0 && nullPos+1 < len(content) {
367+
valueStart = nullPos + 1
368+
} else {
369+
// No proper separator, use entire content
370+
valueStart = 0
371+
}
372+
}
311373

374+
text := content[valueStart:]
375+
return decodeEncodedText(text, encoding)
376+
}
377+
378+
// decodeEncodedText decodes text based on encoding byte
379+
func decodeEncodedText(data []byte, encoding byte) string {
312380
switch encoding {
313381
case encodingISO88591:
314-
return string(trimNull(text))
382+
return string(trimNull(data))
315383
case encodingUTF16BOM:
316-
return decodeUTF16WithBOM(text)
384+
return decodeUTF16WithBOM(data)
317385
case encodingUTF16BE:
318-
return decodeUTF16BE(text)
386+
return decodeUTF16BE(data)
319387
case encodingUTF8:
320-
return string(trimNull(text))
388+
return string(trimNull(data))
321389
default:
322-
return string(trimNull(text))
390+
return string(trimNull(data))
391+
}
392+
}
393+
394+
// findNull finds the first null byte and returns its index, or -1 if not found
395+
func findNull(data []byte) int {
396+
for i, b := range data {
397+
if b == 0 {
398+
return i
399+
}
400+
}
401+
return -1
402+
}
403+
404+
// findDoubleNull finds the first double-null (UTF-16 terminator) and returns its index
405+
func findDoubleNull(data []byte) int {
406+
for i := 0; i < len(data)-1; i += 2 {
407+
if data[i] == 0 && data[i+1] == 0 {
408+
return i
409+
}
323410
}
411+
return -1
324412
}
325413

326414
// decodeUTF16WithBOM decodes UTF-16 with byte order mark

internal/parser/id3/id3_test.go

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,3 +1110,183 @@ func TestParser_Parse_TooManyFrames(t *testing.T) {
11101110
t.Fatalf("Parse() got %d directories, want 1", len(dirs))
11111111
}
11121112
}
1113+
1114+
func TestParser_Parse_CommentFrame_WithDescription(t *testing.T) {
1115+
var buf bytes.Buffer
1116+
1117+
// COMM frame with proper structure: encoding + lang + description\0 + comment\0
1118+
buf.Write([]byte{'I', 'D', '3'})
1119+
buf.WriteByte(0x03)
1120+
buf.WriteByte(0x00)
1121+
buf.WriteByte(0x00)
1122+
buf.Write(encodeSynchsafeInt(0x30))
1123+
1124+
// COMM frame: encoding + lang + desc\0 + comment\0
1125+
commFrame := []byte{
1126+
0x00, // Encoding: ISO-8859-1
1127+
'e', 'n', 'g', // Language
1128+
'S', 'h', 'o', 'r', 't', 0x00, // Description: "Short\0"
1129+
'T', 'h', 'i', 's', ' ', 'i', 's', ' ', // Comment text
1130+
't', 'h', 'e', ' ', 'c', 'o', 'm', 'm',
1131+
'e', 'n', 't', 0x00,
1132+
}
1133+
1134+
buf.Write([]byte{'C', 'O', 'M', 'M'})
1135+
buf.Write([]byte{0x00, 0x00, 0x00, byte(len(commFrame))})
1136+
buf.Write([]byte{0x00, 0x00}) // Flags
1137+
buf.Write(commFrame)
1138+
1139+
p := New()
1140+
r := bytes.NewReader(buf.Bytes())
1141+
1142+
dirs, err := p.Parse(r)
1143+
if err != nil {
1144+
t.Fatalf("Parse() error = %v", err)
1145+
}
1146+
1147+
// Check for comment frame with correct value
1148+
foundComment := false
1149+
for _, tag := range dirs[0].Tags {
1150+
if tag.Name == "Comment" {
1151+
foundComment = true
1152+
if tag.Value != "This is the comment" {
1153+
t.Errorf("Comment value = %q, want %q", tag.Value, "This is the comment")
1154+
}
1155+
}
1156+
}
1157+
if !foundComment {
1158+
t.Error("Comment frame not found")
1159+
}
1160+
}
1161+
1162+
func TestParser_Parse_UserTextFrame(t *testing.T) {
1163+
var buf bytes.Buffer
1164+
1165+
// TXXX frame with structure: encoding + description\0 + value\0
1166+
buf.Write([]byte{'I', 'D', '3'})
1167+
buf.WriteByte(0x03)
1168+
buf.WriteByte(0x00)
1169+
buf.WriteByte(0x00)
1170+
buf.Write(encodeSynchsafeInt(0x30))
1171+
1172+
// TXXX frame: encoding + desc\0 + value\0
1173+
txxxFrame := []byte{
1174+
0x00, // Encoding: ISO-8859-1
1175+
'M', 'y', 'D', 'e', 's', 'c', 0x00, // Description: "MyDesc\0"
1176+
'M', 'y', 'V', 'a', 'l', 'u', 'e', 0x00, // Value: "MyValue\0"
1177+
}
1178+
1179+
buf.Write([]byte{'T', 'X', 'X', 'X'})
1180+
buf.Write([]byte{0x00, 0x00, 0x00, byte(len(txxxFrame))})
1181+
buf.Write([]byte{0x00, 0x00}) // Flags
1182+
buf.Write(txxxFrame)
1183+
1184+
p := New()
1185+
r := bytes.NewReader(buf.Bytes())
1186+
1187+
dirs, err := p.Parse(r)
1188+
if err != nil {
1189+
t.Fatalf("Parse() error = %v", err)
1190+
}
1191+
1192+
// Check for user text frame with correct value
1193+
foundTXXX := false
1194+
for _, tag := range dirs[0].Tags {
1195+
if tag.Name == "User Defined Text" {
1196+
foundTXXX = true
1197+
if tag.Value != "MyValue" {
1198+
t.Errorf("User Defined Text value = %q, want %q", tag.Value, "MyValue")
1199+
}
1200+
}
1201+
}
1202+
if !foundTXXX {
1203+
t.Error("User Defined Text frame not found")
1204+
}
1205+
}
1206+
1207+
func TestParser_Parse_ID3v22_CommentFrame(t *testing.T) {
1208+
var buf bytes.Buffer
1209+
1210+
// ID3v2.2 COM frame
1211+
buf.Write([]byte{'I', 'D', '3'})
1212+
buf.WriteByte(0x02) // Version 2.2
1213+
buf.WriteByte(0x00)
1214+
buf.WriteByte(0x00)
1215+
buf.Write(encodeSynchsafeInt(0x20))
1216+
1217+
// COM frame (3-char ID for v2.2): encoding + lang + comment
1218+
comFrame := []byte{
1219+
0x00, // Encoding: ISO-8859-1
1220+
'e', 'n', 'g', // Language
1221+
'H', 'e', 'l', 'l', 'o', 0x00, // Comment: "Hello\0"
1222+
}
1223+
1224+
buf.Write([]byte{'C', 'O', 'M'})
1225+
buf.Write([]byte{0x00, 0x00, byte(len(comFrame))}) // 3-byte size for v2.2
1226+
buf.Write(comFrame)
1227+
1228+
p := New()
1229+
r := bytes.NewReader(buf.Bytes())
1230+
1231+
dirs, err := p.Parse(r)
1232+
if err != nil {
1233+
t.Fatalf("Parse() error = %v", err)
1234+
}
1235+
1236+
// Check for comment frame
1237+
foundComment := false
1238+
for _, tag := range dirs[0].Tags {
1239+
if tag.Name == "Comment" {
1240+
foundComment = true
1241+
if tag.Value != "Hello" {
1242+
t.Errorf("Comment value = %q, want %q", tag.Value, "Hello")
1243+
}
1244+
}
1245+
}
1246+
if !foundComment {
1247+
t.Error("ID3v2.2 Comment frame not found")
1248+
}
1249+
}
1250+
1251+
func TestDecodeUserTextFrame(t *testing.T) {
1252+
tests := []struct {
1253+
name string
1254+
data []byte
1255+
want string
1256+
}{
1257+
{
1258+
name: "ISO-8859-1 with description",
1259+
data: []byte{0x00, 'd', 'e', 's', 'c', 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
1260+
want: "value",
1261+
},
1262+
{
1263+
name: "ISO-8859-1 no description",
1264+
data: []byte{0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
1265+
want: "value",
1266+
},
1267+
{
1268+
name: "UTF-8 with description",
1269+
data: []byte{0x03, 'd', 'e', 's', 'c', 0x00, 'v', 'a', 'l', 'u', 'e', 0x00},
1270+
want: "value",
1271+
},
1272+
{
1273+
name: "too short",
1274+
data: []byte{0x00},
1275+
want: "",
1276+
},
1277+
{
1278+
name: "empty",
1279+
data: []byte{},
1280+
want: "",
1281+
},
1282+
}
1283+
1284+
for _, tt := range tests {
1285+
t.Run(tt.name, func(t *testing.T) {
1286+
got := decodeUserTextFrame(tt.data)
1287+
if got != tt.want {
1288+
t.Errorf("decodeUserTextFrame() = %q, want %q", got, tt.want)
1289+
}
1290+
})
1291+
}
1292+
}

0 commit comments

Comments
 (0)