Skip to content

Commit b02ba82

Browse files
committed
add iso8859_1 in ID3 header
1 parent e1f8892 commit b02ba82

File tree

2 files changed

+186
-105
lines changed

2 files changed

+186
-105
lines changed

src/Audio.cpp

Lines changed: 43 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
audio.cpp
44
55
Created on: Oct 28.2018 */char audioI2SVers[] ="\
6-
Version 3.4.0i ";
6+
Version 3.4.0j ";
77
/* Updated on: Jul 28.2025
88
99
Author: Wolle (schreibfaul1)
@@ -1877,6 +1877,7 @@ int Audio::read_ID3_Header(uint8_t* data, size_t len) {
18771877
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
18781878
if(m_controlCounter == 6) { // Read the value
18791879
m_controlCounter = 5; // only read 256 bytes
1880+
18801881
uint8_t textEncodingByte = *(data + 0); // ID3v2 Text-Encoding-Byte
18811882
// $00 – ISO-8859-1 (LATIN-1, Identical to ASCII for values smaller than 0x80).
18821883
// $01 – UCS-2 encoded Unicode with BOM (Byte Order Mark), in ID3v2.2 and ID3v2.3.
@@ -1897,6 +1898,7 @@ int Audio::read_ID3_Header(uint8_t* data, size_t len) {
18971898
return 0;
18981899
}
18991900

1901+
19001902
if( // proprietary not standard information
19011903
startsWith(m_ID3Hdr.tag, "PRIV")) {
19021904
;//AUDIO_LOG_ERROR("PRIV");
@@ -1905,78 +1907,51 @@ int Audio::read_ID3_Header(uint8_t* data, size_t len) {
19051907

19061908
if(m_ID3Hdr.framesize == 0) return 0;
19071909

1910+
ps_ptr<char> tmp;
19081911
size_t fs = m_ID3Hdr.framesize; // fs = size of the frame data field as read from header
19091912
size_t bytesToCopy = fs;
1910-
1911-
if (bytesToCopy >= m_ID3Hdr.iBuffSize) { // <= oder >= hier ist wichtig!
1912-
bytesToCopy = m_ID3Hdr.iBuffSize - 1; // Sicherstellen, dass ein Null-Terminator passt
1913-
}
19141913
size_t textDataLength = 0;
1915-
if (bytesToCopy > 0) { // Nur wenn überhaupt Daten da sind, die wir kürzen können
1916-
textDataLength = bytesToCopy - 1; // Dies ist die Anzahl der zu kopierenden TEXT-Bytes
1914+
1915+
if (bytesToCopy >= m_ID3Hdr.iBuffSize) { bytesToCopy = m_ID3Hdr.iBuffSize - 1;} // make sure a zero terminator fits
1916+
if (bytesToCopy > 0) { textDataLength = bytesToCopy - 1;} // Only if there are data that we can shorten
1917+
for (int i = 0; i < textDataLength; i++) {
1918+
m_ID3Hdr.iBuff[i] = *(data + i + 1); // Skipped the first byte (Encoding)
19171919
}
1918-
for(int i = 0; i < textDataLength; i++) {
1919-
m_ID3Hdr.iBuff[i] = *(data + i + 1); // Überspringt das erste Byte (Encoding)
1920+
1921+
if (textEncodingByte == 1 || textEncodingByte == 2) { // UTF-16 with BOM or UTF-16BE
1922+
m_ID3Hdr.iBuff[textDataLength] = 0; // UTF-16: set double zero terminator
1923+
m_ID3Hdr.iBuff[textDataLength + 1] = 0; // second '\0' for UTF-16
1924+
} else {
1925+
m_ID3Hdr.iBuff[textDataLength] = 0; // only one '\0' for ISO-8859-1 or UTF-8
19201926
}
1921-
m_ID3Hdr.iBuff[textDataLength] = 0;
1927+
19221928
m_ID3Hdr.framesize -= fs;
19231929
m_ID3Hdr.remainingHeaderBytes -= fs;
19241930
uint16_t dataLength = fs - 1;
19251931

1926-
if(textEncodingByte == 0){ // latin
1927-
latinToUTF8(m_ID3Hdr.iBuff, false);
1928-
showID3Tag(m_ID3Hdr.tag, m_ID3Hdr.iBuff.get());
1929-
}
1930-
1931-
if(textEncodingByte == 1 && dataLength > 1) { // UTF16 with BOM
1932-
int8_t data_start = 0;
1933-
if(startsWith(m_ID3Hdr.tag, "COMM")){ // language code
1934-
m_ID3Hdr.lang[0] = m_ID3Hdr.iBuff[0];
1935-
m_ID3Hdr.lang[1] = m_ID3Hdr.iBuff[1];
1936-
m_ID3Hdr.lang[2] = m_ID3Hdr.iBuff[2];
1937-
m_ID3Hdr.lang[3] = '\0';
1938-
data_start += 3;
1939-
// log_w("language code: %s", m_ID3Hdr.lang);
1940-
m_ID3Hdr.byteOrderMark = static_cast<unsigned char>(m_ID3Hdr.iBuff[data_start]) == 0xFE && static_cast<unsigned char>(m_ID3Hdr.iBuff[data_start]) == 0xFF;
1941-
data_start += 2;
1942-
m_ID3Hdr.contentDescriptorTerminator_0 = m_ID3Hdr.iBuff[data_start];
1943-
m_ID3Hdr.contentDescriptorTerminator_1 = m_ID3Hdr.iBuff[data_start + 1];
1944-
m_ID3Hdr.textStringTerminator_0 = m_ID3Hdr.iBuff[data_start + 2];
1945-
m_ID3Hdr.textStringTerminator_1 = m_ID3Hdr.iBuff[data_start + 3];
1946-
data_start += 4;
1947-
}
1948-
else{
1949-
m_ID3Hdr.byteOrderMark = static_cast<unsigned char>(m_ID3Hdr.iBuff[data_start]) == 0xFE && static_cast<unsigned char>(m_ID3Hdr.iBuff[data_start]) == 0xFF;
1950-
data_start += 2;
1951-
}
1952-
1953-
std::u16string utf16_string;
1954-
for (size_t i = data_start; i < dataLength; i += 2) {
1955-
char16_t wchar;
1956-
if(m_ID3Hdr.byteOrderMark) wchar = (static_cast<unsigned char>(m_ID3Hdr.iBuff[i]) << 8) | static_cast<unsigned char>(m_ID3Hdr.iBuff[i + 1]);
1957-
else wchar = (static_cast<unsigned char>(m_ID3Hdr.iBuff[i + 1]) << 8) | static_cast<unsigned char>(m_ID3Hdr.iBuff[i]);
1958-
utf16_string.push_back(wchar);
1959-
}
1960-
1961-
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
1962-
showID3Tag(m_ID3Hdr.tag, converter.to_bytes(utf16_string).c_str());
1932+
if(startsWith(m_ID3Hdr.tag, "COMM")){ // language code
1933+
m_ID3Hdr.lang[0] = m_ID3Hdr.iBuff[0];
1934+
m_ID3Hdr.lang[1] = m_ID3Hdr.iBuff[1];
1935+
m_ID3Hdr.lang[2] = m_ID3Hdr.iBuff[2];
1936+
m_ID3Hdr.lang[3] = '\0';
1937+
m_ID3Hdr.iBuff.shift_left(4);
19631938
}
19641939

1965-
if(textEncodingByte == 2 && dataLength > 1) { // UTF16BE
1966-
1967-
std::u16string utf16_string;
1968-
for (size_t i = 0; i < dataLength; i += 2) {
1969-
char16_t wchar = (static_cast<unsigned char>(m_ID3Hdr.iBuff[i]) << 8) | static_cast<unsigned char>(m_ID3Hdr.iBuff[i + 1]);
1970-
utf16_string.push_back(wchar);
1971-
}
1940+
char encodingTab [4][12] = {"ISO-8859-1", "UTF-16", "UTF-16BE", "UTF-8"};
1941+
// AUDIO_LOG_INFO("Tag: %s, Length: %i, Format: %s", m_ID3Hdr.tag, textDataLength, encodingTab[textEncodingByte]);
19721942

1973-
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
1974-
showID3Tag(m_ID3Hdr.tag, converter.to_bytes(utf16_string).c_str());
1943+
if (textEncodingByte == 0) { // ISO-8859-1
1944+
tmp.copy_from_iso8859_1((const uint8_t*)m_ID3Hdr.iBuff.get(), "tmp");
1945+
} else if (textEncodingByte == 1 || textEncodingByte == 2) { // UTF-16 with BOM or UTF-16BE
1946+
bool isBigEndian = (textEncodingByte == 2);
1947+
tmp.copy_from_utf16((const uint8_t*)m_ID3Hdr.iBuff.get(), isBigEndian, "tmp");
1948+
} else if (textEncodingByte == 3) { // UTF-8
1949+
// copy directly, since no conversion is needed
1950+
tmp.copy_from(m_ID3Hdr.iBuff.get());
19751951
}
19761952

1977-
if(textEncodingByte == 3) { // utf8
1978-
showID3Tag(m_ID3Hdr.tag, m_ID3Hdr.iBuff.get());
1979-
}
1953+
showID3Tag(m_ID3Hdr.tag, tmp.c_get());
1954+
19801955
return fs;
19811956
}
19821957
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -2009,7 +1984,6 @@ int Audio::read_ID3_Header(uint8_t* data, size_t len) {
20091984
if(!res){AUDIO_LOG_ERROR("http range request was not successful"); return 0;}
20101985
res = parseHttpRangeHeader();
20111986
if(!res){AUDIO_LOG_ERROR("http range response was not successful"); return 0;}
2012-
// return 0;
20131987
}
20141988
if(m_dataMode == AUDIO_LOCALFILE){
20151989
uint32_t pos = m_audiofile.position();
@@ -2031,22 +2005,17 @@ int Audio::read_ID3_Header(uint8_t* data, size_t len) {
20312005
m_ID3Hdr.SYLT.time_stamp_format = syltBuff[4];
20322006
m_ID3Hdr.SYLT.content_type = syltBuff[5];
20332007
idx = 6;
2034-
if(m_ID3Hdr.SYLT.text_encoding == 0 || m_ID3Hdr.SYLT.text_encoding == 3){ // utf-8
2035-
len = content_descriptor.copy_from((const char*)(syltBuff.get() + idx), "content_descriptor");
2036-
}
2037-
else{ // utf-16
2038-
len = content_descriptor.copy_from_utf16((const uint8_t*)(syltBuff.get() + idx), isBigEndian, "content_descriptor");
2039-
}
2008+
if (m_ID3Hdr.SYLT.text_encoding == 0) len = 1 + content_descriptor.copy_from_iso8859_1((const uint8_t*)(syltBuff.get() + idx), "content_descriptor"); // iso8859_1
2009+
else if(m_ID3Hdr.SYLT.text_encoding == 3) len = 1 + content_descriptor.copy_from((const char*)(syltBuff.get() + idx), "content_descriptor"); // utf-8
2010+
else len = 2 + content_descriptor.copy_from_utf16((const uint8_t*)(syltBuff.get() + idx), isBigEndian, "content_descriptor");// utf-16
20402011
if(len > 2) AUDIO_INFO("Lyrics: content_descriptor: %s", content_descriptor.c_get());
2012+
20412013
idx += len;
20422014
while (idx < m_ID3Hdr.SYLT.size) {
2043-
// UTF-16LE, UTF-16BE
2044-
if (m_ID3Hdr.SYLT.text_encoding == 1 || m_ID3Hdr.SYLT.text_encoding == 2) {
2045-
idx += tmp.copy_from_utf16((const uint8_t*)(syltBuff.get() + idx), isBigEndian, "sylt-text");
2046-
} else {
2047-
// ISO-8859-1 / UTF-8
2048-
idx += tmp.copy_from((const char*)syltBuff.get() + idx, "sylt-text");
2049-
}
2015+
if (m_ID3Hdr.SYLT.text_encoding == 0) idx += 1 + tmp.copy_from_iso8859_1((const uint8_t*)syltBuff.get() + idx, "sylt-text"); // ISO8859_1
2016+
else if (m_ID3Hdr.SYLT.text_encoding == 3) idx += 1 + tmp.copy_from((const char*)syltBuff.get() + idx, "sylt-text"); // UTF-8
2017+
else idx += 2 + tmp.copy_from_utf16((const uint8_t*)(syltBuff.get() + idx), isBigEndian, "sylt-text"); // UTF-16LE, UTF-16BE
2018+
20502019
if (tmp.starts_with("\n")) tmp.remove_before(1);
20512020
m_syltLines.push_back(std::move(tmp));
20522021
if (idx + 4 > m_ID3Hdr.SYLT.size) break; // no more 4 bytes?
@@ -2061,13 +2030,9 @@ int Audio::read_ID3_Header(uint8_t* data, size_t len) {
20612030
return 0;
20622031
}
20632032

2064-
2065-
2066-
2067-
20682033
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20692034

2070-
// --- section V2.2 only , greater Vers above ----
2035+
// --- section V2.2 only , higher Vers above ----
20712036
// see https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.2.html
20722037
if(m_controlCounter == 10) { // frames in V2.2, 3bytes identifier, 3bytes size descriptor
20732038

src/psram_unique_ptr.hpp

Lines changed: 143 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -299,48 +299,133 @@ class ps_ptr {
299299
// is UTF-16LE and will converted to: "Little London Girl"
300300
// UTF-16LE and UTF-16BE is often found in ID3 header
301301

302-
size_t copy_from_utf16(const uint8_t* src, bool is_big_endian = false, const char* name = nullptr) {
303-
if (!src) { log_e("arg. is null"); return 0; }
304-
std::vector<char> out;
305-
size_t i = 0;
302+
#include <vector>
303+
#include <cstdint>
304+
#include <cstring>
305+
#include <stdexcept>
306306

307-
if(is_big_endian == false){ // maybe we have a BOM
308-
if (src[i] == 0xFF && src[i + 1] == 0xFE) {
309-
is_big_endian = false; // UTF-16 Little Endian
310-
i += 2; // skip byte order mark + \0\0
311-
} else if (src[i] == 0xFE && src[i + 1] == 0xFF) {
312-
is_big_endian = true; // UTF-16 Big Endian
313-
i += 2; // skip byte order mark + \0\0
314-
} else {
315-
// BOM is missing or invalid
316-
}
307+
// convert UTF-16 to UTF-8 and stop at zero terminator
308+
size_t copy_from_utf16(const uint8_t* src, bool is_big_endian = false, const char* name = nullptr) {
309+
if (!src) {
310+
log_e("arg. is null");
311+
return 0;
312+
}
313+
std::vector<char> out;
314+
size_t i = 0;
315+
316+
// BOM handling: a leading byte order mark overrides is_big_endian and is skipped
317+
if (src[i] == 0xFF && src[i + 1] == 0xFE) {
318+
is_big_endian = false; // UTF-16LE
319+
i += 2;
320+
} else if (src[i] == 0xFE && src[i + 1] == 0xFF) {
321+
is_big_endian = true; // UTF-16BE
322+
i += 2;
323+
}
324+
325+
while (true) {
326+
// stop at the 0x0000 terminator; NOTE(review): src carries no length, so the index guard below cannot detect the real end of the buffer
327+
if (i + 1 >= std::numeric_limits<size_t>::max() || (src[i] == 0x00 && src[i + 1] == 0x00)) {
328+
break; // null terminator or end of buffer
317329
}
318330

319-
while (true) {
320-
uint16_t ch;
331+
uint16_t ch;
332+
if (is_big_endian) {
333+
ch = (src[i] << 8) | src[i + 1];
334+
} else {
335+
ch = (src[i + 1] << 8) | src[i];
336+
}
337+
i += 2;
338+
339+
uint32_t codepoint = ch;
340+
341+
// check for surrogate pairs
342+
if (ch >= 0xD800 && ch <= 0xDBFF) { // High surrogate
343+
if ((i + 1 >= std::numeric_limits<size_t>::max()) || (src[i] == 0x00 && src[i + 1] == 0x00)) {
344+
log_e("Invalid surrogate pair: missing low surrogate");
345+
break;
346+
}
347+
uint16_t ch2;
321348
if (is_big_endian) {
322-
ch = (src[i] << 8) | src[i + 1];
349+
ch2 = (src[i] << 8) | src[i + 1];
323350
} else {
324-
ch = (src[i + 1] << 8) | src[i];
351+
ch2 = (src[i + 1] << 8) | src[i];
352+
}
353+
if (ch2 < 0xDC00 || ch2 > 0xDFFF) {
354+
log_e("Invalid surrogate pair: invalid low surrogate");
355+
break;
325356
}
326357
i += 2;
327-
if (ch == 0x0000) break; // null-terminiert
358+
codepoint = 0x10000 + ((ch - 0xD800) << 10) + (ch2 - 0xDC00);
359+
} else if (ch >= 0xDC00 && ch <= 0xDFFF) {
360+
log_e("Invalid surrogate pair: unexpected low surrogate");
361+
break;
362+
}
363+
364+
// UTF-16 → UTF-8
365+
if (codepoint < 0x80) {
366+
out.push_back(static_cast<char>(codepoint));
367+
} else if (codepoint < 0x800) {
368+
out.push_back(0xC0 | (codepoint >> 6));
369+
out.push_back(0x80 | (codepoint & 0x3F));
370+
} else if (codepoint < 0x10000) {
371+
out.push_back(0xE0 | (codepoint >> 12));
372+
out.push_back(0x80 | ((codepoint >> 6) & 0x3F));
373+
out.push_back(0x80 | (codepoint & 0x3F));
374+
} else if (codepoint < 0x110000) {
375+
out.push_back(0xF0 | (codepoint >> 18));
376+
out.push_back(0x80 | ((codepoint >> 12) & 0x3F));
377+
out.push_back(0x80 | ((codepoint >> 6) & 0x3F));
378+
out.push_back(0x80 | (codepoint & 0x3F));
379+
} else {
380+
log_e("Invalid codepoint");
381+
break;
382+
}
383+
}
384+
385+
// append the null terminator
386+
out.push_back('\0');
387+
388+
// Speicher allozieren und kopieren
389+
std::size_t bytes = out.size();
390+
alloc(bytes, name);
391+
std::memcpy(mem.get(), out.data(), bytes);
392+
return i;
393+
}
394+
395+
// —————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
396+
// 📌📌📌 C O P Y _ F R O M _ I S O _ 8 8 5 9 - 1 📌📌📌
397+
// convert ISO 8859-1 to UTF-8 and stop at zero terminator
398+
// 0x48 0x65 0x6C 0x6C 0x6F 0x20 0xE4 0x62 0x63 0x00 -> "Hello äbc"
328399

329-
// UTF-16 → UTF-8
400+
size_t copy_from_iso8859_1(const uint8_t* src, const char* name = nullptr) {
401+
if (!src) {
402+
log_e("arg. is null");
403+
return 0;
404+
}
405+
std::vector<char> out;
406+
size_t i = 0;
407+
408+
while (true) {
409+
uint8_t ch = src[i];
410+
if (ch == 0x00) {
411+
break; // 'Nullterminator'
412+
}
413+
414+
// ISO-8859-1 → UTF-8
330415
if (ch < 0x80) {
331-
out.push_back(static_cast<char>(ch));
332-
} else if (ch < 0x800) {
333-
out.push_back(0xC0 | (ch >> 6));
334-
out.push_back(0x80 | (ch & 0x3F));
416+
out.push_back(static_cast<char>(ch)); // Ascii area remains unchanged
335417
} else {
336-
out.push_back(0xE0 | (ch >> 12));
337-
out.push_back(0x80 | ((ch >> 6) & 0x3F));
418+
// chars from 0x80 to 0xff are coded as 2-byte sequences in UTF-8
419+
out.push_back(0xC0 | (ch >> 6));
338420
out.push_back(0x80 | (ch & 0x3F));
339421
}
422+
i++;
340423
}
424+
425+
// add zero terminator
341426
out.push_back('\0');
342427

343-
// save
428+
// allocate and copy memory
344429
std::size_t bytes = out.size();
345430
alloc(bytes, name);
346431
std::memcpy(mem.get(), out.data(), bytes);
@@ -1436,6 +1521,37 @@ void unicodeToUTF8(const char* src) {
14361521
std::size_t remaining = std::strlen(pos);
14371522
std::memmove(str, pos, remaining + 1); // inkl. '\0'
14381523
}
1524+
// —————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
1525+
// 📌📌📌 S H I F T _ L E F T 📌📌📌
1526+
// Shift the contents of the buffer n bytes to the left and fill the rest with zeros
1527+
// Consider UTF-16 data and the size of the allocated memory
1528+
1529+
void shift_left(int n) {
1530+
if (!mem || allocated_size == 0) {
1531+
printf("Error: No allocated memory or invalid buffer\n");
1532+
return;
1533+
}
1534+
1535+
if (n < 0 || static_cast<std::size_t>(n) > allocated_size) {
1536+
printf("Error: Invalid shift amount %d, allocated_size=%zu\n", n, allocated_size);
1537+
return;
1538+
}
1539+
1540+
// if (n % 2 != 0) {
1541+
// printf("Warning: Shift amount %d is not even, adjusting to %d for UTF-16 alignment\n", n, n + 1);
1542+
// n += 1; // make sure n is even for UTF-16
1543+
// }
1544+
1545+
char* str = mem.get();
1546+
if (n == 0) return;
1547+
1548+
// shift the buffer n bytes to the left
1549+
std::size_t remaining = allocated_size - n;
1550+
std::memmove(str, str + n, remaining);
1551+
1552+
// fill the rest of the memory with zeros
1553+
std::memset(str + remaining, 0, n);
1554+
}
14391555
// —————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
14401556
// 📌📌📌 C O N T A I N S 📌📌📌
14411557

0 commit comments

Comments
 (0)