Skip to content

Commit e39d924

Browse files
committed
input/TextInputStream: Strip UTF-8 BOM on first ReadLine()
Fixes parsing of cue sheets with a UTF-8 BOM. The BOM is now detected and consumed before the first line is parsed.
1 parent e5df578 commit e39d924

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

src/input/TextInputStream.cxx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,26 @@ TextInputStream::~TextInputStream() noexcept = default;
1515
char *
1616
TextInputStream::ReadLine()
1717
{
18+
if (!bom_checked) {
19+
bom_checked = true;
20+
21+
/* try to strip a UTF-8 BOM;
22+
keep all bytes if it's not a BOM */
23+
auto dest = buffer.Write();
24+
assert(dest.size() >= 3);
25+
dest = dest.first(3);
26+
size_t nbytes = is->LockRead(std::as_writable_bytes(dest));
27+
buffer.Append(nbytes);
28+
29+
auto r = buffer.Read();
30+
if (r.size() >= 3 &&
31+
static_cast<unsigned char>(r[0]) == 0xEF &&
32+
static_cast<unsigned char>(r[1]) == 0xBB &&
33+
static_cast<unsigned char>(r[2]) == 0xBF) {
34+
buffer.Consume(3);
35+
}
36+
}
37+
1838
char *line = ReadBufferedLine(buffer);
1939
if (line != nullptr)
2040
return line;

src/input/TextInputStream.hxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
class TextInputStream {
1111
InputStreamPtr is;
1212
StaticFifoBuffer<char, 4096> buffer;
13+
bool bom_checked = false;
1314

1415
public:
1516
/**

0 commit comments

Comments
 (0)