Skip to content

Commit 0d73563

Browse files
fix: handle UTF BOM in config files (v3 backport) (#10171)
* feat: add BOM handling implementation and tests for v3-maintenance backport
  - Add removeBOMAndValidate function to detect and handle BOMs in config files
  - Remove UTF-8 BOM automatically during file parsing
  - Error on unsupported BOMs (UTF-16 BE/LE, UTF-32 BE/LE) with descriptive messages
  - Add comprehensive test suite covering all BOM scenarios
  - Backport from main branch BOM handling implementation

  Co-Authored-By: [email protected] <[email protected]>

* add changeset for v3-maintenance backport

  Co-Authored-By: [email protected] <[email protected]>

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: [email protected] <[email protected]>
1 parent b5d9bb0 commit 0d73563

File tree

3 files changed

+145
-1
lines changed

3 files changed

+145
-1
lines changed

.changeset/violet-walls-tie.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"wrangler": patch
3+
---
4+
5+
Handle UTF BOM in config files - detect and remove UTF-8 BOMs, error on unsupported BOMs (UTF-16, UTF-32)

packages/wrangler/src/__tests__/config/configuration.test.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6616,6 +6616,93 @@ describe("experimental_readRawConfig()", () => {
66166616
);
66176617
});
66186618

6619+
// Verifies how config loading reacts to a byte order marker at the start of
// the file: a UTF-8 BOM is expected to be stripped transparently, while
// UTF-16 / UTF-32 BOMs are expected to be rejected with a descriptive error.
describe("BOM (Byte Order Marker) handling", () => {
	runInTempDir();

	/** Writes `text` (encoded as UTF-8) to `fileName`, preceded by the raw `bom` bytes. */
	const writeWithBOM = (fileName: string, bom: number[], text: string): void => {
		const payload = Buffer.concat([
			Buffer.from(bom),
			Buffer.from(text, "utf-8"),
		]);
		fs.writeFileSync(fileName, payload);
	};

	const UTF8_BOM = [0xef, 0xbb, 0xbf];

	it("should remove UTF-8 BOM from TOML config files", () => {
		writeWithBOM(
			"wrangler.toml",
			UTF8_BOM,
			'name = "test-worker"\ncompatibility_date = "2022-01-12"'
		);

		const parsed = readConfig({ config: "wrangler.toml" });
		expect(parsed.name).toBe("test-worker");
		expect(parsed.compatibility_date).toBe("2022-01-12");
	});

	it("should remove UTF-8 BOM from JSON config files", () => {
		writeWithBOM(
			"wrangler.json",
			UTF8_BOM,
			'{\n"name": "test-worker",\n"compatibility_date": "2022-01-12"\n}'
		);

		const parsed = readConfig({ config: "wrangler.json" });
		expect(parsed.name).toBe("test-worker");
		expect(parsed.compatibility_date).toBe("2022-01-12");
	});

	// Each unsupported encoding gets its own test case; the payload after the
	// BOM is plain UTF-8 JSON because only the leading bytes matter here.
	const unsupportedCases = [
		{ encoding: "UTF-16 BE", bom: [0xfe, 0xff] },
		{ encoding: "UTF-16 LE", bom: [0xff, 0xfe] },
		{ encoding: "UTF-32 BE", bom: [0x00, 0x00, 0xfe, 0xff] },
		{ encoding: "UTF-32 LE", bom: [0xff, 0xfe, 0x00, 0x00] },
	];

	for (const { encoding, bom } of unsupportedCases) {
		it(`should error on ${encoding} BOM`, () => {
			writeWithBOM("wrangler.json", bom, '{"name": "test"}');

			expect(() => readConfig({ config: "wrangler.json" })).toThrow(
				`Configuration file contains ${encoding} byte order marker`
			);
		});
	}

	it("should handle files without BOM normally", () => {
		writeWranglerConfig({ name: "no-bom-test" });

		const parsed = readConfig({ config: "wrangler.toml" });
		expect(parsed.name).toBe("no-bom-test");
	});
});
6705+
66196706
function normalizePath(text: string): string {
66206707
return text
66216708
.replace("project\\wrangler.toml", "project/wrangler.toml")

packages/wrangler/src/parse.ts

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,13 @@ export function readFileSyncToBuffer(file: string): Buffer {
215215
*/
216216
export function readFileSync(file: string): string {
217217
try {
218-
return fs.readFileSync(file, { encoding: "utf-8" });
218+
const buffer = fs.readFileSync(file);
219+
return removeBOMAndValidate(buffer, file);
219220
} catch (err) {
221+
if (err instanceof ParseError) {
222+
throw err;
223+
}
224+
220225
const { message } = err as Error;
221226
throw new ParseError({
222227
text: `Could not read file: ${file}`,
@@ -359,3 +364,50 @@ export function parseNonHyphenedUuid(uuid: string | null): string | null {
359364

360365
return hyphenated.slice(0, 36);
361366
}
367+
368+
/**
 * Byte order marker signatures for encodings that are rejected, each paired
 * with the human-readable encoding name used in error messages.
 *
 * Longer signatures are listed first: the UTF-16 LE marker (FF FE) is a
 * prefix of the UTF-32 LE marker (FF FE 00 00), so a first-match scan over
 * this table would otherwise misreport UTF-32 LE input as UTF-16 LE.
 */
const UNSUPPORTED_BOMS = [
	{ encoding: "UTF-32 BE", bytes: [0x00, 0x00, 0xfe, 0xff] },
	{ encoding: "UTF-32 LE", bytes: [0xff, 0xfe, 0x00, 0x00] },
	{ encoding: "UTF-16 BE", bytes: [0xfe, 0xff] },
	{ encoding: "UTF-16 LE", bytes: [0xff, 0xfe] },
].map(({ encoding, bytes }): { buffer: Buffer; encoding: string } => ({
	buffer: Buffer.from(bytes),
	encoding,
}));
386+
387+
/**
 * Decodes raw file bytes as UTF-8 text, handling a leading byte order marker.
 *
 * - If the bytes start with one of the signatures in `UNSUPPORTED_BOMS`, the
 *   file is rejected.
 * - If the decoded text starts with U+FEFF (which is what a UTF-8 BOM decodes
 *   to), that single character is dropped.
 * - Otherwise the decoded text is returned unchanged.
 *
 * @param buffer Raw bytes read from disk.
 * @param file Path of the file; used only in the error message and location.
 * @returns The UTF-8 decoded content without a leading BOM.
 * @throws ParseError when an unsupported BOM is found at the start of `buffer`.
 */
function removeBOMAndValidate(buffer: Buffer, file: string): string {
	// BOM signatures overlap: UTF-16 LE (FF FE) is a prefix of UTF-32 LE
	// (FF FE 00 00). Choosing the longest matching signature keeps the
	// classification correct regardless of the order of the lookup table.
	let detected: { buffer: Buffer; encoding: string } | undefined;
	for (const candidate of UNSUPPORTED_BOMS) {
		const signature = candidate.buffer;
		const isPrefix =
			buffer.length >= signature.length &&
			buffer.subarray(0, signature.length).equals(signature);
		if (
			isPrefix &&
			(detected === undefined || signature.length > detected.buffer.length)
		) {
			detected = candidate;
		}
	}

	if (detected !== undefined) {
		throw new ParseError({
			text: `Configuration file contains ${detected.encoding} byte order marker`,
			notes: [
				{
					text: `The file "${file}" appears to be encoded as ${detected.encoding}. Please save the file as UTF-8 without BOM.`,
				},
			],
			location: { file, line: 1, column: 0 },
			telemetryMessage: `${detected.encoding} BOM detected`,
		});
	}

	const content = buffer.toString("utf-8");

	// Decoding does not strip the BOM: the bytes EF BB BF come through as a
	// single leading U+FEFF code unit, which must not reach the parsers.
	return content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
}

0 commit comments

Comments
 (0)