Skip to content

Commit 502a8e0

Browse files
fix: handle UTF BOM in config files (#10154)
* fix: handle UTF BOM in config files
  - Remove UTF-8 BOM (ef bb bf) from config files before parsing
  - Throw descriptive errors for other BOMs (UTF-16, UTF-32)
  - Add comprehensive tests for BOM handling in TOML and JSON configs
  Fixes #9938
  Co-Authored-By: [email protected] <[email protected]>
* refactor: use array-based approach for BOM detection
  - Replace individual BOM constants with UNSUPPORTED_BOMS array
  - Use loop instead of separate if statements for each BOM type
  - Reduces code duplication while maintaining exact same functionality
  Addresses PR feedback from vicb
  Co-Authored-By: [email protected] <[email protected]>
* refactor: decode buffer first, then check for BOM codepoints
  - Use TextDecoder().decode() first, which automatically strips UTF-8 BOMs
  - Check for UTF-16/UTF-32 BOMs as replacement characters in decoded string
  - Fall back to raw buffer inspection to determine specific BOM type
  - Cleaner approach as suggested by user feedback
  - All existing tests continue to pass
  Co-Authored-By: [email protected] <[email protected]>
* fix: revert to hybrid BOM detection approach
  - TextDecoder().decode() first to automatically strip UTF-8 BOMs
  - Check for replacement characters to detect non-UTF-8 BOMs
  - Fall back to raw buffer inspection for specific BOM type identification
  - All BOM tests pass (UTF-8 removal, UTF-16/UTF-32 error detection)
  - Constant string approach not viable as all non-UTF-8 BOMs decode identically
  Co-Authored-By: [email protected] <[email protected]>
* fix: resolve formatting issues in BOM handling code
  - Apply prettier formatting to parse.ts
  - No functional changes to BOM detection logic
  - All BOM tests continue to pass locally
  Co-Authored-By: [email protected] <[email protected]>
* simplify bom validation
* refactor: address PR feedback - use Buffer.concat for UTF-8 BOM tests and array-based BOM detection
  - Update UTF-8 BOM tests to use Buffer.concat([Buffer.from([0xEF, 0xBB, 0xBF]), Buffer.from(configContent, 'utf-8')]) as suggested by petebacondarwin
  - Revert to array-based BOM detection approach with collapsed properties (name + encoding) as suggested by vicb
  - Maintain all existing functionality while following preferred patterns
  Co-Authored-By: [email protected] <[email protected]>
* tweak file type
* refactor: simplify UNSUPPORTED_BOMS array to use only encoding property
  Co-Authored-By: [email protected] <[email protected]>
* fix: resolve formatting issues in BOM handling implementation
  Co-Authored-By: [email protected] <[email protected]>
---------
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: [email protected] <[email protected]>
1 parent 198c450 commit 502a8e0

File tree

3 files changed

+149
-1
lines changed

3 files changed

+149
-1
lines changed

.changeset/rich-tigers-decide.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"wrangler": patch
3+
---
4+
5+
Fix UTF BOM handling in config files - remove UTF-8 BOM and error on other BOMs

packages/wrangler/src/__tests__/config/configuration.test.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6645,6 +6645,93 @@ describe("experimental_readRawConfig()", () => {
66456645
);
66466646
});
66476647

6648+
describe("BOM (Byte Order Marker) handling", () => {
	runInTempDir();

	// Byte sequence of the UTF-8 byte order mark.
	const UTF8_BOM = [0xef, 0xbb, 0xbf];

	// Non-UTF-8 byte order marks, each paired with the encoding name that the
	// thrown error message is expected to mention.
	const rejectedBoms: [string, number[]][] = [
		["UTF-16 BE", [0xfe, 0xff]],
		["UTF-16 LE", [0xff, 0xfe]],
		["UTF-32 BE", [0x00, 0x00, 0xfe, 0xff]],
		["UTF-32 LE", [0xff, 0xfe, 0x00, 0x00]],
	];

	// Writes `bom` followed by the UTF-8 bytes of `text` to `fileName`.
	const writeWithBom = (
		fileName: string,
		bom: number[],
		text: string
	): void => {
		const payload = Buffer.concat([
			Buffer.from(bom),
			Buffer.from(text, "utf-8"),
		]);
		fs.writeFileSync(fileName, payload);
	};

	it("should remove UTF-8 BOM from TOML config files", () => {
		const tomlSource = [
			'name = "test-worker"',
			'compatibility_date = "2022-01-12"',
		].join("\n");
		writeWithBom("wrangler.toml", UTF8_BOM, tomlSource);

		const parsed = readConfig({ config: "wrangler.toml" });

		expect(parsed.name).toBe("test-worker");
		expect(parsed.compatibility_date).toBe("2022-01-12");
	});

	it("should remove UTF-8 BOM from JSON config files", () => {
		const jsonSource = [
			"{",
			'"name": "test-worker",',
			'"compatibility_date": "2022-01-12"',
			"}",
		].join("\n");
		writeWithBom("wrangler.json", UTF8_BOM, jsonSource);

		const parsed = readConfig({ config: "wrangler.json" });

		expect(parsed.name).toBe("test-worker");
		expect(parsed.compatibility_date).toBe("2022-01-12");
	});

	// One test per rejected BOM; registration order matches the table above.
	for (const [encoding, bomBytes] of rejectedBoms) {
		it(`should error on ${encoding} BOM`, () => {
			writeWithBom("wrangler.json", bomBytes, '{"name": "test"}');

			expect(() => readConfig({ config: "wrangler.json" })).toThrow(
				`Configuration file contains ${encoding} byte order marker`
			);
		});
	}

	it("should handle files without BOM normally", () => {
		writeWranglerConfig({ name: "no-bom-test" });

		const parsed = readConfig({ config: "wrangler.toml" });

		expect(parsed.name).toBe("no-bom-test");
	});
});
6734+
66486735
function normalizePath(text: string): string {
66496736
return text
66506737
.replace("project\\wrangler.toml", "project/wrangler.toml")

packages/wrangler/src/parse.ts

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,13 @@ export function readFileSyncToBuffer(file: string): Buffer {
215215
*/
216216
export function readFileSync(file: string): string {
217217
try {
218-
return fs.readFileSync(file, { encoding: "utf-8" });
218+
const buffer = fs.readFileSync(file);
219+
return removeBOMAndValidate(buffer, file);
219220
} catch (err) {
221+
if (err instanceof ParseError) {
222+
throw err;
223+
}
224+
220225
const { message } = err as Error;
221226
throw new ParseError({
222227
text: `Could not read file: ${file}`,
@@ -400,3 +405,54 @@ export function parseByteSize(
400405
Number(size) * Math.pow(base ?? (binary ? 1024 : 1000), pow)
401406
);
402407
}
408+
409+
// Byte order marks that removeBOMAndValidate rejects, keyed by encoding name.
// The UTF-32 entries are listed before the UTF-16 ones because the UTF-32 LE
// marker (ff fe 00 00) begins with the UTF-16 LE marker (ff fe); a first-match
// scan over this list therefore reports the longer, more specific marker.
const UNSUPPORTED_BOMS = Object.entries({
	"UTF-32 BE": [0x00, 0x00, 0xfe, 0xff],
	"UTF-32 LE": [0xff, 0xfe, 0x00, 0x00],
	"UTF-16 BE": [0xfe, 0xff],
	"UTF-16 LE": [0xff, 0xfe],
}).map(([encoding, bytes]) => ({ buffer: Buffer.from(bytes), encoding }));
427+
428+
/**
 * Decodes a file's raw bytes as UTF-8 text, dropping a leading UTF-8 byte
 * order mark (U+FEFF) when one is present.
 *
 * @param buffer Raw bytes of the file.
 * @param file Path of the file; used only in the error note and location.
 * @returns The decoded text, without a leading U+FEFF.
 * @throws ParseError when the bytes start with any marker listed in
 *   UNSUPPORTED_BOMS (the first matching entry is the one reported).
 */
function removeBOMAndValidate(buffer: Buffer, file: string): string {
	// First entry whose marker bytes are a prefix of the file's bytes, if any.
	const offending = UNSUPPORTED_BOMS.find(
		({ buffer: marker }) =>
			marker.length <= buffer.length &&
			buffer.subarray(0, marker.length).equals(marker)
	);

	if (offending !== undefined) {
		const { encoding } = offending;
		throw new ParseError({
			text: `Configuration file contains ${encoding} byte order marker`,
			notes: [
				{
					text: `The file "${file}" appears to be encoded as ${encoding}. Please save the file as UTF-8 without BOM.`,
				},
			],
			location: { file, line: 1, column: 0 },
			telemetryMessage: `${encoding} BOM detected`,
		});
	}

	// Buffer#toString keeps a UTF-8 BOM as a leading U+FEFF, so strip it here.
	const decoded = buffer.toString("utf-8");
	return decoded.startsWith("\uFEFF") ? decoded.slice(1) : decoded;
}

0 commit comments

Comments
 (0)