fix: handle UTF BOM in config files (#10154)

devin-ai-integration[bot] · petebacondarwin · web-flow · commit 502a8e0db0ee · 2025-08-01T08:15:38.000+01:00
* fix: handle UTF BOM in config files

  - Remove UTF-8 BOM (ef bb bf) from config files before parsing
  - Throw descriptive errors for other BOMs (UTF-16, UTF-32)
  - Add comprehensive tests for BOM handling in TOML and JSON configs

  Fixes #9938

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

* refactor: use array-based approach for BOM detection

  - Replace individual BOM constants with UNSUPPORTED_BOMS array
  - Use loop instead of separate if statements for each BOM type
  - Reduces code duplication while maintaining exact same functionality

  Addresses PR feedback from vicb

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

* refactor: decode buffer first, then check for BOM codepoints

  - Use TextDecoder().decode() first, which automatically strips UTF-8 BOMs
  - Check for UTF-16/UTF-32 BOMs as replacement characters in decoded string
  - Fall back to raw buffer inspection to determine specific BOM type
  - Cleaner approach as suggested by user feedback
  - All existing tests continue to pass

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

* fix: revert to hybrid BOM detection approach

  - TextDecoder().decode() first to automatically strip UTF-8 BOMs
  - Check for replacement characters to detect non-UTF-8 BOMs
  - Fall back to raw buffer inspection for specific BOM type identification
  - All BOM tests pass (UTF-8 removal, UTF-16/UTF-32 error detection)
  - Constant string approach not viable as all non-UTF-8 BOMs decode identically

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

* fix: resolve formatting issues in BOM handling code

  - Apply prettier formatting to parse.ts
  - No functional changes to BOM detection logic
  - All BOM tests continue to pass locally

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

* simplify bom validation

* refactor: address PR feedback - use Buffer.concat for UTF-8 BOM tests and array-based BOM detection

  - Update UTF-8 BOM tests to use Buffer.concat([Buffer.from([0xEF, 0xBB, 0xBF]), Buffer.from(configContent, 'utf-8')]) as suggested by petebacondarwin
  - Revert to array-based BOM detection approach with collapsed properties (name + encoding) as suggested by vicb
  - Maintain all existing functionality while following preferred patterns

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

* tweak file type

* refactor: simplify UNSUPPORTED_BOMS array to use only encoding property

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

* fix: resolve formatting issues in BOM handling implementation

  Co-Authored-By: pbacondarwin@cloudflare.com <pete@bacondarwin.com>

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: pbacondarwin@cloudflare.com <pete@bacondarwin.com>
diff --git a/.changeset/rich-tigers-decide.md b/.changeset/rich-tigers-decide.md
@@ -0,0 +1,5 @@
+---
+"wrangler": patch
+---
+
+Fix UTF BOM handling in config files - remove UTF-8 BOM and error on other BOMs
diff --git a/packages/wrangler/src/__tests__/config/configuration.test.ts b/packages/wrangler/src/__tests__/config/configuration.test.ts
@@ -6645,6 +6645,93 @@ describe("experimental_readRawConfig()", () => {
 	);
 });
 
+describe("BOM (Byte Order Marker) handling", () => { // readConfig() on files that start with a UTF-8, UTF-16 or UTF-32 BOM, or none
+	runInTempDir(); // presumably isolates the files written below in a temporary directory — confirm in helper
+
+	it("should remove UTF-8 BOM from TOML config files", () => {
+		const configContent = `name = "test-worker"
+compatibility_date = "2022-01-12"`;
+
+		fs.writeFileSync(
+			"wrangler.toml",
+			Buffer.concat([
+				Buffer.from([0xef, 0xbb, 0xbf]), // UTF-8 BOM written as raw bytes ahead of the content
+				Buffer.from(configContent, "utf-8"),
+			])
+		);
+
+		const config = readConfig({ config: "wrangler.toml" });
+		expect(config.name).toBe("test-worker"); // first key parses only if the BOM was stripped before parsing
+		expect(config.compatibility_date).toBe("2022-01-12");
+	});
+
+	it("should remove UTF-8 BOM from JSON config files", () => {
+		const configContent = `{
+	"name": "test-worker",
+	"compatibility_date": "2022-01-12"
+}`;
+
+		fs.writeFileSync(
+			"wrangler.json",
+			Buffer.concat([
+				Buffer.from([0xef, 0xbb, 0xbf]), // UTF-8 BOM written as raw bytes ahead of the content
+				Buffer.from(configContent, "utf-8"),
+			])
+		);
+
+		const config = readConfig({ config: "wrangler.json" });
+		expect(config.name).toBe("test-worker");
+		expect(config.compatibility_date).toBe("2022-01-12");
+	});
+
+	it("should error on UTF-16 BE BOM", () => {
+		const bomBytes = Buffer.from([0xfe, 0xff]); // UTF-16 BE BOM
+		const configContent = Buffer.from('{"name": "test"}', "utf-8"); // payload stays UTF-8; only the BOM prefix is under test
+		fs.writeFileSync("wrangler.json", Buffer.concat([bomBytes, configContent]));
+
+		expect(() => readConfig({ config: "wrangler.json" })).toThrow(
+			"Configuration file contains UTF-16 BE byte order marker"
+		);
+	});
+
+	it("should error on UTF-16 LE BOM", () => {
+		const bomBytes = Buffer.from([0xff, 0xfe]); // UTF-16 LE BOM
+		const configContent = Buffer.from('{"name": "test"}', "utf-8"); // payload stays UTF-8; only the BOM prefix is under test
+		fs.writeFileSync("wrangler.json", Buffer.concat([bomBytes, configContent]));
+
+		expect(() => readConfig({ config: "wrangler.json" })).toThrow(
+			"Configuration file contains UTF-16 LE byte order marker"
+		);
+	});
+
+	it("should error on UTF-32 BE BOM", () => {
+		const bomBytes = Buffer.from([0x00, 0x00, 0xfe, 0xff]); // UTF-32 BE BOM
+		const configContent = Buffer.from('{"name": "test"}', "utf-8"); // payload stays UTF-8; only the BOM prefix is under test
+		fs.writeFileSync("wrangler.json", Buffer.concat([bomBytes, configContent]));
+
+		expect(() => readConfig({ config: "wrangler.json" })).toThrow(
+			"Configuration file contains UTF-32 BE byte order marker"
+		);
+	});
+
+	it("should error on UTF-32 LE BOM", () => {
+		const bomBytes = Buffer.from([0xff, 0xfe, 0x00, 0x00]); // UTF-32 LE BOM; its first two bytes equal the UTF-16 LE BOM
+		const configContent = Buffer.from('{"name": "test"}', "utf-8"); // payload stays UTF-8; only the BOM prefix is under test
+		fs.writeFileSync("wrangler.json", Buffer.concat([bomBytes, configContent]));
+
+		expect(() => readConfig({ config: "wrangler.json" })).toThrow(
+			"Configuration file contains UTF-32 LE byte order marker" // must not be reported as UTF-16 LE despite the shared prefix
+		);
+	});
+
+	it("should handle files without BOM normally", () => {
+		writeWranglerConfig({ name: "no-bom-test" }); // presumably writes the wrangler.toml read back below, without a BOM — confirm in helper
+
+		const config = readConfig({ config: "wrangler.toml" });
+		expect(config.name).toBe("no-bom-test");
+	});
+});
+
 function normalizePath(text: string): string {
 	return text
 		.replace("project\\wrangler.toml", "project/wrangler.toml")
diff --git a/packages/wrangler/src/parse.ts b/packages/wrangler/src/parse.ts
@@ -215,8 +215,13 @@ export function readFileSyncToBuffer(file: string): Buffer {
  */
 export function readFileSync(file: string): string {
 	try {
-		return fs.readFileSync(file, { encoding: "utf-8" });
+		const buffer = fs.readFileSync(file);
+		return removeBOMAndValidate(buffer, file);
 	} catch (err) {
+		if (err instanceof ParseError) {
+			throw err;
+		}
+
 		const { message } = err as Error;
 		throw new ParseError({
 			text: `Could not read file: ${file}`,
@@ -400,3 +405,54 @@ export function parseByteSize(
 		Number(size) * Math.pow(base ?? (binary ? 1024 : 1000), pow)
 	);
 }
+
+const UNSUPPORTED_BOMS = [ // BOM byte prefixes that removeBOMAndValidate rejects; order matters because the first prefix match wins
+	{
+		buffer: Buffer.from([0x00, 0x00, 0xfe, 0xff]),
+		encoding: "UTF-32 BE",
+	},
+	{
+		buffer: Buffer.from([0xff, 0xfe, 0x00, 0x00]), // begins with the UTF-16 LE BOM (ff fe), so it must be listed before that entry
+		encoding: "UTF-32 LE",
+	},
+	{
+		buffer: Buffer.from([0xfe, 0xff]),
+		encoding: "UTF-16 BE",
+	},
+	{
+		buffer: Buffer.from([0xff, 0xfe]),
+		encoding: "UTF-16 LE", // encoding names are interpolated into the user-facing error text
+	},
+];
+
+/**
+ * Decodes `buffer` as UTF-8 and returns the text, dropping a single leading UTF-8 BOM (U+FEFF) if one is present.
+ * Throws a ParseError naming the detected encoding and `file` when the buffer starts with a UTF-16 or UTF-32 BOM.
+ */
+function removeBOMAndValidate(buffer: Buffer, file: string): string {
+	for (const bom of UNSUPPORTED_BOMS) { // checked in array order; the first matching prefix throws
+		if (
+			buffer.length >= bom.buffer.length &&
+			buffer.subarray(0, bom.buffer.length).equals(bom.buffer) // byte-wise compare of the file's leading bytes against the BOM
+		) {
+			throw new ParseError({
+				text: `Configuration file contains ${bom.encoding} byte order marker`,
+				notes: [
+					{
+						text: `The file "${file}" appears to be encoded as ${bom.encoding}. Please save the file as UTF-8 without BOM.`,
+					},
+				],
+				location: { file, line: 1, column: 0 }, // the BOM sits at the very start of the file
+				telemetryMessage: `${bom.encoding} BOM detected`,
+			});
+		}
+	}
+
+	const content = buffer.toString("utf-8"); // a UTF-8 BOM (ef bb bf) decodes to U+FEFF, which is stripped below
+
+	if (content.charCodeAt(0) === 0xfeff) { // false for empty content, where charCodeAt(0) is NaN
+		return content.slice(1);
+	}
+
+	return content;
+}

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"wrangler": patch
 +---
++
 +Fix UTF BOM handling in config files - remove UTF-8 BOM and error on other BOMs