Skip to content
Merged
38 changes: 36 additions & 2 deletions Lib/test/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,10 @@ def __enter__(self):
self._preserved = self.path.read_bytes()
except FileNotFoundError:
self._preserved = None
self.path.write_text(self.content, encoding="utf-16")
if isinstance(self.content, bytes):
self.path.write_bytes(self.content)
else:
self.path.write_text(self.content, encoding="utf-16")

def __exit__(self, *exc_info):
if self._preserved is None:
Expand Down Expand Up @@ -271,7 +274,10 @@ def py_ini(self, content):
@contextlib.contextmanager
def script(self, content, encoding="utf-8"):
file = Path(tempfile.mktemp(dir=os.getcwd()) + ".py")
file.write_text(content, encoding=encoding)
if isinstance(content, bytes):
file.write_bytes(content)
else:
file.write_text(content, encoding=encoding)
try:
yield file
finally:
Expand Down Expand Up @@ -469,6 +475,15 @@ def test_py_default(self):
self.assertEqual("3.100", data["SearchInfo.tag"])
self.assertEqual("X.Y.exe -arg", data["stdout"].strip())

@unittest.expectedFailure # fails until GH-99620 is fixed
def test_py_default_with_valid_bom(self):
content = TEST_PY_DEFAULTS.encode("utf-8")
with self.py_ini(b"\xEF\xBB\xBF" + content):
data = self.run_py(["-arg"])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't read from the INI file ourselves, and unless the SC rejects PEP 773 we're not going to replace the existing Windows API call with our own code.

I'd just remove this test.

self.assertEqual("PythonTestSuite", data["SearchInfo.company"])
self.assertEqual("3.100", data["SearchInfo.tag"])
self.assertEqual("X.Y.exe -arg", data["stdout"].strip())

def test_py2_default(self):
with self.py_ini(TEST_PY_DEFAULTS):
data = self.run_py(["-2", "-arg"])
Expand Down Expand Up @@ -624,6 +639,25 @@ def test_py_shebang_short_argv0(self):
self.assertEqual("3.100", data["SearchInfo.tag"])
self.assertEqual(f'X.Y.exe -prearg "{script}" -postarg', data["stdout"].strip())

def test_py_shebang_valid_bom(self):
    """A shebang placed right after the UTF-8 BOM bytes is still applied.

    The script is written as raw bytes: EF BB BF followed by the shebang
    line. The launcher output is expected to carry the shebang's
    ``-prearg`` argument ahead of the quoted script path.
    """
    utf8_bom = b"\xEF\xBB\xBF"
    shebang_line = "#! /usr/bin/python -prearg".encode("utf-8")
    with self.py_ini(TEST_PY_DEFAULTS), self.script(utf8_bom + shebang_line) as script:
        data = self.run_py([script, "-postarg"])

    expected_stdout = f"X.Y.exe -prearg {quote(script)} -postarg"
    self.assertEqual("PythonTestSuite", data["SearchInfo.company"])
    self.assertEqual("3.100", data["SearchInfo.tag"])
    self.assertEqual(expected_stdout, data["stdout"].strip())

def test_py_shebang_invalid_bom(self):
    """A malformed BOM is reported and the shebang after it is not applied.

    The script starts with EF AA BF (the middle byte differs from the
    UTF-8 BOM's BB) followed by a shebang line. The launcher is expected
    to mention "Invalid BOM" on stderr, and its command line is expected
    to lack the shebang's ``-prearg`` argument.
    """
    malformed_bom = b"\xEF\xAA\xBF"
    shebang_line = "#! /usr/bin/python3 -prearg".encode("utf-8")
    with self.py_ini(TEST_PY_DEFAULTS), self.script(malformed_bom + shebang_line) as script:
        data = self.run_py([script, "-postarg"])

    expected_stdout = f"X.Y.exe {quote(script)} -postarg"
    self.assertIn("Invalid BOM", data["stderr"])
    self.assertEqual("PythonTestSuite", data["SearchInfo.company"])
    self.assertEqual("3.100", data["SearchInfo.tag"])
    self.assertEqual(expected_stdout, data["stdout"].strip())

def test_py_handle_64_in_ini(self):
with self.py_ini("\n".join(["[defaults]", "python=3.999-64"])):
# Expect this to fail, but should get oldStyleTag flipped on
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:source:`pylauncher <PC/launcher2.c>` correctly detects a BOM when searching for the
shebang. Fix by Chris Eibl.
8 changes: 4 additions & 4 deletions PC/launcher2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1062,7 +1062,7 @@ checkShebang(SearchInfo *search)
}

DWORD bytesRead = 0;
char buffer[4096];
unsigned char buffer[4096];
if (!ReadFile(hFile, buffer, sizeof(buffer), &bytesRead, NULL)) {
debug(L"# Failed to read %s for shebang parsing (0x%08X)\n",
scriptFile, GetLastError());
Expand All @@ -1075,7 +1075,7 @@ checkShebang(SearchInfo *search)
free(scriptFile);


char *b = buffer;
unsigned char *b = buffer;
bool onlyUtf8 = false;
if (bytesRead > 3 && *b == 0xEF) {
if (*++b == 0xBB && *++b == 0xBF) {
Expand All @@ -1096,13 +1096,13 @@ checkShebang(SearchInfo *search)
++b;
--bytesRead;
while (--bytesRead > 0 && isspace(*++b)) { }
char *start = b;
const unsigned char *start = b;
while (--bytesRead > 0 && *++b != '\r' && *b != '\n') { }
wchar_t *shebang;
int shebangLength;
// We add 1 when bytesRead==0, as in that case we hit EOF and b points
// to the last character in the file, not the newline
int exitCode = _decodeShebang(search, start, (int)(b - start + (bytesRead == 0)), onlyUtf8, &shebang, &shebangLength);
int exitCode = _decodeShebang(search, (const char*)start, (int)(b - start + (bytesRead == 0)), onlyUtf8, &shebang, &shebangLength);
if (exitCode) {
return exitCode;
}
Expand Down
Loading