Skip to content

Commit 1af38cc

Browse files
authored
fix(win): make symbolication and modulefinder independent of the system ANSI code page. (#1389)
* fix(win): make symbolication independent of the system ANSI code page. * allow `NULL` module paths and symbol names. * add an integration test that runs the example from a cyrillic directory and validates the package paths * resolve relative paths + clean up subdir * remove the assertion that a frame must have a function * only assert on the frame_package being a file if it exists... ...however, no longer assert that a frame_package exists. * isolate package assertions to new test * don't conflate checking any function/package with checking package file validity * also adapt the windows modulefinder to be independent system ACP. The szExePath generated for actual UTF-8 paths was already filled with mojibake :-) so LoadLibrary couldn't find any local modules. This is actually connected to the symbolication: * in the server, if a module was found, the backend would assign packages to frames based on the instruction address and the module address range * if the module couldn't be found, as was the case previously, it had to use the frame package provided So, now we fixed both and they should overlap. * update the CHANGELOG * explicitly specify `PSAPI_VERSION` because we only want to link kernel32 * check string_from_wstr return values * use a heap-allocated 32K buffer for module paths * use a heap-allocated 32K buffer for symbol paths * move allocation into wrapping if * format after webui edit
1 parent f2eaa5e commit 1af38cc

File tree

5 files changed

+175
-26
lines changed

5 files changed

+175
-26
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
**Fixes**:
6+
7+
- Windows: Make symbolication and the modulefinder independent of the system ANSI code page. ([#1389](https://github.com/getsentry/sentry-native/pull/1389))
8+
39
## 0.11.1
410

511
**Features**:

src/modulefinder/sentry_modulefinder_windows.c

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,21 @@
66

77
#ifndef SENTRY_PLATFORM_XBOX
88
# include <dbghelp.h>
9-
#else
10-
# include <Psapi.h>
119
#endif
12-
#include <tlhelp32.h>
10+
#define PSAPI_VERSION 2
11+
#include <Psapi.h>
12+
#include <TlHelp32.h>
1313

1414
static bool g_initialized = false;
1515
static sentry_mutex_t g_mutex = SENTRY__MUTEX_INIT;
1616
static sentry_value_t g_modules = { 0 };
1717

1818
#define CV_SIGNATURE 0x53445352
1919

20+
// follow the maximum path length documented here:
21+
// https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
22+
#define MAX_PATH_BUFFER_SIZE 32768
23+
2024
struct CodeViewRecord70 {
2125
uint32_t signature;
2226
GUID pdb_signature;
@@ -92,6 +96,35 @@ extract_pdb_info(uintptr_t module_addr, sentry_value_t module)
9296
}
9397
}
9498

99+
static void
100+
log_library_load_error(const wchar_t *module_filename_w)
101+
{
102+
const DWORD ec = GetLastError();
103+
LPWSTR msg_w = NULL;
104+
FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM
105+
| FORMAT_MESSAGE_IGNORE_INSERTS,
106+
NULL, ec, 0, (LPWSTR)&msg_w, 0, NULL);
107+
char *msg = sentry__string_from_wstr(msg_w);
108+
char *module_filename = sentry__string_from_wstr(module_filename_w);
109+
if (!msg || !module_filename) {
110+
if (module_filename) {
111+
sentry_free(module_filename);
112+
}
113+
if (msg) {
114+
sentry_free(msg);
115+
}
116+
if (msg_w) {
117+
LocalFree(msg_w);
118+
}
119+
return;
120+
}
121+
SENTRY_ERRORF("LoadLibraryExW failed (%lu): %s (\"%s\")\n", ec,
122+
msg ? msg : "(no message)", module_filename);
123+
sentry_free(module_filename);
124+
sentry_free(msg);
125+
LocalFree(msg_w);
126+
}
127+
95128
static void
96129
load_modules(void)
97130
{
@@ -101,14 +134,32 @@ load_modules(void)
101134
MODULEENTRY32W module = { 0 };
102135
module.dwSize = sizeof(MODULEENTRY32W);
103136
g_modules = sentry_value_new_list();
137+
wchar_t *module_filename_w = NULL;
104138

105-
if (Module32FirstW(snapshot, &module)) {
139+
if (Module32FirstW(snapshot, &module)
140+
&& (module_filename_w
141+
= sentry_malloc(sizeof(wchar_t) * MAX_PATH_BUFFER_SIZE))) {
106142
do {
107-
HMODULE handle = LoadLibraryExW(
108-
module.szExePath, NULL, LOAD_LIBRARY_AS_DATAFILE);
143+
HMODULE module_handle = NULL;
144+
if (GetModuleFileNameExW(GetCurrentProcess(), module.hModule,
145+
module_filename_w, MAX_PATH_BUFFER_SIZE)) {
146+
module_handle = LoadLibraryExW(
147+
module_filename_w, NULL, LOAD_LIBRARY_AS_DATAFILE);
148+
} else {
149+
char *module_name = sentry__string_from_wstr(module.szModule);
150+
if (module_name) {
151+
SENTRY_ERRORF(
152+
"Failed to get module filename for %s", module_name);
153+
sentry_free(module_name);
154+
}
155+
continue;
156+
}
157+
if (!module_handle) {
158+
log_library_load_error(module_filename_w);
159+
continue;
160+
}
109161
MEMORY_BASIC_INFORMATION vmem_info = { 0 };
110-
if (handle
111-
&& sizeof(vmem_info)
162+
if (sizeof(vmem_info)
112163
== VirtualQuery(
113164
module.modBaseAddr, &vmem_info, sizeof(vmem_info))
114165
&& vmem_info.State == MEM_COMMIT) {
@@ -120,12 +171,14 @@ load_modules(void)
120171
sentry_value_set_by_key(rv, "image_size",
121172
sentry_value_new_int32((int32_t)module.modBaseSize));
122173
sentry_value_set_by_key(rv, "code_file",
123-
sentry__value_new_string_from_wstr(module.szExePath));
174+
sentry__value_new_string_from_wstr(module_filename_w));
124175
extract_pdb_info((uintptr_t)module.modBaseAddr, rv);
125176
sentry_value_append(g_modules, rv);
126177
}
127-
FreeLibrary(handle);
178+
FreeLibrary(module_handle);
128179
} while (Module32NextW(snapshot, &module));
180+
181+
sentry_free(module_filename_w);
129182
}
130183

131184
CloseHandle(snapshot);

src/symbolizer/sentry_symbolizer_windows.c

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
#include "sentry_boot.h"
2+
#include "sentry_string.h"
23

34
#include "sentry_symbolizer.h"
45
#include "sentry_windows_dbghelp.h"
56

67
#include <dbghelp.h>
78
#include <malloc.h>
89

9-
#define MAX_SYM 1024
10+
// follow the maximum path length documented here:
11+
// https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
12+
#define MAX_PATH_BUFFER_SIZE 32768
1013

1114
bool
1215
sentry__symbolize(
@@ -17,29 +20,46 @@ sentry__symbolize(
1720
(void)func;
1821
(void)addr;
1922
#else
23+
if (!addr || !func) {
24+
return false;
25+
}
2026
HANDLE proc = sentry__init_dbghelp();
27+
size_t symbol_info_size
28+
= sizeof(SYMBOL_INFOW) + MAX_SYM_NAME * sizeof(WCHAR);
29+
SYMBOL_INFOW *symbol_info = _alloca(symbol_info_size);
30+
memset(symbol_info, 0, symbol_info_size);
31+
symbol_info->MaxNameLen = MAX_SYM_NAME;
32+
symbol_info->SizeOfStruct = sizeof(SYMBOL_INFOW);
2133

22-
SYMBOL_INFO *sym = (SYMBOL_INFO *)_alloca(sizeof(SYMBOL_INFO) + MAX_SYM);
23-
memset(sym, 0, sizeof(SYMBOL_INFO) + MAX_SYM);
24-
sym->MaxNameLen = MAX_SYM;
25-
sym->SizeOfStruct = sizeof(SYMBOL_INFO);
34+
if (!SymFromAddrW(proc, (uintptr_t)addr, NULL, symbol_info)) {
35+
return false;
36+
}
2637

27-
if (!SymFromAddr(proc, (DWORD64)addr, 0, sym)) {
38+
wchar_t *mod_path_w = sentry_malloc(sizeof(wchar_t) * MAX_PATH_BUFFER_SIZE);
39+
if (!mod_path_w) {
40+
return false;
41+
}
42+
const DWORD n = GetModuleFileNameW((HMODULE)(uintptr_t)symbol_info->ModBase,
43+
mod_path_w, MAX_PATH_BUFFER_SIZE);
44+
if (n == 0 || n >= MAX_PATH_BUFFER_SIZE) {
45+
sentry_free(mod_path_w);
2846
return false;
2947
}
3048

31-
char mod_name[MAX_PATH];
32-
GetModuleFileNameA(
33-
(HMODULE)(size_t)sym->ModBase, mod_name, sizeof(mod_name));
49+
char *mod_path = sentry__string_from_wstr(mod_path_w);
50+
char *symbol_name = sentry__string_from_wstr(symbol_info->Name);
3451

35-
sentry_frame_info_t frame_info;
36-
memset(&frame_info, 0, sizeof(sentry_frame_info_t));
37-
frame_info.load_addr = (void *)(size_t)sym->ModBase;
52+
sentry_frame_info_t frame_info = { 0 };
53+
frame_info.load_addr = (void *)(uintptr_t)symbol_info->ModBase;
3854
frame_info.instruction_addr = addr;
39-
frame_info.symbol_addr = (void *)(size_t)sym->Address;
40-
frame_info.symbol = sym->Name;
41-
frame_info.object_name = mod_name;
55+
frame_info.symbol_addr = (void *)(uintptr_t)symbol_info->Address;
56+
frame_info.symbol = symbol_name;
57+
frame_info.object_name = mod_path;
4258
func(&frame_info, data);
59+
60+
sentry_free(mod_path);
61+
sentry_free(symbol_name);
62+
sentry_free(mod_path_w);
4363
#endif // SENTRY_PLATFORM_XBOX
4464

4565
return true;

tests/assertions.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77
from dataclasses import dataclass
88
from datetime import datetime, UTC
9+
from pathlib import Path
910

1011
import msgpack
1112

@@ -167,7 +168,9 @@ def assert_event_meta(
167168
)
168169

169170

170-
def assert_stacktrace(envelope, inside_exception=False, check_size=True):
171+
def assert_stacktrace(
172+
envelope, inside_exception=False, check_size=True, check_package=False
173+
):
171174
event = envelope.get_event()
172175

173176
parent = event["exception"] if inside_exception else event["threads"]
@@ -182,6 +185,17 @@ def assert_stacktrace(envelope, inside_exception=False, check_size=True):
182185
for frame in frames
183186
)
184187

188+
if check_package:
189+
for frame in frames:
190+
frame_package = frame.get("package")
191+
if frame_package is not None:
192+
frame_package_path = Path(frame_package)
193+
# only assert on absolute paths, since letting pathlib resolve relative paths is cheating
194+
if frame_package_path.is_absolute():
195+
assert (
196+
frame_package_path.is_file()
197+
), f"package is not a valid file path: '{frame_package}'"
198+
185199

186200
def assert_breadcrumb_inner(breadcrumbs, message="debug crumb"):
187201
expected = {

tests/test_integration_stdout.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import os
2+
import shutil
23
import subprocess
34
import sys
45
import time
6+
from pathlib import Path
57

68
import pytest
79

@@ -18,6 +20,7 @@
1820
assert_no_before_send,
1921
assert_crash_timestamp,
2022
assert_breakpad_crash,
23+
assert_exception,
2124
)
2225
from .conditions import has_breakpad, has_files
2326

@@ -46,6 +49,59 @@ def test_capture_stdout(cmake):
4649
assert_event(envelope)
4750

4851

52+
def copy_except(src: Path, dst: Path, exceptions: list[str] = None) -> None:
53+
"""
54+
Recursively copy everything from src to dst, except for entries whose
55+
names are in `exceptions`.
56+
"""
57+
exceptions = set(exceptions or [])
58+
59+
dst.mkdir(parents=True, exist_ok=True)
60+
61+
for entry in src.iterdir():
62+
if entry.name in exceptions:
63+
continue
64+
65+
dest = dst / entry.name
66+
if entry.is_dir():
67+
shutil.copytree(entry, dest, symlinks=True)
68+
else:
69+
shutil.copy2(entry, dest)
70+
71+
72+
@pytest.mark.skipif(not has_files, reason="test needs a local filesystem")
73+
def test_capture_exception_from_utf8_path_stdout(cmake):
74+
"""
75+
This test verifies that we can handle symbolication from an utf-8 path.
76+
"""
77+
tmp_path = cmake(
78+
["sentry_example"],
79+
{
80+
"SENTRY_BACKEND": "none",
81+
"SENTRY_TRANSPORT": "none",
82+
},
83+
)
84+
# create a cyrillic subdirectory in tmp_path and copy tmp_path into it
85+
cwd = tmp_path / "кириллица-тест"
86+
cwd.mkdir()
87+
copy_except(tmp_path, cwd, exceptions=["кириллица-тест"])
88+
89+
output = check_output(
90+
cwd,
91+
"sentry_example",
92+
["stdout", "capture-exception", "add-stacktrace"],
93+
)
94+
envelope = Envelope.deserialize(output)
95+
96+
assert_meta(envelope)
97+
assert_breadcrumb(envelope)
98+
assert_stacktrace(envelope, inside_exception=True, check_package=True)
99+
assert_exception(envelope)
100+
101+
# delete the cyrillic directory, but only after we asserted on stack frame packages being files
102+
shutil.rmtree(cwd)
103+
104+
49105
def test_dynamic_sdk_name_override(cmake):
50106
tmp_path = cmake(
51107
["sentry_example"],

0 commit comments

Comments
 (0)