Skip to content

Commit 4cbe0cd

Browse files
committed
fix: musl C++ build and C# duplicate variable name
- Fix musl tesseract build: replace the `-isystem` approach with `-nostdinc` and explicit include path ordering, so that `#include_next` from libstdc++ finds musl's C headers instead of glibc's.
- Fix C# Serialization.cs: rename the duplicate `document` variable to `documentProp` to resolve the scope conflict with JsonDocument.
1 parent 36bceed commit 4cbe0cd

File tree

2 files changed

+49
-23
lines changed

2 files changed

+49
-23
lines changed

crates/kreuzberg-tesseract/build.rs

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -128,16 +128,24 @@ mod build_tesseract {
128128
/// glibc C headers, producing objects with glibc-versioned symbols (e.g.
129129
/// `__isoc23_sscanf@@GLIBC_2.38`) incompatible with musl linking.
130130
///
131-
/// This wrapper prepends musl's C header directory via `-isystem` so that musl's
132-
/// headers shadow glibc's. Unlike libc++ (which uses wrapper `<stddef.h>` etc.
133-
/// with `#include_next`), libstdc++ includes C headers directly from `<cstdlib>`
134-
/// etc., so `-isystem` shadowing works correctly without `-nostdinc`.
131+
/// The wrapper uses `-nostdinc` to remove ALL default system include paths, then
132+
/// adds them back in the correct order:
133+
/// 1. GCC internal headers (stddef.h, stdarg.h, etc.)
134+
/// 2. GCC fixed-includes (system header fixups)
135+
/// 3. C++ standard library headers (libstdc++)
136+
/// 4. C++ platform-specific headers (bits/c++config.h etc.)
137+
/// 5. musl C headers (replaces glibc C headers)
135138
///
136-
/// Additionally, some glibc-specific C++ platform headers (e.g. `os_defines.h`,
137-
/// `libc-header-start.h`, `floatn.h`) still get picked up from gcc's built-in
138-
/// include paths. These headers use `__GLIBC_PREREQ()` and `__GLIBC_USE()` macros
139-
/// that musl doesn't define. We define these as no-op macros evaluating to 0 so
140-
/// glibc-guarded code paths are correctly skipped.
139+
/// This ordering is critical because libstdc++ headers use `#include_next`
140+
/// to include C headers. With `-isystem` alone (no `-nostdinc`), the musl
141+
/// headers come before libstdc++ in the search path, so `#include_next` from
142+
/// `<cstdlib>` skips them and finds glibc's headers instead. By using
143+
/// `-nostdinc` and placing musl headers AFTER the C++ headers, `#include_next`
144+
/// correctly finds musl's C headers.
145+
///
146+
/// Additionally defines `__GLIBC_PREREQ()` and `__GLIBC_USE()` as macros that evaluate
147+
/// to 0 so that platform-specific C++ headers (os_defines.h, floatn.h) that reference
148+
/// them compile correctly with musl, and `__GNUC_PREREQ()` as a real GCC version check.
141149
#[cfg(unix)]
142150
fn create_musl_cxx_wrapper(target: &str) -> Option<String> {
143151
use std::os::unix::fs::PermissionsExt;
@@ -157,27 +165,45 @@ mod build_tesseract {
157165
return None;
158166
}
159167

168+
// Derive the host GCC triplet for include path construction (the GCC major
// version is detected by the wrapper script itself via `g++ -dumpversion`)
169+
let host_arch = host.split('-').next().unwrap_or("x86_64");
170+
let host_triplet = format!("{host_arch}-linux-gnu");
171+
160172
// Write wrapper script to OUT_DIR
161173
let out_dir = env::var("OUT_DIR").unwrap();
162174
let wrapper_path = format!("{out_dir}/musl-g++.sh");
163175
let wrapper_content = format!(
164-
"#!/bin/sh\n\
165-
# Auto-generated musl-g++ wrapper for cross-compilation.\n\
166-
# Prepends musl C headers so they shadow glibc's.\n\
167-
# Defines glibc compat macros as 0 for musl -- handles os_defines.h,\n\
168-
# libc-header-start.h, floatn.h etc. that use __GLIBC_PREREQ().\n\
169-
# Also defines __GNUC_PREREQ for floatn.h which checks compiler version.\n\
170-
exec g++ -isystem \"{musl_include}\" \\\n\
171-
'-D__GLIBC_PREREQ(maj,min)=0' \\\n\
172-
'-D__GLIBC_USE(F)=0' \\\n\
173-
'-D__GNUC_PREREQ(maj,min)=0' \\\n\
174-
\"$@\"\n"
176+
r#"#!/bin/sh
177+
# Auto-generated musl-g++ wrapper for cross-compilation.
178+
# Uses -nostdinc to remove default system includes, then adds back
179+
# GCC internal, C++ stdlib, and musl C headers in the correct order
180+
# so that #include_next from libstdc++ finds musl headers (not glibc).
181+
182+
GCC_VER=$(g++ -dumpversion | cut -d. -f1)
183+
TRIPLET="{host_triplet}"
184+
GCC_LIB="/usr/lib/gcc/$TRIPLET/$GCC_VER"
185+
186+
exec g++ -nostdinc \
187+
-isystem "$GCC_LIB/include" \
188+
-isystem "$GCC_LIB/include-fixed" \
189+
-isystem "/usr/include/c++/$GCC_VER" \
190+
-isystem "/usr/include/$TRIPLET/c++/$GCC_VER" \
191+
-isystem "{musl_include}" \
192+
'-D__GLIBC_PREREQ(maj,min)=0' \
193+
'-D__GLIBC_USE(F)=0' \
194+
'-D__GNUC_PREREQ(maj,min)=((__GNUC__<<16)+__GNUC_MINOR__>=((maj)<<16)+(min))' \
195+
'-D__locale_t=locale_t' \
196+
'-include' '{musl_include}/locale.h' \
197+
"$@"
198+
"#
175199
);
176200

177201
fs::write(&wrapper_path, &wrapper_content).ok()?;
178202
fs::set_permissions(&wrapper_path, fs::Permissions::from_mode(0o755)).ok()?;
179203

180-
println!("cargo:warning=Created musl g++ wrapper at {wrapper_path} (musl headers: {musl_include})");
204+
println!(
205+
"cargo:warning=Created musl g++ wrapper at {wrapper_path} (musl headers: {musl_include}, host triplet: {host_triplet})"
206+
);
181207
Some(wrapper_path)
182208
}
183209

packages/csharp/Kreuzberg/Serialization.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,9 +1582,9 @@ internal static ExtractionResult ParseResult(string json)
15821582
result.DjotContent = DeserializeElement<DjotContent>(djotContent);
15831583
}
15841584

1585-
if (root.TryGetProperty("document", out var document))
1585+
if (root.TryGetProperty("document", out var documentProp))
15861586
{
1587-
result.Document = DeserializeElement<DocumentStructure>(document);
1587+
result.Document = DeserializeElement<DocumentStructure>(documentProp);
15881588
}
15891589

15901590
if (root.TryGetProperty("metadata", out var metadata))

0 commit comments

Comments
 (0)