Skip to content

Commit de338da

Browse files
fix(ci): add diagnostics for Linux parity test failures (TASK-156)
- Add debug logging to init.zig (enable via CRSQL_DEBUG=1) - Add API initialization checks in site_identity UDF functions - Add diagnostic step to CI workflow to surface extension load issues - Update task card with investigation findings Root cause suspected: global sqlite3_api variable visibility in shared library on Linux differs from Darwin. Diagnostics will confirm if API pointer is null when UDFs are called.
1 parent baae184 commit de338da

File tree

4 files changed

+97
-1
lines changed

4 files changed

+97
-1
lines changed

.github/workflows/zig-tests.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,25 @@ jobs:
146146
working-directory: zig
147147
run: nix run nixpkgs#zig -- build
148148

149+
- name: Diagnose extension loading
150+
working-directory: zig
151+
run: |
152+
echo "=== Extension file info ==="
153+
ls -la zig-out/lib/
154+
file zig-out/lib/libcrsqlite.so
155+
echo ""
156+
echo "=== SQLite version ==="
157+
nix run nixpkgs#sqlite -- --version
158+
echo ""
159+
echo "=== Test extension load ==="
160+
CRSQL_DEBUG=1 nix run nixpkgs#sqlite -- :memory: -cmd ".load zig-out/lib/libcrsqlite.so" "SELECT crsql_version();" 2>&1 || true
161+
echo ""
162+
echo "=== Test crsql_db_version ==="
163+
nix run nixpkgs#sqlite -- :memory: -cmd ".load zig-out/lib/libcrsqlite.so" "SELECT crsql_db_version();" 2>&1 || true
164+
echo ""
165+
echo "=== Test crsql_site_id ==="
166+
nix run nixpkgs#sqlite -- :memory: -cmd ".load zig-out/lib/libcrsqlite.so" "SELECT hex(crsql_site_id());" 2>&1 || true
167+
149168
- name: Run parity tests
150169
working-directory: zig
151170
run: make test-parity

.tasks/active/TASK-156-linux-ci-test-parity.md

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ This task adds/strengthens Linux execution for the same “canonical” test ent
5454
- Version mismatch causes extension to load but functions return empty
5555
- Also 2 failing unit tests in `clset_vtab.zig` (edge case: "_schema" alone)
5656
- WASM build fails due to Zig 0.14 incompatibility
57+
- 2025-12-25: Deep investigation of Linux parity test failures:
58+
- CI run 20506960231 shows parity tests returning empty values on Linux
59+
- Unit tests PASS (same process), parity tests FAIL (cross-process via shell)
60+
- Extension loads successfully (no "no such function" errors)
61+
- Functions return empty instead of expected values
62+
- Root cause investigation: global variable visibility in shared library
63+
- Added diagnostic logging to `zig/src/ffi/init.zig` (via CRSQL_DEBUG=1)
64+
- Added API initialization checks in `site_identity.zig` functions
65+
- Added diagnostic step to CI workflow to surface errors
5766

5867
## Fixes Applied
5968
1. **CI Workflow**: Updated `.github/workflows/zig-tests.yaml` to use nix for zig consistently
@@ -65,5 +74,26 @@ This task adds/strengthens Linux execution for the same “canonical” test ent
6574
- Changed `name.len < suffix.len` to `name.len <= suffix.len`
6675
- Prevents "_schema" alone from being considered valid (empty base name)
6776

77+
3. **Diagnostic Logging**: Added debug output to init.zig (2025-12-25)
78+
- Enable with `CRSQL_DEBUG=1` environment variable
79+
- Logs each initialization step and any failures
80+
- Added API initialization checks in UDF implementations
81+
82+
4. **CI Diagnostics**: Added "Diagnose extension loading" step
83+
- Runs before parity tests to surface extension load issues
84+
- Shows file info, sqlite version, and basic function tests
85+
86+
## Investigation Status (2025-12-25)
87+
**Root cause suspected:** Global variable `sqlite3_api` in `sqlite_c.zig` may not be
88+
properly shared across shared library boundaries on Linux. The symptom is:
89+
- Extension init completes successfully (unit tests pass)
90+
- But when SQLite calls the UDF, the global `sqlite3_api` may be uninitialized
91+
- This causes `result_int64()` to silently do nothing (null API pointer check)
92+
93+
**Next steps:**
94+
1. Run CI to capture diagnostic output
95+
2. If API is null in UDF context, need to investigate Zig's shared library symbol visibility
96+
3. May need to change global variable to use `export` or different storage class
97+
6898
## Completion Notes
69-
(Pending CI verification)
99+
(Pending CI verification - diagnostics added)

zig/src/ffi/init.zig

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,25 @@
66
//! Reference: `core/src/crsqlite.c` (C implementation)
77

88
const std = @import("std");
9+
const builtin = @import("builtin");
910
const api = @import("api.zig");
11+
12+
/// Debug logging for extension initialization.
13+
/// Enabled via CRSQL_DEBUG=1 environment variable.
14+
/// Uses std.debug.print which writes to stderr on native platforms.
15+
fn debugLog(comptime fmt: []const u8, args: anytype) void {
16+
// Skip debug logging on freestanding/WASM targets
17+
if (comptime (builtin.os.tag == .freestanding or builtin.cpu.arch == .wasm32 or builtin.cpu.arch == .wasm64)) {
18+
return;
19+
}
20+
21+
// On native platforms, check CRSQL_DEBUG environment variable
22+
const debug_env = std.posix.getenv("CRSQL_DEBUG");
23+
if (debug_env == null) return;
24+
if (!std.mem.eql(u8, debug_env.?, "1")) return;
25+
26+
std.debug.print("[crsql-init] " ++ fmt ++ "\n", args);
27+
}
1028
const after_write = @import("../local_writes/after_write.zig");
1129
const as_crr = @import("../as_crr.zig");
1230
const automigrate = @import("../automigrate.zig");
@@ -189,11 +207,15 @@ pub export fn sqlite3_crsqlite_init(
189207
) callconv(.c) c_int {
190208
_ = pzErrMsg; // TODO: use for detailed error messages
191209

210+
debugLog("sqlite3_crsqlite_init called", .{});
211+
192212
// Initialize the global API pointer (SQLITE_EXTENSION_INIT2 equivalent)
193213
const init_rc = api.initApi(pApi);
194214
if (init_rc != api.SQLITE_OK) {
215+
debugLog("initApi failed with rc={d}", .{init_rc});
195216
return init_rc;
196217
}
218+
debugLog("initApi succeeded", .{});
197219

198220
// Enable trusted_schema so our INNOCUOUS functions can be called from triggers.
199221
// Without this, SQLite 3.31.0+ rejects functions in triggers by default.
@@ -210,8 +232,10 @@ pub export fn sqlite3_crsqlite_init(
210232
null,
211233
);
212234
if (create_master_rc != api.SQLITE_OK) {
235+
debugLog("create crsql_master failed with rc={d}", .{create_master_rc});
213236
return create_master_rc;
214237
}
238+
debugLog("crsql_master table created", .{});
215239

216240
// Write the crsqlite_version to crsql_master for cross-implementation compatibility.
217241
// This is critical: when Rust/C opens a Zig-created database, it checks this version
@@ -221,8 +245,10 @@ pub export fn sqlite3_crsqlite_init(
221245
// Using INSERT OR IGNORE so we don't overwrite an existing version on re-open.
222246
const write_version_rc = writeVersionToMaster(db);
223247
if (write_version_rc != api.SQLITE_OK) {
248+
debugLog("writeVersionToMaster failed with rc={d}", .{write_version_rc});
224249
return write_version_rc;
225250
}
251+
debugLog("version written to crsql_master", .{});
226252

227253
// Create crsql_tracked_peers table for tracking peer sync state.
228254
// Reference: core/rs/core/src/bootstrap.rs
@@ -248,23 +274,31 @@ pub export fn sqlite3_crsqlite_init(
248274
null,
249275
);
250276
if (create_tracked_peers_rc != api.SQLITE_OK) {
277+
debugLog("create crsql_tracked_peers failed with rc={d}", .{create_tracked_peers_rc});
251278
return create_tracked_peers_rc;
252279
}
280+
debugLog("crsql_tracked_peers table created", .{});
253281

254282
// Initialize site_id (creates table if needed, loads or generates site_id)
255283
if (!site_identity.initSiteId(db)) {
284+
debugLog("initSiteId failed", .{});
256285
return api.SQLITE_ERROR;
257286
}
287+
debugLog("site_id initialized", .{});
258288

259289
// Initialize db_version from existing clock tables
260290
site_identity.initDbVersionFromDb(db);
291+
debugLog("db_version initialized to {d}", .{site_identity.getDbVersion()});
261292

262293
// Register functions
263294
const func_rc = registerFunctions(db);
264295
if (func_rc != api.SQLITE_OK) {
296+
debugLog("registerFunctions failed with rc={d}", .{func_rc});
265297
return func_rc;
266298
}
299+
debugLog("functions registered successfully", .{});
267300

301+
debugLog("sqlite3_crsqlite_init completed successfully", .{});
268302
return api.SQLITE_OK;
269303
}
270304

zig/src/site_identity.zig

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,12 @@ fn crsqlSiteIdFunc(
329329
return;
330330
}
331331

332+
// Check if API is properly initialized
333+
if (!api.isInitialized()) {
334+
api.result_error(pCtx, "crsql: API not initialized in site_id", -1);
335+
return;
336+
}
337+
332338
// Get the database handle for THIS connection
333339
const db = api.context_db_handle(pCtx);
334340
if (db == null) {
@@ -371,6 +377,13 @@ fn crsqlDbVersionFunc(
371377
return;
372378
}
373379

380+
// Check if API is properly initialized
381+
if (!api.isInitialized()) {
382+
// API not initialized - this would explain empty results
383+
api.result_error(pCtx, "crsql: API not initialized in db_version", -1);
384+
return;
385+
}
386+
374387
// Return the current db_version from global state
375388
// Note: We skip checking pre_compact_dbversion here because:
376389
// 1. It requires a nested SQL query which can cause issues

0 commit comments

Comments
 (0)