Skip to content
99 changes: 99 additions & 0 deletions .github/scripts/generate-snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/sh

# Defaults; each one can be overridden from the command line.
ROW_COUNT=1000          # upper bound of the generated id sequence
WAL_AUTOCHECKPOINT=0    # value passed to PRAGMA wal_autocheckpoint
MAIN_ONLY=0             # 1 when --main-only was given
OUTPUT_DIR=""           # reset explicitly so an inherited environment
                        # variable cannot stand in for the argument

# Print the help text and terminate the script.
#   $1 - exit status (default: 0). With a non-zero status the help text is
#        written to stderr so that it does not pollute captured stdout.
usage() {
    usage_status="${1:-0}"
    if [ "$usage_status" -ne 0 ]; then
        exec >&2
    fi
    cat <<USAGE
Usage: generate-snapshot.sh [--main-only] [--wal-limit <num>] [--row-count <num>] <output-dir>

Options:
--main-only Generate a main database only snapshot (no WAL)
--wal-limit <num> Maximum amount of WAL pages to keep (default: 0 - unbounded)
--row-count <num> Number of rows to generate (default: 1000)
-h, --help Show this help message

USAGE
    exit "$usage_status"
}

# Report a command-line error, show the help text and exit with status 1.
#   $1 - error description (printed after "Error: ")
die() {
    echo "Error: $1" >&2
    usage 1
}

# Succeed only when $1 is a non-empty string of decimal digits.
is_uint() {
    case "$1" in
        '' | *[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

# Parse arguments
while [ $# -gt 0 ]; do
    case "$1" in
        --main-only)
            MAIN_ONLY=1
            shift 1
            ;;
        --wal-limit)
            if [ $# -lt 2 ] || [ -z "$2" ]; then
                die "--wal-limit requires a value"
            fi
            # The value is interpolated into SQL later, so only accept digits.
            is_uint "$2" || die "--wal-limit expects a non-negative integer, got '$2'"
            WAL_AUTOCHECKPOINT="$2"
            shift 2
            ;;
        --row-count)
            if [ $# -lt 2 ] || [ -z "$2" ]; then
                die "--row-count requires a value"
            fi
            # The value is interpolated into SQL later, so only accept digits.
            is_uint "$2" || die "--row-count expects a non-negative integer, got '$2'"
            ROW_COUNT="$2"
            shift 2
            ;;
        -h | --help)
            usage
            ;;
        -*)
            # Never mistake a misspelled option for the output directory.
            die "unknown option '$1'"
            ;;
        *)
            if [ -n "$OUTPUT_DIR" ]; then
                die "unexpected argument '$1'"
            fi
            OUTPUT_DIR="$1"
            shift 1
            ;;
    esac
done

if [ -z "$OUTPUT_DIR" ]; then
    die "Missing output directory"
fi

# Cleanup: remove the scratch database and its WAL / shared-memory companions
# whenever the script exits, whether it succeeded or not.
trap 'rm -f temp temp-wal temp-shm' EXIT

# Start from a clean slate: leftovers from an earlier run that was killed
# before its EXIT trap ran would make CREATE TABLE below fail.
rm -f temp temp-wal temp-shm

# Unless a main-file-only snapshot was requested, ask sqlite3 not to checkpoint
# when the connection closes, so the WAL is still there for the snapshot step.
SQLITE3_CHECKPOINT_ON_CLOSE=""
if [ "$MAIN_ONLY" -eq 0 ]; then
    SQLITE3_CHECKPOINT_ON_CLOSE=".dbconfig no_ckpt_on_close on"
fi

# First generate a sqlite3 database.
# -bail stops at the first failing statement. Only stdout (pragma echo) is
# discarded; stderr stays visible so a failure can be diagnosed.
if ! sqlite3 -bail temp > /dev/null <<EOF
PRAGMA journal_mode=WAL;
PRAGMA wal_autocheckpoint=$WAL_AUTOCHECKPOINT;
$SQLITE3_CHECKPOINT_ON_CLOSE

CREATE TABLE test(id INTEGER PRIMARY KEY, value TEXT NOT NULL);
WITH sequence AS (
SELECT 1 AS id

UNION ALL

SELECT id + 1
FROM sequence
WHERE id < $ROW_COUNT
)
INSERT OR REPLACE INTO test
SELECT id, hex(randomblob(16))
FROM sequence;

EOF
then
    echo "Error: failed to generate the sqlite3 database" >&2
    exit 1
fi


# Now generate a dqlite snapshot from the sqlite3 database.
# The command script is fed to dqlite-utils on stdin; it attaches the scratch
# database "temp" and passes the requested output directory to .finish.
# NOTE(review): the exact semantics of the dot-commands are defined by
# dqlite-utils and are not visible from this script.
dqlite-utils > /dev/null 2>&1 <<EOF
.snapshot
.add-server "1"
ATTACH DATABASE "temp" AS test;
.finish "$OUTPUT_DIR"
EOF
snapshot_rc=$?

# The tool's own output is discarded above, so say explicitly that it failed
# and propagate its exit status to the caller.
if [ "$snapshot_rc" -ne 0 ]; then
    echo "Error: dqlite-utils failed to generate snapshot '$OUTPUT_DIR' (exit status $snapshot_rc)" >&2
    exit "$snapshot_rc"
fi
35 changes: 32 additions & 3 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
matrix:
os:
- ubuntu-22.04
- ubuntu-24.04
- ubuntu-latest
- ubuntu-22.04-arm
- ubuntu-24.04-arm
compiler:
Expand All @@ -27,7 +27,15 @@ jobs:
sudo apt update
sudo apt install -y libsqlite3-dev liblz4-dev libuv1-dev xfslibs-dev \
linux-libc-dev btrfs-progs xfsprogs zfsutils-linux \
lcov
lcov sqlite3
# Get dqlite-utils tool by downloading prebuilt binary. Eventually we will
# replace this with a proper package installation or with a snap.
curl -H "Authorization: token ${{ secrets.TEMP_DQLITE_UTILS_TOKEN }}" \
-H "Accept: application/octet-stream" \
-L https://api.github.com/repos/canonical/dqlite-utils/releases/assets/343675902 \
-o release.tar.xz
sudo tar -xf release.tar.xz -C /usr/local/bin dqlite-utils
rm release.tar.xz

- name: Build dqlite
env:
Expand All @@ -38,6 +46,24 @@ jobs:
--enable-build-raft
make -j$(nproc) check-norun

- name: Generate test snapshots
run: |
(mkdir test-snapshots &&
cd test-snapshots && \
# Simple cases
../.github/scripts/generate-snapshot.sh --main-only --row-count 1000 small-no-wal && \
../.github/scripts/generate-snapshot.sh --main-only --row-count 100000 medium-no-wal && \
../.github/scripts/generate-snapshot.sh --main-only --row-count 10000000 large-no-wal && \
# With WAL
../.github/scripts/generate-snapshot.sh --wal-limit 1000 --row-count 1000 small-with-wal && \
../.github/scripts/generate-snapshot.sh --wal-limit 1000 --row-count 100000 medium-with-wal && \
../.github/scripts/generate-snapshot.sh --wal-limit 1000 --row-count 10000000 large-with-wal && \
# With huge WAL
../.github/scripts/generate-snapshot.sh --row-count 1000 small-huge-wal && \
../.github/scripts/generate-snapshot.sh --row-count 100000 medium-huge-wal && \
../.github/scripts/generate-snapshot.sh --row-count 10000000 large-huge-wal \
)

- name: Test
env:
CC: ${{ matrix.compiler }}
Expand All @@ -46,7 +72,10 @@ jobs:
run: |
./test/raft/lib/fs.sh setup
export $(./test/raft/lib/fs.sh detect)
sudo UV_THREADPOOL_SIZE=$(($(nproc) * 2)) make check || (cat test-suite.log && false)
sudo \
DQLITE_TEST_SNAPSHOT_DIRS=$(echo test-snapshots/*/ | tr ' ' :) \
UV_THREADPOOL_SIZE=$(($(nproc) * 2)) \
make check || (cat test-suite.log && false)
./test/raft/lib/fs.sh teardown

- name: Coverage
Expand Down
100 changes: 80 additions & 20 deletions src/fsm.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <assert.h>
#include <stdint.h>
#include <sys/mman.h>

#include "command.h"
Expand All @@ -12,7 +13,6 @@
#include "tracing.h"
#include "vfs.h"


struct fsmDatabaseSnapshot {
sqlite3 *conn;
struct raft_buffer header;
Expand All @@ -25,8 +25,7 @@ struct fsmSnapshot {
size_t database_count;
};

struct fsm
{
struct fsm {
struct logger *logger;
struct registry *registry;
struct fsmSnapshot snapshot;
Expand Down Expand Up @@ -74,9 +73,9 @@ static int apply_frames(struct fsm *f, const struct command_frames *c)
}

struct vfsTransaction transaction = {
.n_pages = c->frames.n_pages,
.n_pages = c->frames.n_pages,
.page_numbers = c->frames.page_numbers,
.pages = c->frames.pages,
.pages = c->frames.pages,
};
rv = VfsApply(conn, &transaction);
if (rv != 0) {
Expand Down Expand Up @@ -202,10 +201,8 @@ static int decodeDatabase(const struct registry *r,

const size_t page_size = r->config->vfs.page_size;
dqlite_assert((header.main_size % page_size) == 0);
dqlite_assert(header.wal_size == 0);

const size_t page_count = (size_t)header.main_size / page_size;

size_t page_count = (size_t)header.main_size / page_size;
void **pages = raft_malloc(sizeof(void *) * page_count);
if (pages == NULL) {
return RAFT_NOMEM;
Expand All @@ -215,7 +212,66 @@ static int decodeDatabase(const struct registry *r,
}
cursor->p += header.main_size;

*snapshot = (struct vfsSnapshot) {
const size_t wal_header_size = 32;
if (header.wal_size > wal_header_size) {
tracef("pre 1.17 snapshot loading");
const size_t wal_frame_header_size = 24;
const size_t wal_frame_size = page_size + wal_frame_header_size;

dqlite_assert(header.wal_size > wal_header_size);
dqlite_assert(((header.wal_size - (size_t)wal_header_size) %
wal_frame_size) == 0);

const unsigned n_frames =
(unsigned)((header.wal_size - (size_t)wal_header_size) /
wal_frame_size);
const char *wal = cursor->p;
const void *last_frame =
(const uint8_t *)wal + wal_header_size +
n_frames * wal_frame_size - wal_frame_size;

const size_t wal_page_count = ByteGetBe32(last_frame + 4);
dqlite_assert(wal_page_count != 0);

if (wal_page_count > page_count) {
void *wal_pages = raft_realloc(
pages, sizeof(void *) * wal_page_count);
if (wal_pages == NULL) {
raft_free(pages);
return RAFT_NOMEM;
}
pages = wal_pages;
}

/* Read pages in the WAL order */
for (const char *frame = wal + wal_header_size;
frame < wal + header.wal_size; frame += wal_frame_size) {
uint32_t page_number =
ByteGetBe32((const uint8_t *)frame);
if (page_number > wal_page_count) {
continue;
}
dqlite_assert(page_number > 0);
pages[page_number - 1] =
(void *)(frame + wal_frame_header_size);
}

/* Verify that if the WAL resized the database, then no page is
* missing. */
for (size_t page_number = page_count;
page_number <= wal_page_count; page_number++) {
if (pages[page_number - 1] == NULL) {
tracef("missing page %" PRIu64 " in wal",
(uint64_t)(page_number));
raft_free(pages);
return RAFT_INVALID;
}
}
page_count = wal_page_count;
}
cursor->p += header.wal_size;

*snapshot = (struct vfsSnapshot){
.page_count = page_count,
.page_size = page_size,
.pages = pages,
Expand All @@ -225,7 +281,8 @@ static int decodeDatabase(const struct registry *r,
return RAFT_OK;
}

static int integrityCheckCb(void *pArg, int n, char **values, char **names) {
static int integrityCheckCb(void *pArg, int n, char **values, char **names)
{
bool *check_passed = pArg;

PRE(check_passed != NULL);
Expand Down Expand Up @@ -260,9 +317,11 @@ static int restoreDatabase(struct registry *r,
if (rv == SQLITE_OK) {
bool check_passed = true;
char *errmsg;
rv = sqlite3_exec(conn, "PRAGMA quick_check", integrityCheckCb, &check_passed, &errmsg);
rv = sqlite3_exec(conn, "PRAGMA quick_check", integrityCheckCb,
&check_passed, &errmsg);
if (rv != SQLITE_OK) {
tracef("PRAGMA quick_check failed: %s (%d)", errmsg, rv);
tracef("PRAGMA quick_check failed: %s (%d)", errmsg,
rv);
} else if (!check_passed) {
rv = SQLITE_CORRUPT;
}
Expand Down Expand Up @@ -311,7 +370,8 @@ static int snapshotDatabase(struct db *db, struct fsmDatabaseSnapshot *snapshot)

const struct snapshotDatabase header = {
.filename = db->filename,
.main_size = snapshot->content.page_count * snapshot->content.page_size,
.main_size =
snapshot->content.page_count * snapshot->content.page_size,
.wal_size = 0,
};

Expand All @@ -325,7 +385,7 @@ static int snapshotDatabase(struct db *db, struct fsmDatabaseSnapshot *snapshot)
char *cursor = header_buffer;
snapshotDatabase__encode(&header, &cursor);

snapshot->header = (struct raft_buffer) {
snapshot->header = (struct raft_buffer){
.base = header_buffer,
.len = header_size,
};
Expand Down Expand Up @@ -380,7 +440,8 @@ static int fsm__snapshot(struct raft_fsm *fsm,
i++;
}

struct raft_buffer *buffers = raft_malloc(buffer_count * sizeof(struct raft_buffer));
struct raft_buffer *buffers =
raft_malloc(buffer_count * sizeof(struct raft_buffer));
if (buffers == NULL) {
rv = RAFT_NOMEM;
goto err;
Expand All @@ -397,9 +458,8 @@ static int fsm__snapshot(struct raft_fsm *fsm,
buff_i++;

dqlite_assert((buff_i + databases[i].content.page_count) <=
buffer_count);
for (unsigned j = 0; j < databases[i].content.page_count;
j++) {
buffer_count);
for (unsigned j = 0; j < databases[i].content.page_count; j++) {
buffers[buff_i] = (struct raft_buffer){
.base = databases[i].content.pages[j],
.len = databases[i].content.page_size,
Expand Down Expand Up @@ -462,7 +522,7 @@ static int fsm__restore(struct raft_fsm *fsm, struct raft_buffer *buf)
{
tracef("fsm restore");
struct fsm *f = fsm->data;
struct cursor cursor = {buf->base, buf->len};
struct cursor cursor = { buf->base, buf->len };
struct snapshotHeader header;
unsigned i;
int rv;
Expand Down Expand Up @@ -508,7 +568,7 @@ int fsm__init(struct raft_fsm *fsm,
if (f == NULL) {
return DQLITE_NOMEM;
}
*f = (struct fsm) {
*f = (struct fsm){
.logger = &config->logger,
.registry = registry,
};
Expand Down
Loading
Loading