Skip to content

Commit f8dfccc

Browse files
committed
Implemented a more robust fnv1a_hash function
1 parent 8b610fa commit f8dfccc

File tree

2 files changed

+64
-8
lines changed

2 files changed

+64
-8
lines changed

src/cloudsync.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "sqlite3.h"
1717
#endif
1818

19-
#define CLOUDSYNC_VERSION "0.7.8"
19+
#define CLOUDSYNC_VERSION "0.8.0"
2020

2121
int sqlite3_cloudsync_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);
2222

src/utils.c

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ SQLITE_EXTENSION_INIT3
2929

3030
#define FNV_OFFSET_BASIS 0xcbf29ce484222325ULL
3131
#define FNV_PRIME 0x100000001b3ULL
32+
#define HASH_CHAR(_c) do { h ^= (uint8_t)(_c); h *= FNV_PRIME; h_final = h; } while (0)
3233

3334
// MARK: UUIDv7 -
3435

@@ -201,15 +202,70 @@ char *cloudsync_string_replace_prefix(const char *input, char *prefix, char *rep
201202
return (char *)input;
202203
}
203204

204-
uint64_t fnv1a_hash(const char *data, size_t len) {
205-
uint64_t hash = FNV_OFFSET_BASIS;
206-
for (size_t i = 0; i < len; ++i) {
207-
hash ^= (uint8_t)data[i];
208-
hash *= FNV_PRIME;
205+
/*
206+
Compute a normalized hash of a SQLite CREATE TABLE statement.
207+
208+
* Normalization:
209+
* - Skips comments (-- and / * )
210+
* - Skips non-printable characters
211+
* - Collapses runs of whitespace to single space
212+
* - Case-insensitive outside quotes
213+
* - Preserves quoted string content exactly
214+
* - Handles escaped quotes
215+
* - Trims trailing spaces and semicolons from the effective hash
216+
*/
217+
uint64_t fnv1a_hash (const char *data, size_t len) {
218+
uint64_t h = FNV_OFFSET_BASIS;
219+
int q = 0; // quote state: 0 / '\'' / '"'
220+
int cmt = 0; // comment state: 0 / 1=line / 2=block
221+
int last_space = 1; // prevent leading space
222+
uint64_t h_final = h; // hash state after last non-space, non-semicolon char
223+
224+
for (size_t i = 0; i < len; i++) {
225+
int c = data[i];
226+
int next = (i + 1 < len) ? data[i + 1] : 0;
227+
228+
// detect start of comments
229+
if (!q && !cmt && c == '-' && next == '-') {cmt = 1; i += 1; continue;}
230+
if (!q && !cmt && c == '/' && next == '*') {cmt = 2; i += 1; continue;}
231+
232+
// skip comments
233+
if (cmt == 1) {if (c == '\n') cmt = 0; continue;}
234+
if (cmt == 2) {if (c == '*' && next == '/') { cmt = 0; i += 1; } continue;}
235+
236+
// handle quotes
237+
if (c == '\'' || c == '"') {
238+
if (q == c) {
239+
if (next == c) {HASH_CHAR(c); i += 1; continue;}
240+
q = 0;
241+
} else if (!q) q = c;
242+
HASH_CHAR(c);
243+
last_space = 0;
244+
continue;
245+
}
246+
247+
// inside quote → hash exactly
248+
if (q) {HASH_CHAR(c); last_space = 0; continue;}
249+
250+
// skip non-printable
251+
if (!isprint((unsigned char)c)) continue;
252+
253+
// whitespace normalization
254+
if (isspace((unsigned char)c)) {
255+
if (!last_space) {HASH_CHAR(' '); last_space = 1;}
256+
continue;
257+
}
258+
259+
// skip semicolons at end
260+
if (c == ';') {last_space = 1; continue;}
261+
262+
// normal visible char
263+
HASH_CHAR(tolower(c));
264+
last_space = 0;
209265
}
210-
return hash;
266+
267+
return h_final;
211268
}
212-
213269
// MARK: - CRDT algos -
214270

215271
table_algo crdt_algo_from_name (const char *algo_name) {

0 commit comments

Comments
 (0)