diff --git a/db/comdb2.h b/db/comdb2.h index 74ae7b4d5d..ddd8730491 100644 --- a/db/comdb2.h +++ b/db/comdb2.h @@ -3707,7 +3707,7 @@ int is_tablename_queue(const char *); int rename_table_options(void *tran, struct dbtable *db, const char *newname); -int comdb2_get_verify_remote_schemas(void); +int comdb2_get_verify_remote_schemas(struct sqlclntstate *clnt); void comdb2_set_verify_remote_schemas(void); const char *thrman_get_where(struct thr_handle *thr); diff --git a/db/db_fingerprint.c b/db/db_fingerprint.c index 06483fe94f..d643470fe4 100644 --- a/db/db_fingerprint.c +++ b/db/db_fingerprint.c @@ -208,7 +208,7 @@ void add_fingerprint(struct sqlclntstate *clnt, sqlite3_stmt *stmt, struct strin char *params = NULL; int calc_query_plan = gbl_query_plans && !is_lua; if (calc_query_plan) { - query_plan_ref = form_query_plan(stmt); + query_plan_ref = form_query_plan(clnt, stmt); calc_fingerprint(query_plan_ref ? string_ref_cstr(query_plan_ref) : NULL, &temp, plan_fingerprint); if (gbl_sample_queries && query_plan_ref && param_count(clnt) > 0) { // only get params string if we need it diff --git a/db/db_query_plan.c b/db/db_query_plan.c index c67ddcc11e..de0ef4aa38 100644 --- a/db/db_query_plan.c +++ b/db/db_query_plan.c @@ -31,7 +31,7 @@ extern hash_t *gbl_fingerprint_hash; extern pthread_mutex_t gbl_fingerprint_hash_mu; // return NULL if no plan -struct string_ref *form_query_plan(sqlite3_stmt *stmt) +struct string_ref *form_query_plan(struct sqlclntstate *clnt, sqlite3_stmt *stmt) { struct string_ref *query_plan_ref; Op *op; @@ -61,7 +61,7 @@ struct string_ref *form_query_plan(sqlite3_stmt *stmt) strbuf_appendf(query_plan_buf, "open %s cursor on ", operation); describe_cursor(v, pc, &c); - print_cursor_description(query_plan_buf, &c, 0); + print_cursor_description(clnt, query_plan_buf, &c, 0); } query_plan_ref = strbuf_len(query_plan_buf) > 0 ? 
create_string_ref((char *)strbuf_buf(query_plan_buf)) : NULL; diff --git a/db/disttxn.c b/db/disttxn.c index 3aac9729b6..c8cf0d2d45 100644 --- a/db/disttxn.c +++ b/db/disttxn.c @@ -1873,8 +1873,8 @@ int participant_has_failed(const char *dist_txnid, const char *coordinator_name, int rcode, int outrc, const char *errmsg) { if (gbl_debug_disttxn_trace) { - logmsg(LOGMSG_USER, "DISTTXN %s dist_txnid %s rcode=%d outrc=%d errmsg=%s\n", __func__, dist_txnid, rcode, - outrc, errmsg); + logmsg(LOGMSG_USER, "DISTTXN %s FAILED dist_txnid %s rcode=%d outrc=%d errmsg=%s\n", __func__, dist_txnid, + rcode, outrc, errmsg); } Pthread_mutex_lock(&part_lk); participant_t *p = hash_find(participant_hash, &dist_txnid); @@ -1891,7 +1891,8 @@ int participant_has_failed(const char *dist_txnid, const char *coordinator_name, void participant_has_propagated(const char *dist_txnid, const char *coordinator_name, const char *coordinator_master) { if (gbl_debug_disttxn_trace) { - logmsg(LOGMSG_USER, "DISTTXN %s dist_txnid %s\n", __func__, dist_txnid); + logmsg(LOGMSG_USER, "DISTTXN %s PROPAGATED %s master %s dist_txnid %s\n", __func__, coordinator_name, + coordinator_master, dist_txnid); } send_coordinator_propagated(dist_txnid, coordinator_name, coordinator_master); disable_sanc_heartbeats(dist_txnid, 1); @@ -1902,7 +1903,8 @@ static int participant_wait_int(const char *dist_txnid, const char *coordinator_ const char *coordinator_master) { if (gbl_debug_disttxn_trace) { - logmsg(LOGMSG_USER, "DISTTXN %s dist_txnid %s\n", __func__, dist_txnid); + logmsg(LOGMSG_USER, "DISTTXN %s WAIT %s tier %s master %s dist_txnid %s\n", __func__, coordinator_name, + coordinator_tier, coordinator_master, dist_txnid); } int rtn = 0, first = 1, lock_desired = 0; diff --git a/db/fdb_fend.c b/db/fdb_fend.c index 8c0f875cdd..3fe3405152 100644 --- a/db/fdb_fend.c +++ b/db/fdb_fend.c @@ -64,14 +64,17 @@ #include "fdb_whitelist.h" #include "schemachange.h" +#include "disttxn.h" extern int gbl_fdb_resolve_local; extern 
int gbl_fdb_allow_cross_classes; extern int gbl_partial_indexes; extern int gbl_expressions_indexes; +extern int gbl_debug_disttxn_trace; int gbl_fdb_default_ver = FDB_VER; int gbl_fdb_track = 0; +int gbl_fdb_track_locking = 0; int gbl_fdb_track_times = 0; int gbl_test_io_errors = 0; int gbl_fdb_push_remote = 1; @@ -111,27 +114,39 @@ struct fdb_tbl_ent { struct fdb_tbl_ent) lnk; /* link for entries list (data and indexes) */ }; -/* foreign db table structure, caches the sql master rows */ +/* foreign db table structure, caches the sql master rows; + * this is stored in two places: the source is in fdb + * it is also cached in the sqlite engine + */ struct fdb_tbl { char *name; int name_len; /* no zero */ unsigned long long version; /* version of the tbl/index when cached */ - /* explain support */ - struct fdb *fdb; /* tbl->fdb(name) */ + /* the tbl->fdb link is valid for as long we have a valid fdb live lock + * There are two possibilities: + * 1) during prepare, for discovering remote tables; if sqlite already has the remote tables + * no such lock is needed; otherwise, we got live lock and table locks for the table + * to be attached, and the stats tables; once the sqlite has populated with schema and + * stats, the live lock and any table locks are released + * 2) during query run, after we get table locks, until we release them; each table lock bumps its + * fdb live lock once + */ + struct fdb *fdb; int nix; /* number of indexes */ int ix_partial; /* is there partial index */ int ix_expr; /* is there expressions index */ - LISTC_T(struct fdb_tbl_ent) ents; - pthread_mutex_t ents_mtx; /* entries add/rm lock */ /*TODO: review this - mutex, we need - something else */ + LISTC_T(struct fdb_tbl_ent) ents; /* protected by table_lock */ + LINKC_T(struct fdb_tbl) lnk; /* link for tables for a fdb */ + pthread_rwlock_t table_lock; /* use to lock the table, by sqlite engines or cleanup */ int need_version; /* a remote op detected that local is stale, and this hints to 
the new version */ + pthread_mutex_t need_version_mtx; /* mutex for the need_version; a stale + tbl is also unlinked under this lock */ }; /* foreign db structure, caches the used tables for the remote db */ @@ -144,23 +159,29 @@ struct fdb { int local; /* was this added by a LOCAL access ?*/ int dbnum; /* cache dbnum for db, needed by current dbt_handl_alloc* */ - int users; /* how many clients this db has, sql engines and cursors */ - pthread_mutex_t users_mtx; + /* this controls the life of a fdb object (and only protects against fdb removal) + * readers acquire this lock during get_fdb/new_fdb; + * removing this fdb requires write mode + */ + pthread_rwlock_t inuse_rwlock; + /* this protects the table list and hashes for them, and it is a short-term lock; + * the tables are protected using table locks; we cache the table in clnt + * and use mvcc to update them + */ + pthread_mutex_t tables_mtx; /* protect the tables list */ + LISTC_T(struct fdb_tbl) tables; /* list of tables */ hash_t * h_ents_rootp; /* FDB_TBL_ENT_T data and index entries, by rootpage */ hash_t *h_ents_name; /* FDB_TBL_ENT_T data and index entries, by name */ hash_t *h_tbls_name; /* FDB_TBL_T entries */ - pthread_rwlock_t h_rwlock; /* hash lock */ fdb_location_t *loc; /* where is the db located? 
*/ COMDB2BUF *dbcon; /* cached db connection */ pthread_mutex_t dbcon_mtx; - Schema *schema; /* shared schema for fdb tables */ - - fdb_sqlstat_cache_t *sqlstats; /* cache of sqlite stats, per foreign db */ pthread_mutex_t sqlstats_mtx; /* mutex for stats */ + fdb_sqlstat_cache_t *sqlstats; /* cache of sqlite stats, per foreign db */ int has_sqlstat1; /* if sqlstat1 was found */ int has_sqlstat4; /* if sqlstat4 was found */ @@ -174,7 +195,7 @@ struct fdb_cache { int nalloc; /* allocated array */ int nused; /* number of foreign dbs */ fdb_t **arr; /* the array of foreign_db objects */ - pthread_rwlock_t arr_lock; /* nalloc, nused and arr lock */ + pthread_mutex_t arr_mtx; /* nalloc, nused and arr lock */ hash_t *h_curs; /* list of cursors */ pthread_rwlock_t h_curs_lock; /* cursors lock, receive side */ @@ -234,18 +255,22 @@ typedef struct fdb_systable_info { static fdb_cache_t fdbs; -static fdb_t *__cache_fnd_fdb(const char *dbname, int *idx); -static int __cache_link_fdb(fdb_t *fdb); -static void __cache_unlink_fdb(fdb_t *fdb); +static void _link_fdb_table(fdb_t *fdb, fdb_tbl_t *tbl); +static void _unlink_fdb_table(fdb_t *fdb, fdb_tbl_t *tbl); +static int _free_fdb_tbl(fdb_t *fdb, fdb_tbl_t *tbl); +/* calls _free_fdb_tbl if this is the last reader */ +static void _try_free_fdb_tbl(fdb_t *fdb, fdb_tbl_t *tbl); +static fdb_sqlstat_cache_t *_sqlstats_get(fdb_t *fdb, sqlclntstate *clnt); +static int _clnt_cache_add_tbl(sqlclntstate *clnt, fdb_tbl_t *tbl); +void _clnt_cache_rem_tbl(sqlclntstate *clnt, fdb_tbl_t *tbl); +fdb_tbl_t *_clnt_cache_get_tbl_by_name(sqlclntstate *clnt, const char *name); +static void _clnt_cache_destroy(sqlclntstate *clnt); -static int insert_table_entry_from_packedsqlite(fdb_t *fdb, fdb_tbl_t *tbl, - char *row, int rowlen, - fdb_tbl_ent_t **found_ent, - int versioned); -static int check_table_fdb(fdb_t *fdb, fdb_tbl_t *tbl, int initial, - fdb_tbl_ent_t **found_ent, int is_sqlite_master); +static int _add_fdb_tbl_ent_from_packedksqlite(const 
char *dbname, fdb_tbl_t *tbl, char *row, int rowlen, + int versioned); +static int _retrieve_fdb_tbl(fdb_t *fdb, fdb_tbl_t *tbl, int initial); -static int fdb_num_entries(fdb_t *fdb); +static int _num_entries(fdb_t *fdb); /* REMCUR frontend implementation */ static int fdb_cursor_close(BtCursor *pCur); @@ -293,15 +318,6 @@ static int fdb_cursor_update(BtCursor *pCur, sqlclntstate *clnt, fdb_tran_t *trans, unsigned long long oldgenid, unsigned long long genid, int datalen, char *data); -/* NOTE: ALERT! always call this with h_rwlock acquired; as of now add_table_fdb - has WR lock - on it and it is undefined behaviour to get the read lock here */ -static fdb_tbl_ent_t *get_fdb_tbl_ent_by_name_from_fdb(fdb_t *fdb, - const char *name); - -static int __free_fdb_tbl(void *obj, void *arg); -static int __lock_wrlock_exclusive(char *dbname); - /* Node affinity functions: a clnt tries to stick to one node, unless error in which case it will move to another one; error will not impact other concurrent @@ -314,7 +330,7 @@ void _fdb_clear_clnt_node_affinities(sqlclntstate *clnt); static int _get_protocol_flags(sqlclntstate *clnt, fdb_t *fdb, int *flags); -static int _validate_existing_table(fdb_t *fdb, int cls, int local); +static int _validate_existing_fdb(fdb_t *fdb, int cls, int local); int fdb_get_remote_version(const char *dbname, const char *table, enum mach_class class, int local, @@ -340,7 +356,7 @@ int fdb_cache_init(int n) return -1; } fdbs.nalloc = n; - Pthread_rwlock_init(&fdbs.arr_lock, NULL); + Pthread_mutex_init(&fdbs.arr_mtx, NULL); fdbs.h_curs = hash_init_i4(0); Pthread_rwlock_init(&fdbs.h_curs_lock, NULL); @@ -350,9 +366,9 @@ int fdb_cache_init(int n) /** * internal, locate an fdb object based on name - * + * internal, needs caller locking (fdbs.arr_mtx) */ -static fdb_t *__cache_fnd_fdb(const char *dbname, int *idx) +static fdb_t *_cache_fnd_fdb(const char *dbname, int *idx) { int len = strlen(dbname); int i = 0; @@ -374,10 +390,9 @@ static fdb_t 
*__cache_fnd_fdb(const char *dbname, int *idx) /** * add a fdb to the cache - * internal, needs caller locking (arr_lock) - * + * internal, needs caller locking (fdbs.arr_mtx) */ -static int __cache_link_fdb(fdb_t *fdb) +static int _cache_link_fdb(fdb_t *fdb) { int rc = FDB_NOERR; fdb_t **ptr; @@ -403,10 +418,10 @@ static int __cache_link_fdb(fdb_t *fdb) /** * remove a fdb to the cache - * internal, needs caller locking (fdbs.arr_lock) + * internal, needs caller locking (fdbs.arr_mtx) * */ -static void __cache_unlink_fdb(fdb_t *fdb) +static void _cache_unlink_fdb(fdb_t *fdb) { int ix; @@ -418,9 +433,10 @@ static void __cache_unlink_fdb(fdb_t *fdb) logmsg(LOGMSG_ERROR, "%s: bug? for db %s\n", __func__, fdb->dbname); return; } + if (fdbs.nused > ix + 1) { - memmove(&fdbs.arr[ix], &fdbs.arr[ix + 1], - sizeof(fdbs.arr[0]) * (fdbs.nused - ix - 1)); + /* copy tail link to the removed fdb */ + fdbs.arr[ix] = fdbs.arr[fdbs.nused - 1]; } fdbs.nused--; fdbs.arr[fdbs.nused] = NULL; @@ -430,75 +446,91 @@ static void __cache_unlink_fdb(fdb_t *fdb) * Free an fdb object * */ -void __free_fdb(fdb_t *fdb) +static void _free_fdb(fdb_t *fdb) { + fdb_tbl_t *tmp, *tbl; + + LISTC_FOR_EACH_SAFE(&fdb->tables, tbl, tmp, lnk) + { + _unlink_fdb_table(fdb, tbl); + _free_fdb_tbl(fdb, tbl); + } + + fdb_sqlstat_cache_destroy(&fdb->sqlstats); + free(fdb->dbname); + Pthread_mutex_destroy(&fdb->tables_mtx); + Pthread_rwlock_destroy(&fdb->inuse_rwlock); hash_free(fdb->h_ents_rootp); hash_free(fdb->h_ents_name); hash_free(fdb->h_tbls_name); - Pthread_rwlock_destroy(&fdb->h_rwlock); - Pthread_mutex_destroy(&fdb->sqlstats_mtx); Pthread_mutex_destroy(&fdb->dbcon_mtx); - Pthread_mutex_destroy(&fdb->users_mtx); + Pthread_mutex_destroy(&fdb->sqlstats_mtx); free(fdb); } /** - * Add a lockless user + * Retrieve a fdb object; + * If found, return the fdb read locked * */ -static void __fdb_add_user(fdb_t *fdb, int noTrace) +fdb_t *get_fdb_int(const char *dbname, enum fdb_get_flag flag, const char *f, int l) { 
- Pthread_mutex_lock(&fdb->users_mtx); - fdb->users++; - - if (!noTrace && gbl_fdb_track) - logmsg(LOGMSG_USER, "%p %s %s users %d\n", (void *)pthread_self(), __func__, fdb->dbname, fdb->users); - - assert(fdb->users > 0); - Pthread_mutex_unlock(&fdb->users_mtx); -} - -/** - * Remove a lockless user - * - */ -static void __fdb_rem_user(fdb_t *fdb, int noTrace) -{ - Pthread_mutex_lock(&fdb->users_mtx); - fdb->users--; - - if (!noTrace && gbl_fdb_track) - logmsg(LOGMSG_USER, "%p %s %s users %d\n", (void *)pthread_self(), __func__, fdb->dbname, fdb->users); + fdb_t *fdb = NULL; - assert(fdb->users >= 0); - Pthread_mutex_unlock(&fdb->users_mtx); + Pthread_mutex_lock(&fdbs.arr_mtx); + fdb = _cache_fnd_fdb(dbname, NULL); + if (fdb && flag == FDB_GET_LOCK) { + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Locking fdb %s get_fdb at %s:%d\n", fdb->dbname, f, l); + Pthread_rwlock_rdlock(&fdb->inuse_rwlock); + } + Pthread_mutex_unlock(&fdbs.arr_mtx); + return fdb; } /** - * Retrieve a foreign db object - * The callers of this function should make sure a table lock is acquired - * Such by calling fdb_lock_table(). 
- + This matches with either a get_fdb() or _new_fdb() + * It releases the lock, and possibly unlinks and frees + * the structure if flag specifies that */ -fdb_t *get_fdb(const char *dbname) -{ - fdb_t *fdb = NULL; +void put_fdb_int(fdb_t *fdb, enum fdb_put_flag flag, const char *f, int l) +{ + Pthread_mutex_lock(&fdbs.arr_mtx); + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Unlocking fdb %s with flag %d at %s:%d\n", fdb->dbname, flag, f, l); + Pthread_rwlock_unlock(&fdb->inuse_rwlock); + if (flag == FDB_PUT_TRYFREE) { + /* try to get a wrlock on the fdb; + * if this succeeds, there are no readers, therefore + * it is safe to remove; otherwise, there is a + * reader that will take over + */ + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Trywrlock fdb %s with flag %d\n", fdb->dbname, flag); + if (pthread_rwlock_trywrlock(&fdb->inuse_rwlock) == 0) { + _cache_unlink_fdb(fdb); + /* after this, the fdb is not findable anymore */ + } else { + flag = FDB_PUT_NOFREE; /* do not free, there are readers */ + } + } else if (flag == FDB_PUT_FORCEFREE) { + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Writelock fdb %s with flag %d\n", fdb->dbname, flag); + Pthread_rwlock_wrlock(&fdb->inuse_rwlock); + _cache_unlink_fdb(fdb); + } + Pthread_mutex_unlock(&fdbs.arr_mtx); - Pthread_rwlock_rdlock(&fdbs.arr_lock); - fdb = __cache_fnd_fdb(dbname, NULL); -#if 0 - NOTE: we will rely on table locks instead of this! 
- if(fdb) - { - __fdb_add_user(fdb, 0); - } -#endif - Pthread_rwlock_unlock(&fdbs.arr_lock); - return fdb; + if (flag != FDB_PUT_NOFREE) { + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Unlocking fdb %s checked flag %d\n", fdb->dbname, flag); + Pthread_rwlock_unlock(&fdb->inuse_rwlock); + _free_fdb(fdb); + } } -static void init_fdb(fdb_t * fdb, const char * dbname, enum mach_class class, int local, int class_override) +static void _init_fdb(fdb_t *fdb, const char *dbname, enum mach_class class, int local, int class_override) { fdb->dbname = strdup(dbname); fdb->class = class; @@ -510,35 +542,44 @@ static void init_fdb(fdb_t * fdb, const char * dbname, enum mach_class class, in */ fdb->server_version = gbl_fdb_default_ver; fdb->dbname_len = strlen(dbname); - fdb->users = 1; fdb->local = local; + Pthread_rwlock_init(&fdb->inuse_rwlock, NULL); + Pthread_mutex_init(&fdb->tables_mtx, NULL); + listc_init(&fdb->tables, offsetof(struct fdb_tbl, lnk)); fdb->h_ents_rootp = hash_init_i4(0); fdb->h_ents_name = hash_init_strptr(offsetof(struct fdb_tbl_ent, name)); fdb->h_tbls_name = hash_init_strptr(0); - Pthread_rwlock_init(&fdb->h_rwlock, NULL); Pthread_mutex_init(&fdb->sqlstats_mtx, NULL); Pthread_mutex_init(&fdb->dbcon_mtx, NULL); - Pthread_mutex_init(&(fdb->users_mtx), NULL); } /** - * Adds a new foreign db to the local cache - * If it already exists, created is set to 0 - * and users incremented. Otherwise created - * is set and the db is created. + * Check if a "dbname" fdb exists, if not, create one. + * The returned fdb is read locked. 
Check/add is done under lock arr_mtx * */ -static fdb_t *new_fdb(const char *dbname, int *created, enum mach_class class, - int local, int class_override) +static fdb_t *_new_fdb(const char *dbname, int *created, enum mach_class class, int local, int class_override) { int rc = 0; fdb_t *fdb; - Pthread_rwlock_wrlock(&fdbs.arr_lock); - fdb = __cache_fnd_fdb(dbname, NULL); + Pthread_mutex_lock(&fdbs.arr_mtx); + + /* there is no more exclusive long term lock for fdb, only read lock + * we are using the exclusive mutex to progress the dlock1 test + */ + if (_test_trap_dlock1 == 2) { + _test_trap_dlock1++; + } + + fdb = _cache_fnd_fdb(dbname, NULL); if (fdb) { - assert(class == fdb->class); - __fdb_add_user(fdb, 0); + assert(class == fdb->class && local == fdb->local); + + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Locking existing fdb %s\n", fdb->dbname); + + Pthread_rwlock_rdlock(&fdb->inuse_rwlock); *created = 0; goto done; @@ -551,16 +592,21 @@ static fdb_t *new_fdb(const char *dbname, int *created, enum mach_class class, goto done; } - init_fdb(fdb, dbname, class, local, class_override); + _init_fdb(fdb, dbname, class, local, class_override); - /* this should be safe to call even though the fdb is not booked in the fdb - * array */ - __fdb_add_user(fdb, 0); + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Locking new fdb %s\n", fdb->dbname); - rc = __cache_link_fdb(fdb); + Pthread_rwlock_rdlock(&fdb->inuse_rwlock); + + rc = _cache_link_fdb(fdb); if (rc) { /* this was not visible, free it here */ - __free_fdb(fdb); + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Unlocking new fdb %s due to link error\n", fdb->dbname); + + Pthread_rwlock_unlock(&fdb->inuse_rwlock); + _free_fdb(fdb); fdb = NULL; *created = 0; } else { @@ -568,10 +614,9 @@ static fdb_t *new_fdb(const char *dbname, int *created, enum mach_class class, } done: - Pthread_rwlock_unlock(&fdbs.arr_lock); + Pthread_mutex_unlock(&fdbs.arr_mtx); /* At this point, if we've created a new fdb, - it is 
findable by others and users might - increase/decrease independently */ + it is findable by others */ if (_test_trap_dlock1 == 1) { _test_trap_dlock1 = 2; @@ -582,42 +627,13 @@ static fdb_t *new_fdb(const char *dbname, int *created, enum mach_class class, } return fdb; - - /* returns NULL if error or fdb with fdb->users incremented */ + /* returns NULL if error or fdb with inuse_rwlock live lock acquired in read mode */ } void destroy_local_fdb(fdb_t *fdb) { if (fdb) - __free_fdb(fdb); - -} - -/** - * Try to destroy the session; - * only done when connecting to unexisting dbs - * If somehow there are other clients, ignore - * this. - */ -static void destroy_fdb(fdb_t *fdb) -{ - if (!fdb) - return; - - Pthread_rwlock_wrlock(&fdbs.arr_lock); - - /* if there are any users, don't touch the db */ - Pthread_mutex_lock(&fdb->users_mtx); - fdb->users--; - if (fdb->users == 0) { - __cache_unlink_fdb(fdb); - Pthread_mutex_unlock(&fdb->users_mtx); - __free_fdb(fdb); - } else { - Pthread_mutex_unlock(&fdb->users_mtx); - } - - Pthread_rwlock_unlock(&fdbs.arr_lock); + _free_fdb(fdb); } int is_local(const fdb_t *fdb) @@ -627,26 +643,8 @@ int is_local(const fdb_t *fdb) /************** TABLE OPERATIONS ***************/ -/** - * Free an unlinked table structure - * Unlocked, needs tbls_mtx - * - */ -void __fdb_free_table(fdb_t *fdb, fdb_tbl_t *tbl) -{ - free(tbl->name); - Pthread_mutex_destroy(&tbl->ents_mtx); - free(tbl); -} - -/** - * Add a new table to the foreign db. 
Also, - * retrieves the current sql master row, if possible - * - * Note: fdb object cannot go away because it has users>0 - * - */ -static fdb_tbl_t *_alloc_table_fdb(fdb_t *fdb, const char *tblname) +/* Allocate a fdb_tbl entry */ +static fdb_tbl_t *_alloc_fdb_tbl(const char *tblname) { fdb_tbl_t *tbl; @@ -659,9 +657,9 @@ static fdb_tbl_t *_alloc_table_fdb(fdb_t *fdb, const char *tblname) tbl->name = strdup(tblname); tbl->name_len = strlen(tblname); - tbl->fdb = fdb; - Pthread_mutex_init(&tbl->ents_mtx, NULL); + Pthread_rwlock_init(&tbl->table_lock, NULL); listc_init(&tbl->ents, offsetof(struct fdb_tbl_ent, lnk)); + Pthread_mutex_init(&tbl->need_version_mtx, NULL); return tbl; } @@ -673,95 +671,143 @@ enum table_status { }; /** * Check if the table exists and has the right version - * NOTE: registered as a fdb user so fdb does not get removed + * NOTE: this is called when adding a table to an sqlite engine + * during prepare, and the fdb is read locked (live lock), and + * the tables_mtx is acquired + * status is: + * - TABLE_EXISTS if table exists and has the right version + * - TABLE_STALE, if the table exists but has old version + * - TABLE_MISSING, otherwise * + * !!NOTE!!: only calls this when tables_mtx is acquired + * NOTE2: remote_version is -1ULL if not retrieved */ -static int _table_exists(fdb_t *fdb, const char *table_name, - enum table_status *status, int *version) +static fdb_tbl_t *_table_exists(fdb_t *fdb, const char *table_name, enum table_status *status, int *version, + unsigned long long remote_version) { - unsigned long long remote_version; fdb_tbl_t *table; - int rc = FDB_NOERR; - struct errstat err = {0}; *status = TABLE_MISSING; + //!!NOTE!!: only calls this when tables_mtx is acquired table = hash_find_readonly(fdb->h_tbls_name, &table_name); if (table) { *status = TABLE_EXISTS; + Pthread_mutex_lock(&table->need_version_mtx); /* ok, table exists, HURRAY! Is the table marked obsolete? 
*/ if (table->need_version && (table->version != (table->need_version - 1))) { *status = TABLE_STALE; } else { - if (comdb2_get_verify_remote_schemas()) { - /* this is a retry for an already */ - rc = fdb_get_remote_version(fdb->dbname, table_name, fdb->class, - fdb->loc == NULL, &remote_version, &err); - if (rc == FDB_NOERR) { - if (table->version != remote_version) { - logmsg(LOGMSG_WARN, "Remote table %s.%s new version is " - "%lld, cached %lld\n", - fdb->dbname, table_name, remote_version, - table->version); - table->need_version = remote_version + 1; - *status = TABLE_STALE; - } else { - /* table version correct, make sure to pass this - * upstream */ - *version = table->version; - } - } else { - logmsg(LOGMSG_ERROR, "Lookup table %s failed \"%s\"\n", - table_name, err.errstr); - return FDB_ERR_GENERIC; - } + if (remote_version != -1ULL && table->version != remote_version) { + logmsg(LOGMSG_WARN, + "Remote table %s.%s new version is " + "%lld, cached %lld\n", + fdb->dbname, table_name, remote_version, table->version); + table->need_version = remote_version + 1; + *status = TABLE_STALE; } else { + /* table version correct, make sure to pass this upstream */ *version = table->version; } } + Pthread_mutex_unlock(&table->need_version_mtx); + } + return table; +} - /* NOTE: we don't prepopulate sql engines at creation - with schema for already existing fdbs; therefore, this code - falts in to update the new sql engine on demand. This trace - would spew in such a case, which we don't want to. 
- */ - /* - fprintf(stderr, "%s: table \"%s\" in db \"%s\" already exist %d!\n", - __func__, table_name, fdb->dbname, *version); - if(!*version) - abort(); +/** + * Expand _table_exists to handle the case when the table exists but is stale; + * in this case, we unlink the existing table (and free it if no users). + * //!!NOTE!!: only call this when tables_mtx is acquired + */ +static fdb_tbl_t *_table_exists_and_not_stale(fdb_t *fdb, const char *table_name, int *version, + unsigned long long remote_version) +{ + fdb_tbl_t *remtbl; + enum table_status status; + + /* check if the table exists */ + remtbl = _table_exists(fdb, table_name, &status, version, remote_version); + if (status == TABLE_EXISTS) { + /* table exists and has the right version; we still have the fdb read locked so + * we can proceed with updating the sqlite engine schema + * + * the table is not yet locked, we are still in prepare phase; but the sqlite will + * have the info cached, and when we get the table locks, we will acquire and check + * the cached version again */ + assert(remtbl); + /* collect stats tables also */ + return remtbl; } + if (!remtbl) /* not existing */ + return NULL; - return FDB_NOERR; + /* status was TABLE_STALE, need to remove table */ + + Pthread_mutex_lock(&remtbl->need_version_mtx); + if (!remtbl->need_version || ((remtbl->need_version - 1) == remtbl->version)) { + /* table was fixed in the meantime!, drop exclusive lock */ + *version = remtbl->version; + Pthread_mutex_unlock(&remtbl->need_version_mtx); + + return remtbl; + } + + /* table is still stale, remove */ + if (gbl_fdb_track) + logmsg(LOGMSG_USER, "Detected stale table \"%s.%s\" version %llu required %d\n", remtbl->fdb->dbname, + remtbl->name, remtbl->version, remtbl->need_version - 1); + + /* this is done under fdb tables_mtx lock */ + _unlink_fdb_table(fdb, remtbl); + + Pthread_mutex_unlock(&remtbl->need_version_mtx); + + /* at this point the remote table is only referenced by this thread and + * existing reader sqlite 
engines + * try to free it if this is last reader + */ + _try_free_fdb_tbl(fdb, remtbl); + + return NULL; } /** * Handling sqlite_stats; they have been temporarely added but linked - * to original table tbl; (I did that to run only a query first time) + * to original table tbl. * They really belong to the fdb, lets properly link them now + * This is done with fdb live lock and tables_mtx acquired + * The table is not yet linked in fdb (not visible) * * Returns -1 for ENOMEM or if cannot find stat_name + * */ -int fix_table_stats(fdb_t *fdb, fdb_tbl_t *tbl, const char *stat_name) +fdb_tbl_t *_fix_table_stats(fdb_t *fdb, fdb_tbl_t *tbl, const char *stat_name) { fdb_tbl_t *stat_tbl; fdb_tbl_ent_t *stat_ent; /* alloc table */ - stat_tbl = _alloc_table_fdb(fdb, stat_name); + stat_tbl = _alloc_fdb_tbl(stat_name); if (!stat_tbl) { logmsg(LOGMSG_ERROR, "%s: OOM %s for %p\n", __func__, stat_name, tbl); - return -1; + return NULL; } - stat_ent = get_fdb_tbl_ent_by_name_from_fdb(fdb, stat_name); + /* this is a table not yet linked in the fdb */ + LISTC_FOR_EACH(&tbl->ents, stat_ent, lnk) + { + if (strncasecmp(stat_name, stat_ent->name, strlen(stat_name)) == 0) { + break; + } + } if (!stat_ent) { logmsg(LOGMSG_ERROR, "%s: Cannot find %s for %p\n", __func__, stat_name, tbl); - return -1; + return NULL; } /* fprintf(stderr, "XYXY: for \"%s\" fixing table from \"%s\" to \"%s\"\n", @@ -769,7 +815,7 @@ int fix_table_stats(fdb_t *fdb, fdb_tbl_t *tbl, const char *stat_name) */ /* we need to move this from ent->tbl->ents to tbl_stat->ents */ - listc_rfl(&stat_ent->tbl->ents, stat_ent); + listc_rfl(&tbl->ents, stat_ent); stat_ent->tbl = stat_tbl; stat_ent->tbl->version = stat_ent->_version; listc_abl(&stat_tbl->ents, stat_ent); @@ -777,210 +823,348 @@ int fix_table_stats(fdb_t *fdb, fdb_tbl_t *tbl, const char *stat_name) if (gbl_fdb_track) logmsg(LOGMSG_USER, "Linking %s to %s\n", stat_tbl->name, fdb->dbname); - hash_add(fdb->h_tbls_name, stat_tbl); - return 0; + return stat_tbl; +} + 
+/** + * Delete one of the entries from table + * one use: if stat1 does not exist remotely, but stat4 exists, ignore it and delete it + */ +void _remove_table_stat(fdb_t *fdb, fdb_tbl_t *tbl, const char *stat_name) +{ + fdb_tbl_ent_t *stat_ent; + + LISTC_FOR_EACH(&tbl->ents, stat_ent, lnk) + { + if (strncasecmp(stat_name, stat_ent->name, strlen(stat_name)) == 0) { + break; + } + } + if (stat_ent) { + listc_rfl(&tbl->ents, stat_ent); + free(stat_ent->ent); + free(stat_ent->name); + free(stat_ent); + } + + fdb->has_sqlstat4 = 0; } /** - * Add a table and index stats if any; acquires exclusive access to fdb cache + * Check if the table "table_name" exists, and if it does, if the version matches the + * remote version. If it does not, we retrieve the table from remote; we update local + * table if it exists and it is stale. + * If sqlite stats do not exist either, retrieve them too. + * Upon success, we return with table and stats locked + * + * NOTE: all work is done under tables_mtx, except for initial remote version check + * NOTE2: obviously there is a read live lock for fdb (obtained by _new_fdb()) + * NOTE3: need to handle special cases when the query asks for sqlite_master or stats * */ -static int _add_table_and_stats_fdb(fdb_t *fdb, const char *table_name, - int *version, int in_analysis_load) +static int _add_table_and_stats_fdb(sqlclntstate *clnt, sqlite3InitInfo *init, fdb_t *fdb, const char *table_name, + int *version) { - enum table_status status; + unsigned long long remote_version = -1ULL; + struct errstat err = {0}; int rc = FDB_NOERR; - fdb_tbl_t *tbl; + fdb_tbl_t *tbl = NULL, *stat1 = NULL, *stat4 = NULL; int initial; - fdb_tbl_ent_t *found_ent; - int is_sqlite_master; /* corner case when sqlite_master is the first query - remote; - there is no "sqlite_master" entry for - sqlite_master, but + fdb_tbl_ent_t *found_ent = NULL; + int link_table = 0, link_stat1 = 0, link_stat4 = 0; + int is_sqlite_master; /* corner case when sqlite_master is the first 
query remote; + there is no "sqlite_master" entry for sqlite_master, but that doesn't make the case here to fail */ - /* check if the table exists, and if it does need refreshing - if it exists and has right version, grab the version and return */ - rc = _table_exists(fdb, table_name, &status, version); - if (rc == FDB_NOERR && status == TABLE_EXISTS) { - /* fdb unlocked, users incremented */ - goto nop; - } else if (rc != FDB_NOERR) { - logmsg(LOGMSG_WARN, "failure to connect to remote %s.%s\n", fdb->dbname, - table_name); - goto nop; - } - - /* NOTE: since this function is called recursively to add sqlite_stat* for a - * table as well - * we make sure we acquire a lock only once, for the initial call. - * NOTE2: it is possible that the sqlite engine adds another table, but it - * doesn't have the - * schema for sqlite_stat, calling into sqlite3AddAndLock again where stats - * are loaded; - * in this case we do already have an exclude lock so we skip the locking as - * well - */ - if (!in_analysis_load) { - /* since we removed ourselves, it is possible that the fdb object will - go away - in this case, we need to get an exclusive lock while syncronizing - with the - destroy_fdb process; we need to use a copy of fdb->dbname instead of - volative fdb object */ - char *tmpname = strdup(fdb->dbname); - - /* new_fdb bumped this up, we need exclusive lock, get ourselves out */ - __fdb_rem_user(fdb, 0); - - rc = __lock_wrlock_exclusive(tmpname); - free(tmpname); - if (rc) { - if (rc == FDB_ERR_FDB_NOTFOUND) { - /* the db got deleted from under us, start fresh */ - return rc; - } - logmsg(LOGMSG_ERROR, "%s: fail to lock rc=%d!\n", __func__, rc); - return rc; - } + int is_sqlite_stat1; + int is_sqlite_stat4; + const char *sqlite_stat1 = "sqlite_stat1"; /* hash keys */ + const char *sqlite_stat4 = "sqlite_stat4"; - /* add ourselves back */ - __fdb_add_user(fdb, 0); - - /* remove the stale table here */ - /* ok, stale; we need to garbage this one out */ - fdb_tbl_t *remtbl = 
hash_find_readonly(fdb->h_tbls_name, &table_name); - /* anything is possible with the table while waiting for exclusive - * fdb - * lock */ - if (remtbl) { - /* table is still around */ - if (!remtbl->need_version || - ((remtbl->need_version - 1) == remtbl->version)) { - /* table was fixed in the meantime!, drop exclusive lock */ - rc = FDB_NOERR; - *version = remtbl->version; - goto done; + is_sqlite_master = (strcasecmp(table_name, "sqlite_master") == 0); + is_sqlite_stat1 = (strcasecmp(table_name, "sqlite_stat1") == 0); + is_sqlite_stat4 = (strcasecmp(table_name, "sqlite_stat4") == 0); + + /* when attaching a new table (and possible stats), we save them in the sqlite. + * table and stats will be read locked; once sqlite is done setting up the new + * table and stats, it will call fdbUnlock to release these locks. + */ + init->locked_table = NULL; + init->locked_stat1 = NULL; + init->locked_stat4 = NULL; + init->fdb = NULL; + + /* this is the case when a column is not found, and we assume that it is missing + * from local cache; retrieve remote table version to check for that + * (sqlite_master has no version) + */ + if (comdb2_get_verify_remote_schemas(clnt) && !is_sqlite_master) { + /* this is a retry for an already */ + rc = fdb_get_remote_version(fdb->dbname, table_name, fdb->class, fdb->loc == NULL, &remote_version, &err); + if (rc != FDB_NOERR) { + if (strncasecmp(err.errstr, "table not found", strlen(err.errstr)) == 0) { + return FDB_ERR_FDB_TBL_NOTFOUND; } else { - /* table is still stale, remove */ - if (gbl_fdb_track) - logmsg(LOGMSG_USER, - "Detected stale table \"%s.%s\" " - "version %llu required %d\n", - remtbl->fdb->dbname, remtbl->name, remtbl->version, - remtbl->need_version - 1); - - if (__free_fdb_tbl(remtbl, fdb)) { - logmsg(LOGMSG_ERROR, - "Error clearing schema for table " - "\"%s\" in db \"%s\"\n", - table_name, fdb->dbname); - } + logmsg(LOGMSG_ERROR, "Lookup table %s failed \"%s\"\n", table_name, err.errstr); + return FDB_ERR_GENERIC; } } 
} - /* is this the first table? grab sqlite_stats too */ - initial = fdb_num_entries(fdb) == 0; + Pthread_mutex_lock(&fdb->tables_mtx); + + tbl = _table_exists_and_not_stale(fdb, table_name, version, remote_version); + if (tbl) { + rc = FDB_NOERR; + goto done; + } + + /* need to retrieve new table an link it (and maybe stats too) */ + link_table = 1; + + initial = _num_entries(fdb) == 0; + + if (!initial) { + /* corner case: it is possible that initially the remote + * db had no stats tables; check if we have sqlite_stat1 + * local, and if it is not, we consider this case + * initial + */ + if (hash_find_readonly(fdb->h_tbls_name, &sqlite_stat1) == NULL) { + initial = 1; + } + } /* create the table object */ - tbl = _alloc_table_fdb(fdb, table_name); + tbl = _alloc_fdb_tbl(table_name); if (!tbl) { rc = FDB_ERR_MALLOC; goto done; } + /* we will do the remote schema and stats retrieval without blocking fdb->tables_mutex; + * this can end up with redundant retries, but it will not block the hash during this time + */ + Pthread_mutex_unlock(&fdb->tables_mtx); + /* this COULD be taken out of tbls_mtx, but I want to clear table under lock so I don't add garbage table structures when mispelling */ - is_sqlite_master = (strcasecmp(table_name, "sqlite_master") == 0); - found_ent = NULL; - rc = check_table_fdb(fdb, tbl, initial, &found_ent, is_sqlite_master); + rc = _retrieve_fdb_tbl(fdb, tbl, initial); + + /* lock the tables_mtx again, check if the table was added already + * if table exists already, simply remove the dup here; otherwise + * proceed + */ + Pthread_mutex_lock(&fdb->tables_mtx); + + /* we have retrieved that table and its version; use that here instead of remote_version + * to check for the corner case when someone already added the table, and it is + * already stale + */ + fdb_tbl_t *remtbl = _table_exists_and_not_stale(fdb, table_name, version, tbl->version); + if (remtbl) { + /* table was already added with the right version */ + _free_fdb_tbl(fdb, tbl); + 
tbl = remtbl; + goto done; + } + + /* we are the first here to try to link the table in fdb */ + if (rc == FDB_NOERR) { + fdb_tbl_ent_t *ent; + LISTC_FOR_EACH(&tbl->ents, ent, lnk) + { + if (strcasecmp(ent->name, "sqlite_stat1") == 0) { + fdb->has_sqlstat1 = 1; + } + if (strcasecmp(ent->name, "sqlite_stat4") == 0) { + fdb->has_sqlstat4 = 1; + } + if (strcasecmp(ent->name, tbl->name) == 0) { + found_ent = ent; + } + } + } if (rc != FDB_NOERR || (!found_ent && !is_sqlite_master)) { *version = 0; - /* we might have populated the tbl with sqlite_stat-s - remove them */ - __free_fdb_tbl(tbl, fdb); - if (rc == FDB_NOERR) + if (rc == FDB_NOERR) { + logmsg(LOGMSG_ERROR, "%s: unable to find schema for %s.%s rc =%d\n", __func__, fdb->dbname, tbl->name, rc); + rc = FDB_ERR_FDB_TBL_NOTFOUND; + } + + /* we cannot find the table; remove fdb_tbl, not linked in yet */ + _free_fdb_tbl(fdb, tbl); + tbl = NULL; goto done; } - /* so, we have a new found the table in remote schema, lets add - it to the fdb */ if (!is_sqlite_master) { - if (gbl_fdb_track) - logmsg(LOGMSG_USER, "Linking %s to %s\n", tbl->name, fdb->dbname); - hash_add(fdb->h_tbls_name, tbl); - *version = fdb_table_version(found_ent->_version); } else { *version = 0; } + /* create indedependet fdb_tbl for sqlite_stats*/ if (initial) { /* we have a table, lets get the sqlite_stats */ - if (fdb->has_sqlstat1 && - strncasecmp(table_name, "sqlite_stat1", 13) != 0) { - rc = fix_table_stats(fdb, tbl, "sqlite_stat1"); - if (rc) { + if (fdb->has_sqlstat1 && !is_sqlite_stat1) { + stat1 = _fix_table_stats(fdb, tbl, "sqlite_stat1"); + if (!stat1) { + rc = FDB_ERR_GENERIC; goto done; } + link_stat1 = 1; } - if (fdb->has_sqlstat4 && - strncasecmp(table_name, "sqlite_stat4", 13) != 0) { - rc = fix_table_stats(fdb, tbl, "sqlite_stat4"); - if (rc) { - goto done; + if (fdb->has_sqlstat4 && !is_sqlite_stat4) { + if (fdb->has_sqlstat1) { + stat4 = _fix_table_stats(fdb, tbl, "sqlite_stat4"); + if (!stat4) { + rc = FDB_ERR_GENERIC; + goto 
done; + } + link_stat4 = 1; + } else { + /* artificial corner case: no stat1 but stat4 */ + _remove_table_stat(fdb, tbl, "sqlite_stat4"); } } } if (is_sqlite_master) { /* a dummy sqlite_master tbl was added, we need to remove it here */ - __free_fdb_tbl(tbl, fdb); + _free_fdb_tbl(fdb, tbl); tbl = NULL; } rc = FDB_NOERR; + /* we are ready to link the new table (and stats) in fdb and also in clnt cache */ + done: + if (rc == FDB_NOERR) { + /* do we need to cache stats locally ? + * handle corner cases when tbl is a stat actually + */ + int get_stat1 = 0; + int get_stat4 = 0; + if (is_sqlite_stat1) { + stat1 = tbl; + link_stat1 = link_table; + tbl = NULL; + /* get stat4 too */ + if (!stat4) + /* two case scenarios: + * 1) first table accessed for fdb is sqlite_stat1; stat4 is not hashed, + * but we have it in stat4 as returned by _fix_table_stats + * 2) not initial, in which has it should be already hashed + * in and we need to retrieve it, if it exists + */ + get_stat4 = 1; + } else if (is_sqlite_stat4) { + stat4 = tbl; + link_stat4 = link_table; + tbl = NULL; + /* get stat1 too */ + if (!stat1) + /* see caveat above in is_sqlite_stat1 case */ + get_stat1 = 1; + } else { + /* we need stat1 and stat4 if we do not have them already */ + if (!stat1) + get_stat1 = 1; + if (!stat4) + get_stat4 = 1; + } + if (get_stat1 && fdb->has_sqlstat1) + stat1 = hash_find_readonly(fdb->h_tbls_name, &sqlite_stat1); + if (get_stat4 && fdb->has_sqlstat4) + stat4 = hash_find_readonly(fdb->h_tbls_name, &sqlite_stat4); + + /* cache the entries in clnt */ + if (tbl) { + rc = _clnt_cache_add_tbl(clnt, tbl); + if (rc != FDB_NOERR) + goto done; + } + if (stat1) { + rc = _clnt_cache_add_tbl(clnt, stat1); + if (rc != FDB_NOERR) + goto done; + } + if (stat4) { + rc = _clnt_cache_add_tbl(clnt, stat4); + if (rc != FDB_NOERR) + goto done; + } - /* unlock the mutex only if acquired */ - if (!in_analysis_load) { - Pthread_rwlock_unlock(&fdb->h_rwlock); + /* get the table locks before returning; do this 
before linking to fdb */ + if (tbl) { + /* this will let us access tbl ents during sqlite3InitTable call */ + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Locking fdb %s for setup %s\n", fdb->dbname, tbl->name); + Pthread_rwlock_rdlock(&fdb->inuse_rwlock); + Pthread_rwlock_rdlock(&tbl->table_lock); + init->locked_table = tbl; + } + if (stat1) { + /* this lets us collect sqlite_stat1 ents as well during sqlite3InitTable call */ + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Locking fdb %s for setup %s\n", fdb->dbname, stat1->name); + Pthread_rwlock_rdlock(&fdb->inuse_rwlock); + Pthread_rwlock_rdlock(&stat1->table_lock); + init->locked_stat1 = stat1; + } + if (stat4) { + /* this lets us collect sqlite_stat4 ents as well during sqlite3InitTable call */ + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Locking fdb %s for setup %s\n", fdb->dbname, stat4->name); + Pthread_rwlock_rdlock(&fdb->inuse_rwlock); + Pthread_rwlock_rdlock(&stat4->table_lock); + init->locked_stat4 = stat4; + } + init->fdb = fdb; + + /* we are all set, all we have to do is to link the read locked tables to fdb */ + if (link_table && tbl) { + _link_fdb_table(fdb, tbl); + } + if (link_stat1 && stat1) { + _link_fdb_table(fdb, stat1); + } + if (link_stat4 && stat4) { + _link_fdb_table(fdb, stat4); + } } + /* done here, tables are visible and read locked */ + Pthread_mutex_unlock(&fdb->tables_mtx); -nop: return rc; } -/* NOT thread safe, need fdb->h_rw_lock */ -static int fdb_num_entries(fdb_t *fdb) +/* locked by fdb's tables_mtx */ +static int _num_entries(fdb_t *fdb) { int nents; - /* we use h_ents_rootp instead of h_tbls_name, since this is the last - * updated */ hash_info(fdb->h_ents_rootp, NULL, NULL, NULL, NULL, &nents, NULL, NULL); return nents; } /** - * Connects to the db and retrieve the current sql master row - * Checks cached sql master row and updates it and verid if the - * there was a schema change on the remote db - * NO thread safe (need exclusive fdb->h_rwlock) + * 
Connects to the db and retrieve the current schema for the table + * If this is the first time we connect to this db, retrieve also + * schema for sqlite_stats tables * - * NOTE: + * NOTE: _add_table_and_stats_fdb releases tables_mtx before calling this and relocks it afterwards */ -static int check_table_fdb(fdb_t *fdb, fdb_tbl_t *tbl, int initial, - fdb_tbl_ent_t **found_ent, int is_sqlite_master) +static int _retrieve_fdb_tbl(fdb_t *fdb, fdb_tbl_t *tbl, int initial) { BtCursor *cur; int rc = FDB_NOERR; @@ -1110,13 +1294,6 @@ static int check_table_fdb(fdb_t *fdb, fdb_tbl_t *tbl, int initial, break; } - if (!is_sqlite_master) - logmsg(LOGMSG_ERROR, "%s: unable to find schema for %s.%s rc =%d\n", - __func__, fdb->dbname, tbl->name, rc); - - if (*found_ent) - *found_ent = NULL; - goto close; } @@ -1133,8 +1310,7 @@ static int check_table_fdb(fdb_t *fdb, fdb_tbl_t *tbl, int initial, goto close; } - irc = insert_table_entry_from_packedsqlite(fdb, tbl, row, rowlen, - found_ent, versioned); + irc = _add_fdb_tbl_ent_from_packedksqlite(fdb->dbname, tbl, row, rowlen, versioned); if (irc) { rc = irc; goto close; @@ -1150,7 +1326,7 @@ static int check_table_fdb(fdb_t *fdb, fdb_tbl_t *tbl, int initial, if (rc == IX_FND || /* cdb2api does not know which row is the last */ - (rc == IX_EMPTY /* && *found_ent -- capture also missing table */)) + rc == IX_EMPTY) rc = FDB_NOERR; close: @@ -1205,7 +1381,6 @@ static enum mach_class get_fdb_class(const char **p_dbname, int *local, remote_lvl = my_lvl; /* accessed allowed implicitely */ } - /* TODO: check access permissions */ /* NOTE: for now, we only allow same class or local overrides.
I will sleep better */ if (!gbl_fdb_allow_cross_classes && remote_lvl != my_lvl) { @@ -1216,109 +1391,8 @@ static enum mach_class get_fdb_class(const char **p_dbname, int *local, return remote_lvl; } -int comdb2_fdb_check_class(const char *dbname) -{ - fdb_t *fdb; - enum mach_class requested_lvl = CLASS_UNKNOWN; - int local; - int rc = 0; - - requested_lvl = get_fdb_class(&dbname, &local, NULL); - if (requested_lvl == CLASS_UNKNOWN) { - return -1; - } - - fdb = get_fdb(dbname); - if (!fdb) { - logmsg(LOGMSG_ERROR, "%s: fdb gone?\n", __func__); - rc = FDB_ERR_BUG; - goto done; - } - - if (fdb->class != requested_lvl) { - logmsg(LOGMSG_ERROR, "%s: cached fdb is a different class, failing\n", - __func__); - rc = FDB_ERR_CLASS_DENIED; - goto done; - } - -done: - - return rc; -} - -static int __check_sqlite_stat(sqlite3 *db, fdb_tbl_ent_t *ent, Table *tab) +static int _failed_AddAndLockTable(sqlclntstate *clnt, const char *dbname, int errcode, const char *prefix) { - /* incorrect version, unlikely */ - if (unlikely(ent && tab && (tab->version != ent->tbl->version))) { - logmsg(LOGMSG_ERROR, "Stale cache for \"%s.%s\", sql version=%u != " - "shared version=%llu\n", - ent->tbl->fdb->dbname, tab->zName, tab->version, - ent->tbl->version); - - return SQLITE_SCHEMA_REMOTE; - } - - /* incorrect rootpage numbers */ - if (ent && tab && (tab->tnum != ent->rootpage)) { - logmsg(LOGMSG_ERROR, "Stale cache for \"%s.%s\", wrong rootpage number " - "sqlite=%d shared=%d\n", - ent->tbl->fdb->dbname, tab->zName, tab->tnum, ent->rootpage); - - return SQLITE_SCHEMA_REMOTE; - } - - /* sqlite cached but not shared! 
*/ - if (!ent && tab) { - logmsg(LOGMSG_ERROR, "Stale cache for \"%s.%s\", wrong rootpage number " - "sqlite=%d but not shared\n", - db->aDb[tab->iDb].zDbSName, tab->zName, tab->tnum); - - return SQLITE_SCHEMA_REMOTE; - } - - return SQLITE_OK; -} - -static int _fdb_check_sqlite3_cached_stats(sqlite3 *db, fdb_t *fdb) -{ - int rc = SQLITE_OK; - if (sqlite3_is_preparer(db)) - return SQLITE_OK; - - char *dbname = fdb->local == 0 ? fdb->dbname : - sqlite3_mprintf("LOCAL_%s", fdb->dbname); - fdb_tbl_ent_t *stat_ent; - Table *stat_tab; - - stat_ent = get_fdb_tbl_ent_by_name_from_fdb(fdb, "sqlite_stat1"); - stat_tab = sqlite3FindTableCheckOnly(db, "sqlite_stat1", dbname); - - if (__check_sqlite_stat(db, stat_ent, stat_tab) != SQLITE_OK) { - rc = SQLITE_SCHEMA_REMOTE; - goto remote; - } - - stat_ent = get_fdb_tbl_ent_by_name_from_fdb(fdb, "sqlite_stat4"); - stat_tab = sqlite3FindTableCheckOnly(db, "sqlite_stat4", dbname); - - if (__check_sqlite_stat(db, stat_ent, stat_tab) != SQLITE_OK) { - rc = SQLITE_SCHEMA_REMOTE; - goto remote; - } - -remote: - if (dbname != fdb->dbname) - sqlite3_free(dbname); - return rc; -} - -static int _failed_AddAndLockTable(const char *dbname, int errcode, - const char *prefix) -{ - struct sql_thread *thd = pthread_getspecific(query_info_key); - sqlclntstate *clnt = thd->clnt; - logmsg(LOGMSG_WARN, "Error rc %d \"%s\" for db \"%s\"\n", errcode, prefix, dbname); @@ -1353,7 +1427,7 @@ int create_local_fdb(const char *fdb_name, fdb_t **fdb) { logmsg(LOGMSG_ERROR, "%s: Failed to create new fdb\n", __func__); return FDB_ERR_MALLOC; } - init_fdb(*fdb, fdb_name, lvl, local, lvl_override); + _init_fdb(*fdb, fdb_name, lvl, local, lvl_override); return 0; } @@ -1367,11 +1441,11 @@ int create_local_fdb(const char *fdb_name, fdb_t **fdb) { * and returns SQLITE_ERROR so that sql can rollback * */ -int sqlite3AddAndLockTable(sqlite3 *db, const char *dbname, const char *table, - int *version, int in_analysis_load, - int *out_class, int *out_local, - int 
*out_class_override, int *out_proto_version) +int sqlite3AddAndLockTable(sqlite3InitInfo *init, const char *dbname, const char *table, int *version, int *out_class, + int *out_local, int *out_class_override, int *out_proto_version) { + struct sql_thread *thd = pthread_getspecific(query_info_key); + sqlclntstate *clnt = thd->clnt; fdb_t *fdb; int rc = FDB_NOERR; int created = 0; @@ -1383,30 +1457,29 @@ int sqlite3AddAndLockTable(sqlite3 *db, const char *dbname, const char *table, lvl = get_fdb_class(&dbname, &local, &lvl_override); if (lvl == CLASS_UNKNOWN || lvl == CLASS_DENIED) { - return _failed_AddAndLockTable( - dbname, - (lvl == CLASS_UNKNOWN) ? FDB_ERR_CLASS_UNKNOWN - : FDB_ERR_CLASS_DENIED, - (lvl == CLASS_UNKNOWN) ? "unrecognized class" : "denied access"); - } -retry_fdb_creation: - fdb = new_fdb(dbname, &created, lvl, local, lvl_override); + return _failed_AddAndLockTable(clnt, dbname, + (lvl == CLASS_UNKNOWN) ? FDB_ERR_CLASS_UNKNOWN : FDB_ERR_CLASS_DENIED, + (lvl == CLASS_UNKNOWN) ? "unrecognized class" : "denied access"); + } + + /* try to find or create the fdb; + * the returned fdb, created or not, is read locked (live lock inuse_rwlock) + * fdb is visible to other clients + */ + fdb = _new_fdb(dbname, &created, lvl, local, lvl_override); if (!fdb) { /* we cannot really alloc a new memory string for sqlite here */ - return _failed_AddAndLockTable(dbname, FDB_ERR_MALLOC, - "OOM allocating fdb object"); + return _failed_AddAndLockTable(clnt, dbname, FDB_ERR_MALLOC, "OOM allocating fdb object"); } if (!created) { /* we need to validate requested class to existing class */ - rc = _validate_existing_table(fdb, lvl, local); + rc = _validate_existing_fdb(fdb, lvl, local); if (rc != FDB_NOERR) { - __fdb_rem_user(fdb, 1); - return _failed_AddAndLockTable(dbname, rc, "mismatching class"); + put_fdb(fdb, FDB_PUT_NOFREE); + return _failed_AddAndLockTable(clnt, dbname, rc, "mismatching class"); } } - /* NOTE: FROM NOW ON, CREATED FDB IS VISIBLE TO OTHER THREADS! 
*/ - /* hack: sqlite stats are inheriting the present db lvl */ if (!created && is_sqlite_stat(table)) { lvl = fdb->class; @@ -1445,15 +1518,9 @@ int sqlite3AddAndLockTable(sqlite3 *db, const char *dbname, const char *table, } } - /* the bellow will exclusively lock fdb, and bump users before releasing - the lock and returning */ - rc = _add_table_and_stats_fdb(fdb, table, version, in_analysis_load); + /* this is table(s) operation, and work is done under fdb's tables_mtx */ + rc = _add_table_and_stats_fdb(clnt, init, fdb, table, version); if (rc != FDB_NOERR) { - if (rc == FDB_ERR_FDB_NOTFOUND) { - /* fdb deleted from under us by creator thread */ - goto retry_fdb_creation; - } - if (rc != FDB_ERR_SSL) logmsg(LOGMSG_ERROR, "%s: failed to add foreign table \"%s:%s\" rc=%d\n", @@ -1461,10 +1528,9 @@ int sqlite3AddAndLockTable(sqlite3 *db, const char *dbname, const char *table, switch (rc) { case FDB_ERR_FDB_TBL_NOTFOUND: { - /* ignore sqlite_stat not found during in_analysis_load */ - if (in_analysis_load && strncasecmp(table, "sqlite_stat", strlen("sqlite_stat"))== 0) { - /* decrement the local bump */ - __fdb_rem_user(fdb, 0); + /* ignore sqlite_stat not found during sqlite3AnalysisLoad in sqlite3Init */ + if (init->busy == 1 && strncasecmp(table, "sqlite_stat", strlen("sqlite_stat")) == 0) { + put_fdb(fdb, FDB_PUT_NOFREE); return SQLITE_ERROR; } snprintf(errstr, sizeof(errstr), "no such table \"%s\"", table); @@ -1497,35 +1563,13 @@ int sqlite3AddAndLockTable(sqlite3 *db, const char *dbname, const char *table, } error: - /* decrement the local bump */ - __fdb_rem_user(fdb, 0); - - /* if we've created this now, remove it since it could be a mistype */ - if (created) { - destroy_fdb(fdb); - fdb = NULL; - } - - return _failed_AddAndLockTable(dbname, rc, perrstr); + put_fdb(fdb, created ? 
FDB_PUT_TRYFREE : FDB_PUT_NOFREE); + return _failed_AddAndLockTable(clnt, dbname, rc, perrstr); } - /* We have successfully created a shared fdb table on behalf of an sqlite3 - engine - it is possible that sqlite_stat entries have changed, and prepare will - need - them to work (it is possible that they have stale schema/rootpage numbers - The following clears the entries if the sqlite_stat entries are stale */ - /* we need to check the sqlite_stats also, since they are not really locked + /* here we have the table read lock (table_lock) and possibly + * the stats as well (if required by sqlite engine initial setup) */ - if (_fdb_check_sqlite3_cached_stats(db, fdb) != SQLITE_OK) { - /* lets remove the cached sqlite_stat information; it will be retrieved - * fresh */ - fdb_clear_sqlite_cache(db, fdb->dbname, NULL); - } - - /* we return SQLITE_OK here, which tells the caller that the db is still - READ locked! - the caller will have to release that */ *out_class = lvl; *out_local = local; @@ -1536,156 +1580,46 @@ int sqlite3AddAndLockTable(sqlite3 *db, const char *dbname, const char *table, } /** - * Decrement users for AddAndLock callers - * - * Always able to find a fdb since it was locked + * Release the read locks used by sqlite engine initial setup + * Following this, tables can be removed or update, but sqlite + * has a cache of them and it will reacquire needed table read locks + * before returning a prepared statement. We check the table + * existence and version at that time and update if needed * + * NOTE: there is a choice tradeoff for releasing the locks: + * - we release read locks on table and stats once setup is done to avoid + * checking for them on every possible failure between engine setup + * and table locking phases. 
+ * - during a remote table update, releasing the locks lets parallel sqlite engines + * proceed, instead of blocking for longer until this engine detects a stale + * version + * - this is consistent with existing way of protecting tables during query execution */ -int sqlite3UnlockTable(const char *dbname, const char *table) -{ - fdb_t *fdb; - - fdb = get_fdb(dbname); - if (!fdb) { - /* bug */ - logmsg(LOGMSG_FATAL, "Unable to find dbname \"%s\", BUG!\n", dbname); - abort(); - } - - __fdb_rem_user(fdb, 1); /* matches __fdb_add_user in sqlite3AddAndLockTable */ - - return SQLITE_OK; -} - -static int __lock_wrlock_shared(fdb_t *fdb) +void fdbUnlock(sqlite3InitInfo *init) { - int rc = FDB_NOERR; - - Pthread_rwlock_rdlock(&fdb->h_rwlock); - - return rc; -} + struct sql_thread *thd = pthread_getspecific(query_info_key); + sqlclntstate *clnt = thd->clnt; -static int __lock_wrlock_exclusive(char *dbname) -{ - fdb_t *fdb = NULL; - int rc = FDB_NOERR; - int idx = -1; - int len = strlen(dbname) + 1; + /* if we got lock tables for table and any stats, unlock them here */ + if (init->locked_table) { - if (_test_trap_dlock1 == 2) { - _test_trap_dlock1++; + fdb_unlock_table(clnt, ((fdb_tbl_t *)init->locked_table)->ents.top); + init->locked_table = NULL; } - - do { - Pthread_rwlock_rdlock(&fdbs.arr_lock); - if (!(idx >= 0 && idx < fdbs.nused && fdbs.arr[idx] == fdb && - strncasecmp(dbname, fdbs.arr[idx]->dbname, len) == 0)) { - fdb = __cache_fnd_fdb(dbname, &idx); - } - - if (!fdb) { - Pthread_rwlock_unlock(&fdbs.arr_lock); - return FDB_ERR_FDB_NOTFOUND; - } - - Pthread_rwlock_wrlock(&fdb->h_rwlock); - - /* we got the lock, are there any lockless users ? 
*/ - if (fdb->users > 1) { - Pthread_rwlock_unlock(&fdb->h_rwlock); - Pthread_rwlock_unlock(&fdbs.arr_lock); - - /* if we loop, make sure this is not a live lock - deadlocking with another sqlite engine that waits - for a bdb write lock to be processed */ - - struct sql_thread *thd = pthread_getspecific(query_info_key); - if (!thd) - continue; - - rc = clnt_check_bdb_lock_desired(thd->clnt); - if (rc) { - logmsg(LOGMSG_ERROR, "%s:%d recover_deadlock returned %d\n", - __func__, __LINE__, rc); - return FDB_ERR_GENERIC; - } - - continue; - } else { - rc = FDB_NOERR; - break; /* own fdb */ - } - } while (1); /* 1 is the creator */ - - Pthread_rwlock_unlock(&fdbs.arr_lock); - - return rc; -} - -static fdb_tbl_ent_t *get_fdb_tbl_ent_by_rootpage_from_fdb(fdb_t *fdb, - int rootpage) -{ - fdb_tbl_ent_t *ent; - - __lock_wrlock_shared(fdb); - ent = hash_find_readonly(fdb->h_ents_rootp, &rootpage); - Pthread_rwlock_unlock(&fdb->h_rwlock); - - return ent; -} - -/* NOTE: ALERT! always call this with h_rwlock acquired; as of now - add_table_and_stats_fdb - has WR lock on it and it is undefined behaviour to get the read lock here */ -static fdb_tbl_ent_t *get_fdb_tbl_ent_by_name_from_fdb(fdb_t *fdb, - const char *name) -{ - fdb_tbl_ent_t *ent; - /* - Pthread_rwlock_rdlock(&fdb->h_rwlock); - */ - ent = hash_find_readonly(fdb->h_ents_name, &name); - /* - Pthread_rwlock_unlock(&fdb->h_rwlock); - */ - - return ent; -} - -/** - * Retrieve entry for a fdb and a entry name (tbl or index) - * - */ -fdb_tbl_ent_t *fdb_table_entry_by_name(fdb_t *fdb, const char *name) -{ - fdb_tbl_ent_t *ent; - - __lock_wrlock_shared(fdb); - ent = hash_find_readonly(fdb->h_ents_name, &name); - Pthread_rwlock_unlock(&fdb->h_rwlock); - - return ent; -} - -static fdb_tbl_ent_t *get_fdb_tbl_ent_by_rootpage(int rootpage) -{ - fdb_t *fdb; - fdb_tbl_ent_t *ent = NULL; - int i; - - Pthread_rwlock_rdlock(&fdbs.arr_lock); - for (i = 0; i < fdbs.nused; i++) { - fdb = fdbs.arr[i]; - - ent = 
get_fdb_tbl_ent_by_rootpage_from_fdb(fdb, rootpage); - - if (ent) - break; + if (init->locked_stat1) { + fdb_unlock_table(clnt, ((fdb_tbl_t *)init->locked_stat1)->ents.top); + init->locked_stat1 = NULL; + } + if (init->locked_stat4) { + fdb_unlock_table(clnt, ((fdb_tbl_t *)init->locked_stat4)->ents.top); + init->locked_stat4 = NULL; } - Pthread_rwlock_unlock(&fdbs.arr_lock); - return ent; + /* release live lock for fdb; that is ok, all is cached in the sqlite engine + * and we will get new live locks when we lock tables + */ + put_fdb(init->fdb, FDB_PUT_NOFREE); + init->fdb = NULL; } /** @@ -1693,12 +1627,12 @@ static fdb_tbl_ent_t *get_fdb_tbl_ent_by_rootpage(int rootpage) * Caller must free the returned pointer * */ -char *fdb_sqlexplain_get_name(int rootpage) +char *fdb_sqlexplain_get_name(struct sqlclntstate *clnt, int rootpage) { fdb_tbl_ent_t *ent; char tmp[1024]; - ent = get_fdb_tbl_ent_by_rootpage(rootpage); + ent = fdb_clnt_cache_get_ent(clnt, rootpage); /* NOTE: do we support live table removals? 
*/ if (ent) { @@ -1716,112 +1650,11 @@ char *fdb_sqlexplain_get_name(int rootpage) return strdup(tmp); } -int create_sqlite_master_table(const char *etype, const char *name, - const char *tbl_name, int rootpage, - const char *sql, const char *csc2, - char **ret_rec, int *ret_rec_len) -{ -#define SQLITE_MASTER_ROW_COLS 6 - Mem mems[SQLITE_MASTER_ROW_COLS], *m; - int rc; - - logmsg(LOGMSG_INFO, "Creating master table for %s %s %s %d \"%s\" \"%s\"\n", etype, name, - tbl_name, rootpage, sql, csc2); - - bzero(&mems, sizeof(mems)); - *ret_rec = NULL; - *ret_rec_len = 0; - - /* type */ - m = &mems[0]; - m->z = strdup(etype); - if (!m->z) { - logmsg(LOGMSG_ERROR, "ENOMEM: %d Malloc %zu\n", __LINE__, - strlen(etype)); - return FDB_ERR_MALLOC; - } - m->n = strlen(etype); - m->flags = MEM_Str | MEM_Ephem; - /* name */ - m++; - m->z = strdup(name); - if (!m->z) { - logmsg(LOGMSG_ERROR, "ENOMEM: %d Malloc %zu\n", __LINE__, strlen(name)); - free(mems[0].z); - return FDB_ERR_MALLOC; - } - m->n = strlen(name); - m->flags = MEM_Str | MEM_Ephem; - /* tbl_name */ - m++; - m->z = strdup(tbl_name); - if (!m->z) { - logmsg(LOGMSG_ERROR, "ENOMEM: %d Malloc %zu\n", __LINE__, - strlen(tbl_name)); - free(mems[0].z); - free(mems[1].z); - return FDB_ERR_MALLOC; - } - m->n = strlen(tbl_name); - m->flags = MEM_Str | MEM_Ephem; - /* rootpage */ - m++; - m->u.i = rootpage; - m->flags = MEM_Int; - /* sql */ - m++; - m->z = strdup(sql); - if (!m->z) { - logmsg(LOGMSG_ERROR, "ENOMEM: %d Malloc %zu\n", __LINE__, strlen(sql)); - free(mems[0].z); - free(mems[1].z); - free(mems[2].z); - return FDB_ERR_MALLOC; - } - m->n = strlen(sql); - m->flags = MEM_Str | MEM_Ephem; - /* csc2 */ - m++; - if (csc2) { - m->z = strdup(csc2); - if (!m->z) { - logmsg(LOGMSG_ERROR, "ENOMEM: %d Malloc %zu\n", __LINE__, - strlen(csc2)); - free(mems[0].z); - free(mems[1].z); - free(mems[2].z); - free(mems[4].z); - return FDB_ERR_MALLOC; - } - m->n = strlen(csc2); - m->flags = MEM_Str | MEM_Ephem; - } else { - m->flags = 
MEM_Null; - } - - rc = sqlite3_unpacked_to_packed(mems, SQLITE_MASTER_ROW_COLS, ret_rec, - ret_rec_len); - if (rc) { - logmsg(LOGMSG_ERROR, "ENOMEM: Malloc error\n"); - free(mems[0].z); - free(mems[1].z); - free(mems[2].z); - free(mems[4].z); - free(mems[5].z); - return FDB_ERR_MALLOC; - } - - return FDB_NOERR; -} - /** * insert an entry using a packed sqlite row ; no locking here, table is not yet * visible */ -static int insert_table_entry_from_packedsqlite(fdb_t *fdb, fdb_tbl_t *tbl, - char *row, int rowlen, - fdb_tbl_ent_t **found_ent, - int versioned) +static int _add_fdb_tbl_ent_from_packedksqlite(const char *dbname, fdb_tbl_t *tbl, char *row, int rowlen, int versioned) { fdb_tbl_ent_t *ent = (fdb_tbl_ent_t *)calloc(sizeof(fdb_tbl_ent_t), 1); char *etype, *name, *tbl_name, *sql, *csc2; @@ -1836,7 +1669,6 @@ static int insert_table_entry_from_packedsqlite(fdb_t *fdb, fdb_tbl_t *tbl, return FDB_ERR_MALLOC; } - /* sqlite_stats are updated under this lock, we don't need it here */ rootpage = get_rootpage_numbers(1); version = 0; @@ -1845,10 +1677,10 @@ static int insert_table_entry_from_packedsqlite(fdb_t *fdb, fdb_tbl_t *tbl, &csc2, &version, rootpage); if (gbl_fdb_track) - logmsg(LOGMSG_USER, "%s:%s Inserting table %s:%s rootp=%d src_rootp=%d " - "version=%llu, sql %s\n", - fdb->dbname, tbl->name, name, tbl_name, rootpage, - source_rootpage, version, sql); + logmsg(LOGMSG_USER, + "%s:%s Inserting table %s:%s rootp=%d src_rootp=%d " + "version=%llu, sql %s\n", + dbname, tbl->name, name, tbl_name, rootpage, source_rootpage, version, sql); if (strcasecmp(name, tbl_name) && (where = strstr(sql, ") where (")) != NULL) { @@ -1901,19 +1733,11 @@ static int insert_table_entry_from_packedsqlite(fdb_t *fdb, fdb_tbl_t *tbl, } ent->name = strdup(name); ent->tbl = tbl; - listc_abl(&tbl->ents, ent); - hash_add(fdb->h_ents_rootp, ent); - hash_add(fdb->h_ents_name, ent); - - if (strcasecmp(ent->name, "sqlite_stat1") == 0) { - fdb->has_sqlstat1 = 1; - } - if 
(strcasecmp(ent->name, "sqlite_stat4") == 0) { - fdb->has_sqlstat4 = 1; - } - if (strcasecmp(ent->name, tbl->name) == 0) { - *found_ent = ent; - } + /* makes sure the table itself is the first entry */ + if (ent->ixnum == -1) + listc_atl(&tbl->ents, ent); + else + listc_abl(&tbl->ents, ent); if (versioned) { /* Do to the way sqlite_stat tables are added on the first table request @@ -1968,17 +1792,33 @@ void *fdb_get_sqlite_master_entry(fdb_t *fdb, fdb_tbl_ent_t *ent) /** * Move a cursor on sqlite_master table - * Since we generate schema for remote tables - * on demand, this routine is optimized to - * pick and walk only a fdb, and also to return only - * rows for table, not all + * This routine will only return the to-be-attached table and stats tables + * + * NOTE: during the call sqlite3AddAndLockTable we acquire read locks + * for fdb_tbl objects of the table and stats; therefore this call is lockless + * NOTE2: we do return sqlite_stats even if they exist for now, and + * we have code deep inside sqlite3StartTable that skips the duplicate adds + * NOTE3: there were some issues with sqlitex and table names in the past, + * when we validate columns for remote queries; the sqlitex does not + * have a cache, and we will be relying on sqlite engine cache for the data + * NOTE4: we need to be aware that we might not have stats tables; if no stat1 + * we assume automatically no stat4 * */ - int fdb_cursor_move_master(BtCursor *pCur, int *pRes, int how) { - const char *zTblName; + sqlite3 *db = pCur->sqlite; + fdb_t *fdb = pCur->bt->fdb; + fdb_tbl_t *tbl = NULL; + /* to walk up to three tables, we use step to tell which table we need + * order is + * 0 - table (no stats table) + * 1 - stat1 + * 2 - stat4 + */ + int step = 0; + const char *zTblName; if (gbl_old_column_names && pCur->clnt->thd && pCur->clnt->thd->query_preparer_running) { /* We must have a query_preparer_plugin installed.
*/ @@ -1988,31 +1828,18 @@ int fdb_cursor_move_master(BtCursor *pCur, int *pRes, int how) zTblName = query_preparer_plugin->sqlitex_table_name( pCur->query_preparer_data); } else { - sqlite3 *sqlite = pCur->sqlite; - zTblName = sqlite->init.zTblName; + zTblName = db->init.zTblName; } - fdb_t *fdb = pCur->bt->fdb; - fdb_tbl_t *tbl = NULL; - int step = 0; assert(fdb != NULL); - - /* - NOTE: there are two types of calls - 1) when a table is attached first time to a sqlite engine: - the fdb exists and has an sqlite_master already; in this case - the comdb2_dynamic_attach code sets init.zTblName to point to - the desired table - 2) after a schema flush; in this case fdb exists but it has no - sqlite_master; this is called with init.zTblName == NULL, which - would mean "give me whatever we have local, I am gonna populate - this engine" - */ + assert(zTblName); + assert(how == CNEXT || how == CFIRST); /* NEXT w/out FIRST is FIRST */ pCur->eof = 0; - /* are we walking the sqlite_stats? */ + /* is this not the first row we try to get? */ if (pCur->crt_sqlite_master_row) { + /* not the first row; check if we are walking the sqlite_stats */ if (strncasecmp(pCur->crt_sqlite_master_row->name, "sqlite_stat1", 12) == 0) { goto sqlite_stat1; @@ -2022,48 +1849,38 @@ int fdb_cursor_move_master(BtCursor *pCur, int *pRes, int how) goto sqlite_stat4; } } else { - /* this is the first time we step and locate a table; we - will need to position on the current table; given the order - chosen {table, stat1, stat4, done}, if table is stat4, we - end up skipping stat1. To fix this, we replace stat4 with - stat1 since we will get stat4 after this.
- */ - if (strncasecmp(zTblName, "sqlite_stat4", 12) == 0) - zTblName = "sqlite_stat1"; - /* In addition, if the first remote table from this fdb - is sqlite_master, we only get stats tables, and the follow-up - hash_find_readonly returns no entry, since we don't have an - entry for sqlite_master; fix this by pointing to sqlite_stat1 - as well */ - if (strncasecmp(zTblName, "sqlite_master", 13) == 0) - zTblName = "sqlite_stat1"; + /* this is the first row we are getting; + * preserve the order {table, stat1, stat4, done} + */ + /* if table is stat4, we + * end up skipping stat1. To fix this, we replace stat4 with + * stat1 since we will get stat4 after this. + */ + if (strncasecmp(zTblName, "sqlite_stat4", 12) == 0) { + goto sqlite_stat1; + } else { + /* In addition, if the first remote table from this fdb + * is sqlite_master, we only get stats tables stat1 and stat4 + */ + if (strncasecmp(zTblName, "sqlite_master", 13) == 0) { + goto sqlite_stat1; + } + } } search: - __lock_wrlock_shared(fdb); - tbl = hash_find_readonly(fdb->h_tbls_name, &zTblName); - - if (!tbl) { - /* this is possible only for wrong tblname? 
*/ - Pthread_rwlock_unlock(&fdb->h_rwlock); - /* done, the table is gone */ - /* TODO: review drop table case */ - pCur->eof = 1; - *pRes = 1; - return SQLITE_OK; - } - Pthread_mutex_lock(&tbl->ents_mtx); - Pthread_rwlock_unlock(&fdb->h_rwlock); - - assert(how == CNEXT || how == CFIRST); /* NEXT w/out FIRST is FIRST */ + tbl = _clnt_cache_get_tbl_by_name(pCur->clnt, zTblName); + assert(tbl); /* we have a pthread_rwlock_rdlock here */ if (!pCur->crt_sqlite_master_row) { pCur->crt_sqlite_master_row = tbl->ents.top; assert(pCur->crt_sqlite_master_row); } else { if (!pCur->crt_sqlite_master_row->lnk.next) { - Pthread_mutex_unlock(&tbl->ents_mtx); - + /* we consumed last row from the current table, + * "step" will tell us if there is a followup + * table we need to consider + */ switch (step) { case 0: pCur->crt_sqlite_master_row = NULL; @@ -2080,33 +1897,41 @@ int fdb_cursor_move_master(BtCursor *pCur, int *pRes, int how) } if (!pCur->crt_sqlite_master_row) { - pCur->eof = 1; - *pRes = 1; - return SQLITE_OK; + goto done; } - Pthread_mutex_unlock(&tbl->ents_mtx); - + /* we have a row ! 
*/ *pRes = 0; - return SQLITE_OK; sqlite_stat1: - /* NOTE: this is a bit of hack; when we are parsing the sqlite_mastter - tables, - we match the table name from zTblName, but also need sqlite_stats */ - /* we still have the fdb->h_rwlock here */ - /* locate btree position */ + if (!fdb->has_sqlstat1) { + /* we are trying to get stats, + * but no stats, done + */ + goto done; + } zTblName = "sqlite_stat1"; step = 1; goto search; sqlite_stat4: + if (!fdb->has_sqlstat4) { + /* we are trying to get stat4, + * but no stat4, done + */ + goto done; + } zTblName = "sqlite_stat4"; step = 2; goto search; + +done: + pCur->eof = 1; + *pRes = 1; + return SQLITE_OK; } /** @@ -2114,47 +1939,47 @@ int fdb_cursor_move_master(BtCursor *pCur, int *pRes, int how) * "ixnum", * field "fieldnum" */ -char *fdb_sqlexplain_get_field_name(Vdbe *v, int rootpage, int ixnum, - int fieldnum) +char *fdb_sqlexplain_get_field_name(struct sqlclntstate *clnt, Vdbe *v, int rootpage, int ixnum, int fieldnum) { fdb_tbl_ent_t *ent; Table *pTab; Index *pIdx; - Column *pCol; + Column *pCol = NULL; if (!v) return NULL; - ent = get_fdb_tbl_ent_by_rootpage(rootpage); + ent = fdb_clnt_cache_get_ent(clnt, rootpage); if (!ent) - return NULL; + goto done; if (ent->ixnum == -1) { pTab = sqlite3FindTableCheckOnly(v->db, ent->name, ent->tbl->fdb->dbname); if (!pTab) - return NULL; + goto done; if (fieldnum < 0 || fieldnum > pTab->nCol) - return NULL; + goto done; pCol = &pTab->aCol[fieldnum]; } else { pIdx = sqlite3FindIndex(v->db, ent->name, ent->tbl->fdb->dbname); if (!pIdx) - return NULL; + goto done; if (fieldnum < 0 || fieldnum > pIdx->nColumn) - return NULL; + goto done; if (pIdx->aiColumn[fieldnum] < 0 || pIdx->aiColumn[fieldnum] > pIdx->pTable->nCol) - return NULL; + goto done; pCol = &pIdx->pTable->aCol[pIdx->aiColumn[fieldnum]]; } - return pCol->zName; +done: + return pCol ? 
pCol->zName : NULL; } static int _fdb_remote_reconnect(fdb_t *fdb, COMDB2BUF **psb, char *host, int use_cache) @@ -2484,7 +2309,6 @@ static void _cursor_set_common(fdb_cursor_if_t *fdbc_if, char *tid, int flags, fdbc_if->tbl_has_partidx = fdb_cursor_table_has_partidx; fdbc_if->tbl_has_expridx = fdb_cursor_table_has_expridx; fdbc_if->dbname = fdb_cursor_dbname; - fdbc_if->table_entry = fdb_cursor_table_entry; fdbc_if->access = fdb_cursor_access; comdb2uuid(fdbc->ciduuid); @@ -2795,7 +2619,7 @@ fdb_cursor_if_t *fdb_cursor_open(sqlclntstate *clnt, BtCursor *pCur, * remote rootpage 1 */ ent = NULL; } else if (rootpage != -1 /* not shared cursor ! */) { - ent = get_fdb_tbl_ent_by_rootpage_from_fdb(fdb, rootpage); + ent = fdb_clnt_cache_get_ent(clnt, rootpage); if (!ent) { logmsg(LOGMSG_ERROR, "%s: unable to find rootpage %d\n", __func__, rootpage); @@ -2822,16 +2646,23 @@ fdb_cursor_if_t *fdb_cursor_open(sqlclntstate *clnt, BtCursor *pCur, /* the way we encode server version is due to R5 lacking version support */ if (ent && is_sqlite_stat(ent->name)) { - pCur->fdbc = fdbc_if = - fdb_sqlstat_cache_cursor_open(clnt, fdb, ent->name); + + /* this gets us a sqlite_stats cache protected by fdb's sqlstats_mtx */ + fdb_sqlstat_cache_t *cache = _sqlstats_get(fdb, clnt); + if (!cache) { + logmsg(LOGMSG_ERROR, "%s: failed to retrieve sqlite stats cache\n", __func__); + clnt->fdb_state.preserve_err = 1; + errstat_set_rcstrf(&clnt->fdb_state.xerr, FDB_ERR_GENERIC, "failed to open sqlite stats"); + goto done; + } + + pCur->fdbc = fdbc_if = fdb_sqlstat_cache_cursor_open(clnt, fdb, ent->name, cache); if (!fdbc_if) { logmsg(LOGMSG_ERROR, "%s: failed to open fdb cursor\n", __func__); clnt->fdb_state.preserve_err = 1; - clnt->fdb_state.xerr.errval = FDB_ERR_BUG; - snprintf(clnt->fdb_state.xerr.errstr, - sizeof(clnt->fdb_state.xerr.errstr), - "failed to open fdb cursor for stats"); + errstat_set_rcstrf(&clnt->fdb_state.xerr, FDB_ERR_BUG, "failed to open fdb cursor for stats"); + 
fdb_sqlstats_put(fdb); goto done; } @@ -2901,7 +2732,6 @@ static void fdb_cursor_close_on_open(BtCursor *pCur, int cache) static int fdb_cursor_close(BtCursor *pCur) { if (pCur->fdbc) { - /*TODO: check sqlite_stat cursors and their caching */ fdb_cursor_t *fdbc = pCur->fdbc->impl; if (fdbc->type == FCON_TYPE_LEGACY) { @@ -3339,7 +3169,9 @@ static void _fdb_handle_sqlite_schema_err(fdb_cursor_t *fdbc, char *errstr) multiple sql engines, maybe with different values if the remote table is schema changed repeatedly */ + pthread_mutex_lock(&fdbc->ent->tbl->need_version_mtx); fdbc->ent->tbl->need_version = remote_version + 1; + pthread_mutex_unlock(&fdbc->ent->tbl->need_version_mtx); } static int _fdb_handle_io_read_error(BtCursor *pCur, int *retry, int *pollms, @@ -3664,23 +3496,16 @@ static int fdb_cursor_find_sql(BtCursor *pCur, Mem *key, int nfields, /* This returns the sqlstats table under a mutex + NOTE: the stat1/4 are clnt cached objects */ -fdb_sqlstat_cache_t *fdb_sqlstats_get(fdb_t *fdb) +static fdb_sqlstat_cache_t *_sqlstats_get(fdb_t *fdb, sqlclntstate *clnt) { int rc = 0; - struct sql_thread *thd; - sqlclntstate *clnt; int interval = bdb_attr_get(thedb->bdb_attr, BDB_ATTR_FDB_SQLSTATS_CACHE_LOCK_WAITTIME_NSEC); if (!interval) interval = 100; - /* this should be an sql thread */ - thd = pthread_getspecific(query_info_key); - if (!thd) return NULL; - - clnt = thd->clnt; - /* remote sql stats are implemented as a critical region I was told that mutex is faster, lul We need to allow bdb lock to recover if we keep waiting @@ -3722,12 +3547,13 @@ fdb_sqlstat_cache_t *fdb_sqlstats_get(fdb_t *fdb) } while (1); if (fdb->sqlstats == NULL) { - /* create them */ + /* create the stats cache */ rc = fdb_sqlstat_cache_create(clnt, fdb, fdb->dbname, &fdb->sqlstats); if (rc) { logmsg(LOGMSG_ERROR, "%s: failed to create cache rc=%d\n", __func__, rc); fdb->sqlstats = NULL; fdb_sqlstats_put(fdb); + return NULL; } } @@ -3797,13 +3623,6 @@ static char *fdb_cursor_dbname(BtCursor 
*pCur) return pCur->fdbc->impl->ent->tbl->fdb->dbname; } -static fdb_tbl_ent_t *fdb_cursor_table_entry(BtCursor *pCur) -{ - assert(pCur->fdbc); - - return pCur->fdbc->impl->ent; -} - const char *fdb_parse_comdb2_remote_dbname(const char *zDatabase, const char **fqDbname) { @@ -3929,6 +3748,7 @@ static int fdb_cursor_insert(BtCursor *pCur, sqlclntstate *clnt, trans->seq++; trans->nwrites++; + trans->writes_status = FDB_TRAN_WRITES; return rc; } @@ -3978,6 +3798,7 @@ static int fdb_cursor_delete(BtCursor *pCur, sqlclntstate *clnt, trans->seq++; trans->nwrites++; + trans->writes_status = FDB_TRAN_WRITES; if (rc == 0) { rc = fdb_set_genid_deleted(trans, genid); @@ -4048,6 +3869,7 @@ static int fdb_cursor_update(BtCursor *pCur, sqlclntstate *clnt, trans->seq++; trans->nwrites++; + trans->writes_status = FDB_TRAN_WRITES; if (rc == 0) { rc = fdb_set_genid_deleted(trans, genid); @@ -4314,13 +4136,16 @@ void fdb_client_set_identityBlob(sqlclntstate *clnt, cdb2_hndl_tp *hndl) } } -int fdb_trans_commit(sqlclntstate *clnt, enum trans_clntcomm sideeffects) +int fdb_trans_commit(sqlclntstate *clnt, enum trans_clntcomm sideeffects, int *is_distributed) { fdb_distributed_tran_t *dtran = clnt->dbtran.dtran; fdb_tran_t *tran, *tmp; fdb_msg_t *msg; int rc = 0; uuidstr_t tus; + + *is_distributed = 0; + if (!dtran) return 0; @@ -4358,18 +4183,17 @@ int fdb_trans_commit(sqlclntstate *clnt, enum trans_clntcomm sideeffects) continue; if (tran->is_cdb2api) { - if (tran->nwrites) { - /* handle is only created upon first remote write to this fdb */ + /* handle is only created upon first remote write to this fdb */ + if (tran->writes_status >= FDB_TRAN_BEGIN) { assert(tran->fcon.hndl); - fdb_client_set_identityBlob(clnt, tran->fcon.hndl); rc = cdb2_run_statement(tran->fcon.hndl, "commit"); if (!rc) { cdb2_effects_tp effects; int irc; if ((irc = cdb2_get_effects(tran->fcon.hndl, &effects))) { - logmsg(LOGMSG_ERROR, "%s failed to get effects rc %d %s\n", - __func__, irc, 
cdb2_errstr(tran->fcon.hndl)); + logmsg(LOGMSG_ERROR, "%s failed to get effects rc %d %s\n", __func__, irc, + cdb2_errstr(tran->fcon.hndl)); } else { clnt->remote_effects.num_affected += effects.num_affected; clnt->remote_effects.num_selected += effects.num_selected; @@ -4377,16 +4201,22 @@ int fdb_trans_commit(sqlclntstate *clnt, enum trans_clntcomm sideeffects) clnt->remote_effects.num_deleted += effects.num_deleted; clnt->remote_effects.num_inserted += effects.num_inserted; } + /* 2pc case; if the remote did no generate any writes, there is no + * remote bplog, so do not include this fdb as a participant + */ + tran->nwrites = effects.num_inserted + effects.num_deleted + effects.num_updated; + if (tran->nwrites) { + tran->writes_status = FDB_TRAN_WRITES; + *is_distributed = 1; + } } - } else { - rc = 0; + if (gbl_debug_disttxn_trace) + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s txnid %s use_2pc %d writes %d distributed %d\n", __func__, + clnt->coordinator_dbname, clnt->dist_txnid, clnt->use_2pc, tran->nwrites, *is_distributed); } } else { - if (tran->nwrites || !clnt->use_2pc) { - rc = fdb_send_commit(msg, tran, clnt->dbtran.mode, tran->fcon.sb); - if (clnt->use_2pc) - clnt->sent_fdb_commit = 1; - } + *is_distributed = 1; + rc = fdb_send_commit(msg, tran, clnt->dbtran.mode, tran->fcon.sb); } if (rc) { logmsg(LOGMSG_ERROR, "%s: failed to commit %s rc %d\n", @@ -4395,17 +4225,20 @@ int fdb_trans_commit(sqlclntstate *clnt, enum trans_clntcomm sideeffects) /* pass the error to clnt */ bzero(&clnt->osql.xerr, sizeof(clnt->osql.xerr)); errstat_set_rc(&clnt->osql.xerr, rc); - if (tran->errstr) // TODO: this can be non-null even when no error + if (tran->errstr) errstat_set_str(&clnt->osql.xerr, tran->errstr); clnt->osql.error_is_remote = 1; } - if (clnt->use_2pc && (tran->nwrites > 0)) { + if (clnt->use_2pc && tran->writes_status == FDB_TRAN_WRITES) { const char *tier = fdb_dbname_class_routing(tran->fdb); if ((rc = add_participant(clnt, tran->fdb->dbname, tier)) != 0) { 
tran->errstr = strdup("multiple participants with same dbname"); break; } + if (gbl_debug_disttxn_trace) + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s txnid %s add participant %s\n", __func__, + clnt->coordinator_dbname, clnt->dist_txnid, tran->fdb->dbname); } if (gbl_fdb_track) logmsg(LOGMSG_USER, "%s Send Commit tid=%s db=\"%s\" rc=%d\n", __func__, comdb2uuidstr((unsigned char *)tran->tid, tus), @@ -4472,7 +4305,7 @@ int fdb_trans_rollback(sqlclntstate *clnt) fdb_distributed_tran_t *dtran = clnt->dbtran.dtran; fdb_tran_t *tran, *tmp; fdb_msg_t *msg; - int rc; + int rc = 0; if (!dtran) return 0; @@ -4492,28 +4325,25 @@ int fdb_trans_rollback(sqlclntstate *clnt) return FDB_ERR_MALLOC; } - /* TODO: here we replace the trivial 2PC with the actual thing */ - Pthread_mutex_lock(&clnt->dtran_mtx); LISTC_FOR_EACH(&dtran->fdb_trans, tran, lnk) { if (tran->is_cdb2api) { - if (tran->nwrites) { - /* handle is only created upon first remote write to this fdb */ - assert(tran->fcon.hndl); + /* handle is only created upon first remote write to this fdb */ + if (tran->fcon.hndl) { + /* if there is no handle, it means that we never sent a begin remotely + * probably because this was a local writes that required remote reads + */ fdb_client_set_identityBlob(clnt, tran->fcon.hndl); rc = cdb2_run_statement(tran->fcon.hndl, "rollback"); - } else { - rc = 0; } } else { rc = fdb_send_rollback(msg, tran, clnt->dbtran.mode, tran->fcon.sb); } if (rc) { - logmsg(LOGMSG_ERROR, "%s: failed to rollback %s rc %d\n", - __func__, tran->fdb->dbname, rc); + logmsg(LOGMSG_ERROR, "%s: failed to rollback %s rc %d\n", __func__, tran->fdb->dbname, rc); } if (gbl_fdb_track) @@ -4546,18 +4376,6 @@ int fdb_trans_rollback(sqlclntstate *clnt) char *fdb_trans_id(fdb_tran_t *trans) { return trans->tid; } -int fdb_is_sqlite_stat(fdb_t *fdb, int rootpage) -{ - fdb_tbl_ent_t *ent; - - ent = get_fdb_tbl_ent_by_rootpage_from_fdb(fdb, rootpage); - if (!ent) - return 1; - - return strncasecmp(ent->tbl->name, 
"sqlite_stat", strlen("sqlite_stat")) == - 0; -} - char *fdb_get_alias(const char **p_tablename) { char *errstr = NULL; @@ -4670,130 +4488,162 @@ int fdb_master_is_local(BtCursor *pCur) } /** - * Internal function to remove all the ent objects for a table - * It collects the table associated with the entry, which can - * be an index or the actual table + * free the table object + * NOTE: this is not linked anymore in fdb + * we do have a fdb live lock * */ -static int __free_fdb_tbl(void *obj, void *arg) +static int _free_fdb_tbl(fdb_t *fdb, fdb_tbl_t *tbl) { - fdb_tbl_t *tbl = (fdb_tbl_t *)obj; - fdb_t *fdb = (fdb_t *)arg; fdb_tbl_ent_t *ent, *tmp; - /* check if this is a sqlite_stat table, for which stat might be present; - if so, clear it */ - if (is_sqlite_stat(tbl->name)) { - /* this wipes all the sqlite stats, easier; we could review and - delete only one stat at a time */ - fdb_sqlstat_cache_destroy(&fdb->sqlstats); + LISTC_FOR_EACH_SAFE(&tbl->ents, ent, tmp, lnk) + { + /* free each index/data entry */ + listc_rfl(&tbl->ents, ent); + if (ent->ent) + free(ent->ent); + free(ent->name); + free(ent); + } + + /* free table itself */ + free(tbl->name); + Pthread_rwlock_destroy(&tbl->table_lock); + Pthread_mutex_destroy(&tbl->need_version_mtx); + + free(tbl); + + return FDB_NOERR; /* hash_for requires this prototype */ +} + +/** + * calling _free_fdb_tbl if this is the last reader + * NOTE: this is unlinked already (because stale) + * we do have a live lock for fdb + */ +static void _try_free_fdb_tbl(fdb_t *fdb, fdb_tbl_t *tbl) +{ + /* trylock to see if there are any reader sqlite, if not, free it */ + if (pthread_rwlock_trywrlock(&tbl->table_lock) == 0) { + Pthread_rwlock_unlock(&tbl->table_lock); + _free_fdb_tbl(fdb, tbl); + } else { + /* there are sqlite engines reading this; last to unlock + * this table will free it + */ } +} + +/*Link/Unlink a table from fdb; tables_mtx is acquired */ +static void _xlink_fdb_table(fdb_t *fdb, fdb_tbl_t *tbl, int add) +{ + 
fdb_tbl_ent_t *ent, *tmp; + + if (gbl_fdb_track) + logmsg(LOGMSG_USER, "%sinking %s to %s\n", add ? "L" : "Unl", tbl->name, fdb->dbname); - /* free each entry for table */ + /* free each fdb_tbl_ent from hashes */ LISTC_FOR_EACH_SAFE(&tbl->ents, ent, tmp, lnk) { - /* unlink the entry from everywhere */ - hash_del(fdb->h_ents_rootp, ent); - hash_del(fdb->h_ents_name, ent); - - /* free this entry */ - listc_rfl(&tbl->ents, ent); - if (ent->ent) - free(ent->ent); - free(ent->name); - free(ent); + /* add/rem each entry first */ + if (add) { + hash_add(fdb->h_ents_rootp, ent); + hash_add(fdb->h_ents_name, ent); + } else { + hash_del(fdb->h_ents_rootp, ent); + hash_del(fdb->h_ents_name, ent); + } } - - /* free table itself */ - hash_del(fdb->h_tbls_name, tbl); - free(tbl->name); - Pthread_mutex_destroy(&tbl->ents_mtx); - free(tbl); - - return FDB_NOERR; + /* add/rem table itself from hash and list*/ + if (add) { + tbl->fdb = fdb; + listc_abl(&fdb->tables, tbl); + hash_add(fdb->h_tbls_name, tbl); + } else { + listc_rfl(&fdb->tables, tbl); + hash_del(fdb->h_tbls_name, tbl); + } +} +static void _link_fdb_table(fdb_t *fdb, fdb_tbl_t *tbl) +{ + _xlink_fdb_table(fdb, tbl, 1); +} +static void _unlink_fdb_table(fdb_t *fdb, fdb_tbl_t *tbl) +{ + _xlink_fdb_table(fdb, tbl, 0); } /** * Purge the schema for a specific db * If tbl== NULL, purge all the tables * - * NOTE: caller needs to grab TAGAPI_LK ! 
+ * NOTE: this can fail if there are still readers; under fdbs mutex, + * we trylock and if that fails, routine print error and does nothing + * Alternative would be to block under mutex, and that will block any + * new read access to this fdb * */ -static void fdb_clear_schema(const char *dbname, const char *tblname, - int need_update) +static void _clear_schema(const char *dbname, const char *tblname, int force) { fdb_t *fdb; fdb_tbl_t *tbl; -#if 0 - int already_updated; -#endif + int locked = 0; + + Pthread_mutex_lock(&fdbs.arr_mtx); /* map name to fdb */ - fdb = get_fdb(dbname); + fdb = _cache_fnd_fdb(dbname, NULL); if (!fdb) { logmsg(LOGMSG_ERROR, "unknown fdb \"%s\"\n", dbname); - return; + } else { + /* are there any readers of this fdb */ + if (force) { + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Writelock fdb %s schema clean\n", fdb->dbname); + Pthread_rwlock_wrlock(&fdb->inuse_rwlock); + if (_test_trap_dlock1 == 2) { + _test_trap_dlock1++; + } + locked = 1; + } else { + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Trywrlock fdb %s schema clean\n", fdb->dbname); + if (pthread_rwlock_trywrlock(&fdb->inuse_rwlock) != 0) { + logmsg(LOGMSG_ERROR, "there are still readers for this fdb, cancel clear"); + } else { + locked = 1; + } + } } + Pthread_mutex_unlock(&fdbs.arr_mtx); -#if 0 - /* if we are trying to update, - it is possible that the shared version was already updated - */ - already_updated = 0; - if(need_update) - { - tbl = hash_find_readonly(fdb->h_tbls_name, &tblname); - if (tbl == NULL) - { - fprintf(stderr, "Unknown table \"%s\" in db \"%s\"\n", tblname, dbname); - already_updated = 1; - } - else if (tbl->version == tbl->need_version + 1) - { - if (gbl_fdb_track) - { - fprintf(stderr, "Table %s.%s already at version %u\n", - dbname, tblname, tbl->version); - } - already_updated = 1; - } - } - - if (already_updated) - { - /* done here */ - return; - } - - /* NOTE: lets do this during retry */ - return; -#endif - - if 
(__lock_wrlock_exclusive(fdb->dbname)) { + if (!locked) return; - } + /* all ours, lets clear the entries */ if (tblname == NULL) { - /* all ours, lets clear the entries */ - hash_for(fdb->h_tbls_name, __free_fdb_tbl, fdb); + fdb_tbl_t *tmp; + LISTC_FOR_EACH_SAFE(&fdb->tables, tbl, tmp, lnk) + { + _unlink_fdb_table(fdb, tbl); + _free_fdb_tbl(fdb, tbl); + } + fdb_sqlstat_cache_destroy(&fdb->sqlstats); } else { tbl = hash_find_readonly(fdb->h_tbls_name, &tblname); if (tbl == NULL) { logmsg(LOGMSG_ERROR, "Unknown table \"%s\" in db \"%s\"\n", tblname, dbname); - goto done; - } - - if (__free_fdb_tbl(tbl, fdb)) { - logmsg(LOGMSG_ERROR, - "Error clearing schema for table \"%s\" in db \"%s\"\n", - tblname, dbname); + } else { + _unlink_fdb_table(fdb, tbl); + _free_fdb_tbl(fdb, tbl); } } -done: - Pthread_rwlock_unlock(&fdb->h_rwlock); + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Unlock fdb %s schema clean\n", fdb->dbname); + pthread_rwlock_unlock(&fdb->inuse_rwlock); } /** @@ -4819,7 +4669,7 @@ static void fdb_init(void) { logmsg(LOGMSG_ERROR, "Testing routine clearing fdb structure!\n"); - Pthread_rwlock_wrlock(&fdbs.arr_lock); + Pthread_mutex_lock(&fdbs.arr_mtx); /* * we leak on purpose instead of adding extra synchronization @@ -4831,10 +4681,10 @@ static void fdb_init(void) logmsg(LOGMSG_INFO, "FDB testing reset dbopen_gen %d\n", bdb_get_dbopen_gen()); BDB_BUMP_DBOPEN_GEN(invalid, "fdb_init"); - pthread_rwlock_unlock(&fdbs.arr_lock); + Pthread_mutex_unlock(&fdbs.arr_mtx); } -static int __fdb_info_ent(void *obj, void *arg) +static int _info_ent(void *obj, void *arg) { fdb_tbl_ent_t *ent = (fdb_tbl_ent_t *)obj; @@ -4862,7 +4712,7 @@ static int __fdb_info_ent(void *obj, void *arg) return FDB_NOERR; } -static int __fdb_info_ent_save(void *obj, void *arg) +static int _info_ent_save(void *obj, void *arg) { fdb_tbl_ent_t *ent = (fdb_tbl_ent_t *)obj; fdb_systable_info_t *info = (fdb_systable_info_t *)arg; @@ -4881,29 +4731,27 @@ static int __fdb_info_ent_save(void 
*obj, void *arg) return FDB_NOERR; } -/** - * Report the tables for db with their versions - * If dbname == NULL, report all dbs - * +/* + * Retrieve all the tables and their indexes for this fdb + * If no info argument, the information is printed rather than saved + * Work is done under tables_mtx, so no tables can be added or removed */ -static void fdb_info_tables(fdb_t *fdb, fdb_systable_info_t *info) +static void _info_tables(fdb_t *fdb, fdb_systable_info_t *info) { - __lock_wrlock_shared(fdb); + Pthread_mutex_lock(&fdb->tables_mtx); if (!info) { - hash_for(fdb->h_ents_name, __fdb_info_ent, NULL); + hash_for(fdb->h_ents_name, _info_ent, NULL); } else { - int nents = fdb_num_entries(fdb); + int nents = _num_entries(fdb); info->arr = realloc(info->arr, sizeof(fdb_systable_ent_t) * (info->narr + nents)); if (!info->arr) { logmsg(LOGMSG_ERROR, "%s: unable to allocate virtual table info fdb\n", __func__); - goto done; - } - hash_for(fdb->h_ents_name, __fdb_info_ent_save, info); + } else + hash_for(fdb->h_ents_name, _info_ent_save, info); } -done: - Pthread_rwlock_unlock(&fdb->h_rwlock); + Pthread_mutex_unlock(&fdb->tables_mtx); } /** @@ -4911,40 +4759,45 @@ static void fdb_info_tables(fdb_t *fdb, fdb_systable_info_t *info) * If dbname == NULL, report all dbs * */ -static void fdb_info_db(const char *dbname, fdb_systable_info_t *info) +static void _info_fdb(const char *dbname, fdb_systable_info_t *info) { fdb_t *fdb; if (!dbname) { int i; - Pthread_rwlock_rdlock(&fdbs.arr_lock); + Pthread_mutex_lock(&fdbs.arr_mtx); + /* since we got arr_mtx, no get_fdb or put_fdb can race with this thread + * an existing fdb cannot go away therefore + */ for (i = 0; i < fdbs.nused; i++) { fdb = fdbs.arr[i]; if (!fdb) continue; - __fdb_add_user(fdb, 1); + if (gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Locking fdb %s schema info\n", fdb->dbname); + Pthread_rwlock_rdlock(&fdb->inuse_rwlock); - fdb_info_tables(fdb, info); + _info_tables(fdb, info); - __fdb_rem_user(fdb, 1); + if 
(gbl_fdb_track_locking) + logmsg(LOGMSG_USER, "Unlock fdb %s schema info\n", fdb->dbname); + Pthread_rwlock_unlock(&fdb->inuse_rwlock); } - Pthread_rwlock_unlock(&fdbs.arr_lock); + Pthread_mutex_unlock(&fdbs.arr_mtx); } else { - fdb = get_fdb(dbname); + fdb = get_fdb(dbname, FDB_GET_LOCK); if (!fdb) { logmsg(LOGMSG_ERROR, "fdb info db: unknown dbname \"%s\"\n", dbname); return; } - __fdb_add_user(fdb, 1); + _info_tables(fdb, info); - fdb_info_tables(fdb, info); - - __fdb_rem_user(fdb, 1); + put_fdb(fdb, FDB_PUT_NOFREE); } } @@ -4957,6 +4810,7 @@ int fdb_process_message(const char *line, int lline) int st = 0; int ltok = 0; char *tok; + int force = 0; tok = segtok((char *)line, lline, &st, <ok); if (ltok == 0) { @@ -4978,7 +4832,16 @@ int fdb_process_message(const char *line, int lline) "tables names and their versions in db \"dbname\"\n"); } else if (tokcmp(tok, ltok, "init") == 0) { fdb_init(); + } else if (tokcmp(tok, ltok, "force") == 0) { + force = 1; + tok = segtok((char *)line, lline, &st, <ok); + if (ltok == 0 || tokcmp(tok, ltok, "clear")) { + logmsg(LOGMSG_ERROR, "fdb schema error: missing clear\n"); + return FDB_ERR_GENERIC; + } + goto clear; } else if (tokcmp(tok, ltok, "clear") == 0) { + clear: tok = segtok((char *)line, lline, &st, <ok); if (ltok == 0) { logmsg(LOGMSG_ERROR, "fdb schema error: missing command\n"); @@ -5003,7 +4866,7 @@ int fdb_process_message(const char *line, int lline) wrlock_schema_lk(); /* clear all tables for db "dbname" */ - fdb_clear_schema(dbname, NULL, 0); + _clear_schema(dbname, NULL, force); unlock_schema_lk(); } else { @@ -5018,7 +4881,7 @@ int fdb_process_message(const char *line, int lline) wrlock_schema_lk(); /* clear table "tblname for db "dbname" */ - fdb_clear_schema(dbname, tblname, 0); + _clear_schema(dbname, tblname, force); unlock_schema_lk(); @@ -5043,7 +4906,7 @@ int fdb_process_message(const char *line, int lline) if (tokcmp(tok, ltok, "db") == 0) { tok = segtok((char *)line, lline, &st, <ok); if (ltok == 0) { 
- fdb_info_db(NULL, NULL); + _info_fdb(NULL, NULL); } else { char *dbname = tokdup(tok, ltok); if (!dbname) { @@ -5051,7 +4914,7 @@ int fdb_process_message(const char *line, int lline) return FDB_ERR_MALLOC; } - fdb_info_db(dbname, NULL); + _info_fdb(dbname, NULL); free(dbname); } @@ -5095,6 +4958,8 @@ int fdb_table_version(unsigned long long version) */ void fdb_clear_sqlclntstate(sqlclntstate *clnt) { + _clnt_cache_destroy(clnt); + _fdb_clear_clnt_node_affinities(clnt); if (clnt->fdb_state.access) { @@ -5134,24 +4999,11 @@ void fdb_clear_sqlite_cache(sqlite3 *sqldb, const char *dbname, numbers */ sqlite3ResetOneSchemaByName(sqldb, "sqlite_stat1", dbname); - sqlite3ResetOneSchemaByName(sqldb, "sqlite_stat2", dbname); sqlite3ResetOneSchemaByName(sqldb, "sqlite_stat4", dbname); } -int fdb_table_exists(int rootpage) -{ - fdb_tbl_ent_t *ent = NULL; - ent = get_fdb_tbl_ent_by_rootpage(rootpage); - if (ent) - return 1; - return 0; -} - /** - * Lock a remote table schema cache - * - * A remote schema change will trigger a flush of local schema cache - * The lock prevents the flush racing against running remote access + * Read lock a remote table schema cache * */ int fdb_lock_table(sqlite3_stmt *pStmt, sqlclntstate *clnt, Table *tab, @@ -5162,18 +5014,44 @@ int fdb_lock_table(sqlite3_stmt *pStmt, sqlclntstate *clnt, Table *tab, int version = tab->version; Db *db = &((Vdbe *)pStmt)->db->aDb[tab->iDb]; - ent = get_fdb_tbl_ent_by_rootpage(rootpage); + /* this ensures live fdb object */ + fdb_t *fdb = get_fdb(db->zDbSName, FDB_GET_LOCK); + if (!fdb) { + logmsg(LOGMSG_ERROR, "%s fdb %s removed!\n", __func__, db->zDbSName); + /* we are returning here version mismatch, so that the upper + * layer will re-prepare and re-create fdb object that was cleared + */ + return SQLITE_SCHEMA_REMOTE; + } + + /* at this point we have a live lock in fdb, and we have sqlite populated + * with schemas and stats for remote tables. 
+ * We lost any sqlite setup table locks (which are required by the remote + * table discovery process). + * We need to reacquire locks for the remote table and check if it still has + * a version matching the sqlite cached version + * + * NOTE: we remove local clnt cache when we lose locks, so we need + * to get again table from the fdb object + * NOTE2: once fdb live lock is gone, fdb can be destroyed and another + * can be created in its place; need to set the proper pointer here + * which will be valid until the table locks are unlocked + * + */ + db->pBt->fdb = fdb; + + Pthread_mutex_lock(&fdb->tables_mtx); + + ent = hash_find_readonly(fdb->h_ents_rootp, &rootpage); *p_ent = NULL; /* missing or wrong version? */ if (!ent || ent->tbl->version != tab->version) { clnt->osql.error_is_remote = 1; - clnt->osql.xerr.errval = CDB2ERR_ASYNCERR; - errstat_set_strf(&clnt->osql.xerr, - "schema change table \"%s\" from db \"%s\"", - tab->zName, db->zDbSName); + errstat_set_rcstrf(&clnt->osql.xerr, CDB2ERR_ASYNCERR, "schema change table \"%s\" from db \"%s\"", tab->zName, + db->zDbSName); if (gbl_fdb_track) { if (ent) { @@ -5186,6 +5064,8 @@ int fdb_lock_table(sqlite3_stmt *pStmt, sqlclntstate *clnt, Table *tab, db->zDbSName, tab->zName, tab->version); } } + Pthread_mutex_unlock(&fdb->tables_mtx); + put_fdb(fdb, FDB_PUT_NOFREE); return SQLITE_SCHEMA_REMOTE; } @@ -5199,34 +5079,66 @@ int fdb_lock_table(sqlite3_stmt *pStmt, sqlclntstate *clnt, Table *tab, logmsg(LOGMSG_USER, "Locking \"%s\" version %u\n", fqname, version); } - /* Lets try something simple, bumping users for fdb */ - __fdb_add_user(ent->tbl->fdb, 0); + /* add the table to clnt local cache and read lock it */ + Pthread_rwlock_rdlock(&ent->tbl->table_lock); + _clnt_cache_add_tbl(clnt, ent->tbl); *p_ent = ent; + Pthread_mutex_unlock(&fdb->tables_mtx); + return FDB_NOERR; } /** * Unlock a remote table schema cache * - * This matches fdb_lock_table, allowing again exclusive access to that table - * */ -int 
fdb_unlock_table(fdb_tbl_ent_t *ent) +int fdb_unlock_table(sqlclntstate *clnt, fdb_tbl_ent_t *ent) { - if (gbl_fdb_track) { + /* we do have a live fdb read lock */ + fdb_t *fdb = ent->tbl->fdb; + fdb_tbl_t *our_table = ent->tbl; + + if (gbl_fdb_track || gbl_fdb_track_locking) { char fqname[128]; - snprintf(fqname, sizeof(fqname), "%s.%s", ent->tbl->fdb->dbname, - ent->tbl->name); + snprintf(fqname, sizeof(fqname), "%s.%s", fdb->dbname, our_table->name); fqname[sizeof(fqname) - 1] = '\0'; - logmsg(LOGMSG_ERROR, "Unlocking \"%s\" version %llu\n", fqname, - ent->tbl->version); + logmsg(LOGMSG_ERROR, "Unlocking \"%s\" version %llu\n", fqname, our_table->version); + } + + /* we are about to lose the read lock on the table; + * remove it from the clnt cache first, it can be gone + * the moment we release the lock + */ + _clnt_cache_rem_tbl(clnt, our_table); + + /* + * we need to check if the table our_table is stale, + * and if so, if we are the last client + * If both are true, table needs to be freed + * + */ + Pthread_mutex_lock(&fdb->tables_mtx); + fdb_tbl_t *shared_table = hash_find_readonly(fdb->h_tbls_name, &our_table->name); + if (!shared_table || shared_table != our_table) { + + /* shared table changed, our table is stale */ + Pthread_rwlock_unlock(&our_table->table_lock); + + /* try to free it if we are the last reader */ + _try_free_fdb_tbl(fdb, our_table); + } else { + /* we still point to the current table; unlock it */ + Pthread_rwlock_unlock(&our_table->table_lock); } - __fdb_rem_user(ent->tbl->fdb, 1); + Pthread_mutex_unlock(&fdb->tables_mtx); + + /* decrement fdb live lock for this table lock */ + put_fdb(fdb, FDB_PUT_NOFREE); return FDB_NOERR; } @@ -5400,24 +5312,6 @@ static int _get_protocol_flags(sqlclntstate *clnt, fdb_t *fdb, return 0; } -/** - * Change association of a cursor to a table (see body note) - * - */ -void fdb_cursor_use_table(fdb_cursor_t *cur, struct fdb *fdb, - const char *tblname) -{ - /* - * NOTE: - * Cursors running sql are not 
assigned to a table per-se. - * An initial table is assigned at the beginning and used to - * retrieve the table version - * This function lets re-use the cursor with a different table - * - */ - cur->ent = get_fdb_tbl_ent_by_name_from_fdb(fdb, tblname); -} - int fdb_cursor_need_ssl(fdb_cursor_if_t *cur) { return cur->impl->need_ssl; @@ -5517,7 +5411,7 @@ int fdb_get_server_semver(const fdb_t * const fdb, const char ** version) return rc; } -static int _validate_existing_table(fdb_t *fdb, int cls, int local) +static int _validate_existing_fdb(fdb_t *fdb, int cls, int local) { if (fdb->local != local) { logmsg(LOGMSG_ERROR, @@ -5538,7 +5432,7 @@ static int _validate_existing_table(fdb_t *fdb, int cls, int local) return FDB_NOERR; } -int fdb_validate_existing_table(const char *zDatabase) +int fdb_validate_existing(const char *zDatabase) { fdb_t *fdb = NULL; int rc = FDB_NOERR; @@ -5549,17 +5443,17 @@ int fdb_validate_existing_table(const char *zDatabase) /* This points dbName at 'name' portion of zDatabase */ cls = get_fdb_class(&dbName, &local, NULL); - Pthread_rwlock_rdlock(&fdbs.arr_lock); + Pthread_mutex_lock(&fdbs.arr_mtx); /* This searches only by 'name' (so no duplicate dbnames across classes) */ - fdb = __cache_fnd_fdb(dbName, NULL); + fdb = _cache_fnd_fdb(dbName, NULL); if (fdb) { - rc = _validate_existing_table(fdb, cls, local); + rc = _validate_existing_fdb(fdb, cls, local); } /* else {}: if the fdb was removed, there is no validation to be done; fdb was probably removed and the follow up code might actually establish a new fdb */ - Pthread_rwlock_unlock(&fdbs.arr_lock); + Pthread_mutex_unlock(&fdbs.arr_mtx); return rc; } @@ -5611,7 +5505,7 @@ int fdb_systable_info_collect(void **data, int *npoints) { fdb_systable_info_t info = {0}; - fdb_info_db(NULL, &info); + _info_fdb(NULL, &info); *data = info.arr; *npoints = info.narr; @@ -6223,8 +6117,17 @@ int process_fdb_set_cdb2api_2pc(sqlclntstate *clnt, char *sqlstr, char *err, if (clnt->coordinator_dbname && 
clnt->coordinator_tier && clnt->dist_txnid && clnt->dist_timestamp) { + if (!coordinator_is_allowed(clnt->coordinator_dbname, clnt->coordinator_tier)) { + logmsg(LOGMSG_ERROR, "Rejecting 2pc transaction, %s/%s is not an allowed coordinator\n", + clnt->coordinator_dbname, clnt->coordinator_tier); + return -1; + } clnt->use_2pc = 1; clnt->is_participant = 1; + if (gbl_debug_disttxn_trace) { + logmsg(LOGMSG_USER, "DISTTXN REPL %s New participant db %s tier %s txnid %s tsstamp %" PRId64 "\n", + __func__, clnt->coordinator_dbname, clnt->coordinator_tier, clnt->dist_txnid, clnt->dist_timestamp); + } } return 0; @@ -6308,16 +6211,13 @@ static fdb_push_connector_t *fdb_push_connector_create(const char *dbname, struct errstat err = {0}; /* remote fdb */ - fdb_t *fdb = new_fdb(dbname, &created, class, local, override); + fdb_t *fdb = _new_fdb(dbname, &created, class, local, override); if (!fdb) return NULL; int rc = fdb_get_remote_version(fdb->dbname, tblname, fdb->class, local, &remote_version, &err); - if (sqlite3UnlockTable(dbname, tblname)) { - logmsg(LOGMSG_ERROR, "%s:%d Failed to unlock table %s on db %s\n!!", - __func__, __LINE__, tblname, dbname); - } + put_fdb(fdb, FDB_PUT_NOFREE); switch (rc) { case FDB_NOERR: @@ -6474,9 +6374,12 @@ static int _running_dist_ddl(struct schema_change_type *sc, char **errmsg, uint3 goto abort; } else if (pushes[i]) { /* need to mark the create as a remote write */ - fdb_t *fdb = get_fdb(dbnames[i]); + fdb_t *fdb = get_fdb(dbnames[i], FDB_GET_LOCK); fdb_tran_t * tran = fdb_get_subtran(clnt->dbtran.dtran, fdb); + /* ddl has no rows writes */ tran->nwrites += 1; + tran->writes_status = FDB_TRAN_WRITES; + put_fdb(fdb, FDB_PUT_NOFREE); } } @@ -6585,3 +6488,318 @@ int osql_test_remove_genshard(struct schema_change_type *sc, char **errmsg) return -1; } +/* Local cache for table schemas; + * + * We create and clear this cache when we get and release + * table locks, respectively + * + * NOTE: the object is per clnt, so per thread; + * 
add/rem/find do not need mutexes + */ +struct clnt_fdb_cache_ent { /* per thread linking of shared fdb_tbl_t-s */ + fdb_tbl_t *tbl; + LINKC_T(struct clnt_fdb_cache_ent) lnk; +}; +typedef struct clnt_fdb_cache_ent clnt_fdb_cache_ent_t; +struct clnt_fdb_cache { + hash_t *tbl_ent_by_rootp; + hash_t *tbl_by_name; + LISTC_T(struct clnt_fdb_cache_ent) tbls; +}; +typedef struct clnt_fdb_cache clnt_fdb_cache_t; + +static clnt_fdb_cache_t *_clnt_cache_create(void) +{ + clnt_fdb_cache_t *cache = calloc(1, sizeof(clnt_fdb_cache_t)); + if (cache) { + cache->tbl_ent_by_rootp = hash_init_i4(0); + cache->tbl_by_name = hash_init_strptr(0); + listc_init(&cache->tbls, offsetof(struct clnt_fdb_cache_ent, lnk)); + } + return cache; +} + +/** + * free the client remote table cache + * make sure we do not leak table locks + * + */ +static void _clnt_cache_destroy(sqlclntstate *clnt) +{ + clnt_fdb_cache_t *cache = clnt->remoteFdbCache; + if (cache) { + hash_free(cache->tbl_ent_by_rootp); + hash_free(cache->tbl_by_name); + if (cache->tbls.top) { + logmsg(LOGMSG_ERROR, "Locked tables leaked!\n"); + abort(); + } + clnt->remoteFdbCache = NULL; + } +} + +/* add an entry; we have tables_mtx locked, and table read locked */ +static int _clnt_cache_add_tbl(sqlclntstate *clnt, fdb_tbl_t *tbl) +{ + fdb_tbl_ent_t *ent; + clnt_fdb_cache_ent_t *cent; + + /* cache it in the clnt so that access does not race with table updates */ + if (!clnt->remoteFdbCache) { + clnt->remoteFdbCache = _clnt_cache_create(); + if (!clnt->remoteFdbCache) { + logmsg(LOGMSG_ERROR, "%s Error malloc\n", __func__); + return -1; + } + } + + cent = calloc(1, sizeof(*cent)); + if (!cent) { + logmsg(LOGMSG_ERROR, "%s Error malloc cent\n", __func__); + return -1; + } + cent->tbl = tbl; + + clnt_fdb_cache_t *cache = clnt->remoteFdbCache; + + LISTC_FOR_EACH(&tbl->ents, ent, lnk) + { + hash_add(cache->tbl_ent_by_rootp, ent); + } + hash_add(cache->tbl_by_name, tbl); + listc_abl(&cache->tbls, cent); + + return 0; +} + +/* being single 
threaded, no locks needed */ +void _clnt_cache_rem_tbl(sqlclntstate *clnt, fdb_tbl_t *tbl) +{ + clnt_fdb_cache_ent_t *cent, *tmp; + clnt_fdb_cache_t *cache = clnt->remoteFdbCache; + fdb_tbl_ent_t *ent; + + if (cache) { + /* remote the table ents */ + LISTC_FOR_EACH(&tbl->ents, ent, lnk) + { + hash_del(cache->tbl_ent_by_rootp, ent); + } + /* remove the tables itself */ + hash_del(cache->tbl_by_name, tbl); + LISTC_FOR_EACH_SAFE(&cache->tbls, cent, tmp, lnk) + { + if (cent->tbl == tbl) { + listc_rfl(&cache->tbls, cent); + free(cent); + break; + } + } + } +} + +fdb_tbl_ent_t *fdb_clnt_cache_get_ent(sqlclntstate *clnt, int rootpage) +{ + fdb_tbl_ent_t *ent = NULL; + if (!clnt->remoteFdbCache) + abort(); + + ent = hash_find_readonly(clnt->remoteFdbCache->tbl_ent_by_rootp, &rootpage); + return ent; +} + +fdb_tbl_ent_t *_sqlite_cache_get_ent_by_name(sqlclntstate *clnt, const char *name) +{ + fdb_tbl_t *tbl = hash_find_readonly(clnt->remoteFdbCache->tbl_by_name, &name); + return tbl ? tbl->ents.top : NULL; +} + +fdb_tbl_t *_clnt_cache_get_tbl_by_name(sqlclntstate *clnt, const char *name) +{ + fdb_tbl_t *tbl; + tbl = hash_find_readonly(clnt->remoteFdbCache->tbl_by_name, &name); + return tbl; +} + +#define RETRY_GET_STATS_PER_STAT 3 +static int _sqlstat_populate_table(fdb_t *fdb, BtCursor *cur, const char *tblname, const char *sql, + /* out */ struct temp_table *tbl, + /* out */ int *pnrows) +{ + fdb_cursor_if_t *fdbc_if; + int bdberr = 0; + int rc = 0; + char *row; + int rowlen; + int irc; + int retry = 0; + int nrows = 0; + int key; + + /* if remote failed to provide stats already, done here; we are running + with empty stats to prevent trying to read stats for every query */ + if (!cur->fdbc) + return 0; + + fdbc_if = cur->fdbc; + fdbc_if->set_sql(cur, sql); + + /* for schema changed sqlite stats, we need to provide the version! 
*/ + fdbc_if->impl->ent = _sqlite_cache_get_ent_by_name(cur->clnt, tblname); + + /* try a few times here */ + do { + rc = fdbc_if->move(cur, CFIRST); + if (rc != IX_FND && rc != IX_FNDMORE) { + if (rc == FDB_ERR_FDB_VERSION) { + /* TODO: downgrade protocol */ + abort(); + } + if (rc != IX_EMPTY /* && rc != IX_PASTEOF*/) { + logmsg(LOGMSG_ERROR, "%s: failed to read first row from %s.%s rc=%d retry %d\n", __func__, fdb->dbname, + tblname, rc, retry); + /* error, try again, tbl untouched */ + if (cur->fdbc) + goto retry_io; + /* in this case, the remote did not answer repeated calls + and stats cursor is closed; we are done here */ + return 0; + } + /* empty stats */ + rc = 0; + goto close; + } + + do { + /* rows ! */ + row = fdbc_if->data(cur); + rowlen = fdbc_if->datalen(cur); + + key = nrows; /* there use to be some shenanigans with the keys and temp tables, + probably still there, use a copy of nrows as index for stats */ + irc = bdb_temp_table_put(thedb->bdb_env, tbl, &key, sizeof(key), row, rowlen, NULL, &bdberr); + nrows++; + if (irc) { + logmsg(LOGMSG_ERROR, "%s: failed temp table insert for %s.%s rc=%d bdberr=%d\n", __func__, fdb->dbname, + tblname, rc, bdberr); + rc = irc; + goto retry_io; + } + + if (rc == IX_FNDMORE) { + rc = fdbc_if->move(cur, CNEXT); + } else { + break; + } + } while (rc == IX_FNDMORE || rc == IX_FND); + + if (rc == IX_FND || rc == IX_EMPTY) { + rc = 0; + /* success, get out of here */ + break; + } + /* fall-through if error to retry */ + retry_io: + if (nrows > 0) { + logmsg(LOGMSG_ERROR, "%s: failed to read all rows from %s.%s rc=%d retry %d\n", __func__, fdb->dbname, + tblname, rc, retry); + + irc = bdb_temp_table_truncate(thedb->bdb_env, tbl, &bdberr); + if (irc) { + logmsg(LOGMSG_ERROR, + "%s: truncating the temp table also failed rc %d bdberr " + "%d\n", + __func__, irc, bdberr); + goto close; + } + nrows = 0; + } + if (db_is_exiting()) { + logmsg(LOGMSG_ERROR, "Interrupting %s, db is exiting\n", __func__); + fdbc_if->set_sql(cur, 
NULL); /* not owner of sql hint */ + return -1; + } + } while ((retry++) < RETRY_GET_STATS_PER_STAT); + +close: + *pnrows = nrows; + fdbc_if->set_sql(cur, NULL); /* not owner of sql hint */ + /* we retried a few times, return with partial stats */ + return 0; +} + +/** + * Populate temp tables with stats from remote db + * The fdb sqlstats_mtx is acquired at this point + * + */ +int fdb_sqlstat_cache_populate(struct sqlclntstate *clnt, fdb_t *fdb, + /* out */ struct temp_table *stat1, + /* out */ struct temp_table *stat4, + /* out */ int *nrows_stat1, + /* out */ int *nrows_stat4) +{ + BtCursor *cur; + fdb_cursor_if_t *fdbc_if; + /* NOTE: at this point, we collect ALL the stats; this eliminates the need + * to selectively update local cache, at the expense of a higher latency on + * initial remote db access; to be reviewed + */ + char *sql_stat1 = "select * from sqlite_stat1"; + char *sql_stat4 = "select * from sqlite_stat4 where tbl not like 'cdb2.%'"; + int rc = 0; + int irc; + + /* fake a BtCursor */ + cur = calloc(1, sizeof(BtCursor) + sizeof(Btree)); + if (!cur) { + rc = FDB_ERR_MALLOC; + logmsg(LOGMSG_ERROR, "%s: malloc\n", __func__); + goto done; + } + init_cursor(cur, NULL, (Btree *)(cur + 1)); + cur->bt->fdb = fdb; + cur->bt->is_remote = 1; + cur->rootpage = -1; /*not really used for sqlite_stats*/ + assert(cur->clnt == clnt); + + fdbc_if = fdb_cursor_open(clnt, cur, cur->rootpage, NULL, NULL, 0 /* TODO */); + if (!fdbc_if) { + logmsg(LOGMSG_ERROR, "%s: failed to connect remote to get stats\n", __func__); + rc = -1; + goto done; + } + + /* retrieve records */ + rc = _sqlstat_populate_table(fdb, cur, "sqlite_stat1", sql_stat1, stat1, nrows_stat1); + if (rc) { + logmsg(LOGMSG_ERROR, "%s: failed to populate sqlite_stat1 rc=%d\n", __func__, rc); + goto close; + } + + rc = _sqlstat_populate_table(fdb, cur, "sqlite_stat4", sql_stat4, stat4, nrows_stat4); + if (rc) { + logmsg(LOGMSG_ERROR, "%s: failed to populate sqlite_stat4 rc=%d\n", __func__, rc); + goto 
close; + } + +close: + /* close cursor */ + irc = fdbc_if->close(cur); + if (irc) { + logmsg(LOGMSG_ERROR, "%s: failed to close cursor rc=%d\n", __func__, rc); + } +done: + return rc; +} + +int fdb_is_sqlite_stat(sqlclntstate *clnt, int rootpage) +{ + fdb_tbl_ent_t *ent; + + ent = fdb_clnt_cache_get_ent(clnt, rootpage); + if (!ent) + return 1; + + return strncasecmp(ent->tbl->name, "sqlite_stat", strlen("sqlite_stat")) == 0; +} diff --git a/db/fdb_fend.h b/db/fdb_fend.h index 2301729d8e..44a5fda331 100644 --- a/db/fdb_fend.h +++ b/db/fdb_fend.h @@ -101,6 +101,8 @@ typedef struct fdb_sqlstat_cache fdb_sqlstat_cache_t; typedef struct fdb_sqlstat_table fdb_sqlstat_table_t; typedef struct fdb_sqlstat_cursor fdb_sqlstat_cursor_t; +enum fdb_tran_status { FDB_TRAN_NOP = 0, FDB_TRAN_BEGIN, FDB_TRAN_WRITES }; + struct fdb_tran { char magic[4]; char *tid; /* transaction id */ @@ -137,7 +139,9 @@ struct fdb_tran { bdb_state_type *bdb_state; struct temp_table *dedup_tbl; struct temp_cursor *dedup_cur; - int nwrites; /* number of writes (ins/upd/del) issues on the fdb tran */ + + int nwrites; /* how many writes were done; rows for dml; ddl instructions for ddl */ + enum fdb_tran_status writes_status; /* what status is this connection */ /** * libevent heartbeats @@ -185,8 +189,6 @@ typedef struct fdb_cursor_if { int datalen, char *data); int (*create_tran)(sqlclntstate *clnt, fdb_t *fdb, int use_ssl); - fdb_tbl_ent_t *(*table_entry)(BtCursor *pCur); - int (*access)(BtCursor *pCur, int how); } fdb_cursor_if_t; @@ -198,12 +200,28 @@ typedef struct fdb_cursor_if { int fdb_cache_init(int n); /** - * Retrieve a foreign db object - * The callers of this function should make sure a table lock is acquired - * Such by calling fdb_lock_table(). 
+ * Retrieve a fdb object + * Protected by the fdbs array mutex + * If found, the object returned is read locked * */ -fdb_t *get_fdb(const char *dbname); +enum fdb_get_flag { FDB_GET_NOLOCK = 0, FDB_GET_LOCK = 1 }; +fdb_t *get_fdb_int(const char *dbname, enum fdb_get_flag flag, const char *f, int l); +#define get_fdb(dbname, flag) get_fdb_int(dbname, flag, __func__, __LINE__) + +/** + * Remove the read lock on a fdb object + * Protected by the fdbs array mutex + * Flag controls the removal; + * - NOFREE: the fdb is read unlocked and left in the fdbs array to be reused + * - TRYFREE: we try to write lock the fdb; if we succeed, this is the only reader + * so it will be unlinked from cache and freed + * - FORCEFREE: under fdbs array mutex we block until a write lock is acquired + * !!!CAUTION this blocks new access to fdbs until the write lock is acquired + */ +enum fdb_put_flag { FDB_PUT_NOFREE = 0, FDB_PUT_TRYFREE = 1, FDB_PUT_FORCEFREE = 2 }; +void put_fdb_int(fdb_t *fdb, enum fdb_put_flag flag, const char *f, int l); +#define put_fdb(dbname, flag) put_fdb_int(dbname, flag, __func__, __LINE__) /** * Move a cursor on sqlite_master table @@ -228,15 +246,13 @@ void *fdb_get_sqlite_master_entry(fdb_t *fdb, fdb_tbl_ent_t *ent); * Caller must free the returned pointer * */ -char *fdb_sqlexplain_get_name(int rootpage); +char *fdb_sqlexplain_get_name(struct sqlclntstate *clnt, int rootpage); /** * Retrieve the field name for the table identified by "rootpage", index - * "ixnum", - * field "fieldnum" + * "ixnum", field "fieldnum" */ -char *fdb_sqlexplain_get_field_name(Vdbe *v, int rootpage, int ixnum, - int fieldnum); +char *fdb_sqlexplain_get_field_name(struct sqlclntstate *clnt, Vdbe *v, int rootpage, int ixnum, int fieldnum); /** * Create a connection to fdb @@ -246,10 +262,10 @@ fdb_cursor_if_t *fdb_cursor_open(sqlclntstate *clnt, BtCursor *pCur, int rootpage, fdb_tran_t *trans, int *ixnum, int need_ssl); -/* - This returns the sqlstats table under a mutex +/** + * 
Release lock on fdb sqlstats_mtx + * */ -fdb_sqlstat_cache_t *fdb_sqlstats_get(fdb_t *fdb); void fdb_sqlstats_put(fdb_t *fdb); /** @@ -260,18 +276,16 @@ const char *fdb_table_entry_tblname(fdb_tbl_ent_t *ent); const char *fdb_table_entry_dbname(fdb_tbl_ent_t *ent); /** - * Retrieve entry for table|index given a fdb and name + * Get table entries from sqlite vdbe cache * */ -fdb_tbl_ent_t *fdb_table_entry_by_name(fdb_t *fdb, const char *name); - -int fdb_is_sqlite_stat(fdb_t *fdb, int rootpage); +fdb_tbl_ent_t *fdb_clnt_cache_get_ent(sqlclntstate *clnt, int rootpage); /* transactional api */ fdb_tran_t *fdb_trans_begin_or_join(sqlclntstate *clnt, fdb_t *fdb, int use_ssl, int *created); fdb_tran_t *fdb_trans_join(sqlclntstate *clnt, fdb_t *fdb); -int fdb_trans_commit(sqlclntstate *clnt, enum trans_clntcomm sideeffects); +int fdb_trans_commit(sqlclntstate *clnt, enum trans_clntcomm sideeffects, int *is_distributed); int fdb_trans_rollback(sqlclntstate *clnt); char *fdb_trans_id(fdb_tran_t *trans); @@ -343,7 +357,7 @@ int fdb_lock_table(sqlite3_stmt *pStmt, sqlclntstate *clnt, Table *tab, * This matches fdb_lock_table, allowing again exclusive access to that table * */ -int fdb_unlock_table(fdb_tbl_ent_t *ent); +int fdb_unlock_table(sqlclntstate *clnt, fdb_tbl_ent_t *ent); /** * Send heartbeats to remote dbs in a distributed transaction @@ -357,18 +371,9 @@ int fdb_heartbeats(fdb_hbeats_type *hbeats); */ void fdb_heartbeat_free_tran(fdb_hbeats_type *hbeats); -/** - * Change association of a cursor to a table (see body note) - * - */ -void fdb_cursor_use_table(fdb_cursor_t *cur, struct fdb *fdb, - const char *tblname); - /* return if ssl is needed */ int fdb_cursor_need_ssl(fdb_cursor_if_t *cur); -int fdb_table_exists(int rootpage); - int fdb_set_genid_deleted(fdb_tran_t *, unsigned long long); int fdb_is_genid_deleted(fdb_tran_t *, unsigned long long); @@ -427,5 +432,23 @@ fdb_push_connector_t* fdb_push_create(const char *dbname, enum mach_class class, */ const char 
*fdb_retry_callback(void *arg); +/** + * Populate temp tables with stats from remote db + * The fdb sqlstats_mtx is acquired at this point + * + */ +int fdb_sqlstat_cache_populate(struct sqlclntstate *clnt, fdb_t *fdb, + /* out */ struct temp_table *stat1, + /* out */ struct temp_table *stat4, + /* out */ int *nrows_stat1, + /* out */ int *nrows_stat4); + +/** + * Return 1 if rootpage is for a sqlite_state table, + * or if no table exists for that rootpage + * + */ +int fdb_is_sqlite_stat(sqlclntstate *clnt, int rootpage); + #endif diff --git a/db/fdb_fend_cache.c b/db/fdb_fend_cache.c index cb4da27369..f957fafced 100644 --- a/db/fdb_fend_cache.c +++ b/db/fdb_fend_cache.c @@ -77,7 +77,6 @@ static char *fdb_sqlstat_cursor_name(BtCursor *pCur); static int fdb_sqlstat_cursor_has_partidx(BtCursor *pCur); static int fdb_sqlstat_cursor_has_expridx(BtCursor *pCur); static char *fdb_sqlstat_cursor_dbname(BtCursor *pCur); -static fdb_tbl_ent_t *fdb_sqlstat_cursor_table_entry(BtCursor *pCur); static int fdb_sqlstat_cursor_access(BtCursor *pCur, int how); static int fdb_sqlstat_cursor_insert(BtCursor *pCur, struct sqlclntstate *clnt, @@ -93,220 +92,21 @@ static int fdb_sqlstat_cursor_update(BtCursor *pCur, struct sqlclntstate *clnt, unsigned long long genid, int datalen, char *data); -static int insert_sqlstat_row_from_packedsqlite(fdb_t *fdb, - fdb_sqlstat_table_t *tbl, - char *row, int rowlen) +static int __fdb_sqlstat_table_init(fdb_sqlstat_table_t *tbl, const char *name) { - int rc = 0; int bdberr = 0; - int key = tbl->nrows; - - rc = bdb_temp_table_put(thedb->bdb_env, tbl->tbl, &key, sizeof(key), row, - rowlen, NULL, &bdberr); - tbl->nrows++; - - return rc; -} - -#define RETRY_GET_STATS_PER_STAT 3 -static int fdb_sqlstat_populate_table(fdb_t *fdb, fdb_sqlstat_cache_t *cache, - BtCursor *cur, const char *tblname, - const char *sql, - /* out */ fdb_sqlstat_table_t *tbl) -{ - fdb_cursor_if_t *fdbc_if; - int bdberr = 0; - int rc = 0; - char *row; - int rowlen; - int irc; - 
int retry = 0; - - bzero(tbl, sizeof(*tbl)); tbl->tbl = bdb_temp_table_create(thedb->bdb_env, &bdberr); if (!tbl->tbl) { logmsg(LOGMSG_ERROR, "%s: failed to create temp table bdberr=%d\n", __func__, bdberr); return -1; } - tbl->name = strdup(tblname); + tbl->name = strdup(name); + tbl->nrows = 0; Pthread_mutex_init(&tbl->mtx, NULL); - - /* if remote failed to provide stats already, done here; we are running - with empty stats to prevent trying to read stats for every query */ - if (!cur->fdbc) - return 0; - - fdbc_if = cur->fdbc; - fdbc_if->set_sql(cur, sql); - - /* for schema changed sqlite stats, we need to provide the version! */ - fdb_cursor_use_table(fdbc_if->impl, fdb, tblname); - - /* try a few times here */ - do { - rc = fdbc_if->move(cur, CFIRST); - if (rc != IX_FND && rc != IX_FNDMORE) { - if (rc == FDB_ERR_FDB_VERSION) { - /* TODO: downgrade protocol */ - abort(); - } - if (rc != IX_EMPTY/* && rc != IX_PASTEOF*/) { - logmsg( - LOGMSG_ERROR, - "%s: failed to read first row from %s.%s rc=%d retry %d\n", - __func__, cache->fdbname, tbl->name, rc, retry); - /* error, try again, tbl untouched */ - if (cur->fdbc) - goto retry_io; - /* in this case, the remote did not answer repeated calls - and stats cursor is closed; we are done here */ - return 0; - } - /* empty stats */ - rc = 0; - goto close; - } - - do { - /* rows ! 
*/ - row = fdbc_if->data(cur); - rowlen = fdbc_if->datalen(cur); - - irc = insert_sqlstat_row_from_packedsqlite(fdb, tbl, row, rowlen); - if (irc) { - logmsg( - LOGMSG_ERROR, - "%s: failed temp table insert for %s.%s rc=%d bdberr=%d\n", - __func__, cache->fdbname, tbl->name, rc, bdberr); - rc = irc; - goto retry_io; - } - - if (rc == IX_FNDMORE) { - rc = fdbc_if->move(cur, CNEXT); - } else { - break; - } - } while (rc == IX_FNDMORE || rc == IX_FND); - - if (rc == IX_FND || rc == IX_EMPTY) { - rc = 0; - /* success, get out of here */ - break; - } - /* fall-through if error to retry */ - retry_io: - if (tbl->nrows > 0) { - logmsg(LOGMSG_ERROR, - "%s: failed to read all rows from %s.%s rc=%d retry %d\n", - __func__, cache->fdbname, tbl->name, rc, retry); - - irc = bdb_temp_table_truncate(thedb->bdb_env, tbl->tbl, &bdberr); - if (irc) { - logmsg(LOGMSG_ERROR, - "%s: truncating the temp table also failed rc %d bdberr " - "%d\n", - __func__, irc, bdberr); - goto close; - } - tbl->nrows = 0; - } - if (db_is_exiting()) { - logmsg(LOGMSG_ERROR, "Interrupting %s, db is exiting\n", __func__); - fdbc_if->set_sql(cur, NULL); /* not owner of sql hint */ - return -1; - } - } while ((retry++) < RETRY_GET_STATS_PER_STAT); - -close: - fdbc_if->set_sql(cur, NULL); /* not owner of sql hint */ - -#if 0 - FOR NOW, LETS LEAVE THE TABLE IN PLACE WITH INCOMPLETE STATS, RATHER THAN FAIL REQUEST - if (rc) { - /* return a clean slate */ - logmsg(LOGMSG_ERROR, "%s: failed to retrieve stats from %s.%s rc=%d retry %d\n", - __func__, cache->fdbname, tbl->name, rc, retry); - if (tbl->tbl) { - irc = bdb_temp_table_close(thedb->bdb_env, tbl->tbl, &bdberr): - if (irc) { - logmsg(LOGMSG_ERROR, "%s: failed to close temp table too for %s.%s rc=%d retry %d\n", - __func__, cache->fdbname, tbl->name, irc, retry); - } - tbl->tbl = NULL; - free(tbl->name); - Pthread_mutex_destroy(&tbl->mtx); - } - } -#endif - - /* we retried a few times, return with partial stats */ return 0; } -static int 
fdb_sqlstat_cache_populate(struct sqlclntstate *clnt, fdb_t *fdb, - fdb_sqlstat_cache_t *cache) -{ - BtCursor *cur; - fdb_cursor_if_t *fdbc_if; - char *sql_stat1 = "select * from sqlite_stat1"; - char *sql_stat4 = "select * from sqlite_stat4 where tbl not like 'cdb2.%'"; - int rc = 0; - int irc; - - /* fake a BtCursor */ - cur = calloc(1, sizeof(BtCursor) + sizeof(Btree)); - if (!cur) { - rc = FDB_ERR_MALLOC; - logmsg(LOGMSG_ERROR, "%s: malloc\n", __func__); - goto done; - } - init_cursor(cur, NULL, (Btree *)(cur + 1)); - cur->bt->fdb = fdb; - cur->bt->is_remote = 1; - cur->rootpage = -1; /*not really used for sqlite_stats*/ - assert(cur->clnt == clnt); - - fdbc_if = - fdb_cursor_open(clnt, cur, cur->rootpage, NULL, NULL, 0 /* TODO */); - if (!fdbc_if) { - logmsg(LOGMSG_ERROR, "%s: failed to connect remote to get stats\n", - __func__); - rc = -1; - goto done; - } - - assert(cache->nalloc == 2); - - /* retrieve records */ - rc = fdb_sqlstat_populate_table(fdb, cache, cur, "sqlite_stat1", sql_stat1, - &cache->arr[0]); - if (rc) { - logmsg(LOGMSG_ERROR, "%s: failed to populate sqlite_stat1 rc=%d\n", - __func__, rc); - goto close; - } - - rc = fdb_sqlstat_populate_table(fdb, cache, cur, "sqlite_stat4", sql_stat4, - &cache->arr[1]); - if (rc) { - logmsg(LOGMSG_ERROR, "%s: failed to populate sqlite_stat4 rc=%d\n", - __func__, rc); - goto close; - } - -close: - /* close cursor */ - irc = fdbc_if->close(cur); - if (irc) { - logmsg(LOGMSG_ERROR, "%s: failed to close cursor rc=%d\n", __func__, - rc); - } -done: - return rc; -} - /** * Create the local cache, we are under a mutex * @@ -338,16 +138,25 @@ int fdb_sqlstat_cache_create(struct sqlclntstate *clnt, fdb_t *fdb, } Pthread_mutex_init(&cache->arr_lock, NULL); + if (__fdb_sqlstat_table_init(&cache->arr[0], "sqlite_stat1")) { + fdb_sqlstat_cache_destroy(&cache); + rc = -2; + goto done; + } + if (__fdb_sqlstat_table_init(&cache->arr[1], "sqlite_stat4")) { + fdb_sqlstat_cache_destroy(&cache); + rc = -3; + goto done; + } - 
rc = fdb_sqlstat_cache_populate(clnt, fdb, cache); + rc = fdb_sqlstat_cache_populate(clnt, fdb, cache->arr[0].tbl, cache->arr[1].tbl, &cache->arr[0].nrows, + &cache->arr[1].nrows); if (rc) { logmsg(LOGMSG_ERROR, "%s: failed to populate sqlite_stat tables, rc=%d\n", __func__, rc); - free(cache->arr); - free(cache); - cache = NULL; - rc = -2; + fdb_sqlstat_cache_destroy(&cache); + rc = -3; goto done; } @@ -358,58 +167,53 @@ int fdb_sqlstat_cache_create(struct sqlclntstate *clnt, fdb_t *fdb, return rc; } -static int fdb_sqlstat_depopulate_table(fdb_sqlstat_table_t *tbl) +static int __sqlstat_table_destroy(fdb_sqlstat_table_t *tbl) { int bdberr = 0; int rc = 0; - rc = bdb_temp_table_close(thedb->bdb_env, tbl->tbl, &bdberr); - if (rc) { - logmsg(LOGMSG_ERROR, "%s: failed to create temp table bdberr=%d\n", - __func__, bdberr); - } + if (tbl->tbl) { + rc = bdb_temp_table_close(thedb->bdb_env, tbl->tbl, &bdberr); + if (rc) { + logmsg(LOGMSG_ERROR, "%s: failed to create temp table bdberr=%d\n", __func__, bdberr); + } - free(tbl->name); - Pthread_mutex_destroy(&tbl->mtx); + free(tbl->name); + Pthread_mutex_destroy(&tbl->mtx); + } bzero(tbl, sizeof(*tbl)); return rc; } -static void fdb_sqlstat_cache_depopulate(fdb_sqlstat_cache_t *cache) +/** + * Destroy the local cache + * + */ +void fdb_sqlstat_cache_destroy(fdb_sqlstat_cache_t **pcache) { + fdb_sqlstat_cache_t *cache; int rc; + cache = *pcache; + + if (!cache) + return; + assert(cache->nalloc == 2); /* retrieve records */ - rc = fdb_sqlstat_depopulate_table(&cache->arr[0]); + rc = __sqlstat_table_destroy(&cache->arr[0]); if (rc) { logmsg(LOGMSG_ERROR, "%s: failed to depopulate sqlite_stat1 rc=%d\n", __func__, rc); } - rc = fdb_sqlstat_depopulate_table(&cache->arr[1]); + rc = __sqlstat_table_destroy(&cache->arr[1]); if (rc) { logmsg(LOGMSG_ERROR, "%s: failed to depopulate sqlite_stat4 rc=%d\n", __func__, rc); } -} - -/** - * Destroy the local cache - * - */ -void fdb_sqlstat_cache_destroy(fdb_sqlstat_cache_t **pcache) -{ - 
fdb_sqlstat_cache_t *cache; - - cache = *pcache; - - if (!cache) - return; - - fdb_sqlstat_cache_depopulate(cache); free(cache->arr); Pthread_mutex_destroy(&cache->arr_lock); @@ -423,20 +227,14 @@ void fdb_sqlstat_cache_destroy(fdb_sqlstat_cache_t **pcache) * */ /* NOTE: It locks access to sqlstat (for now) until closed */ -fdb_cursor_if_t *fdb_sqlstat_cache_cursor_open(struct sqlclntstate *clnt, - fdb_t *fdb, const char *name) +fdb_cursor_if_t *fdb_sqlstat_cache_cursor_open(struct sqlclntstate *clnt, fdb_t *fdb, const char *name, + fdb_sqlstat_cache_t *cache) { - fdb_sqlstat_cache_t *cache; fdb_sqlstat_table_t *tbl; fdb_sqlstat_cursor_t *fdbc; fdb_cursor_if_t *fdbc_if; int bdberr = 0; - cache = fdb_sqlstats_get(fdb); - - if (!cache) - return NULL; - if (is_stat1(name)) { tbl = &cache->arr[0]; } else if (is_stat4(name)) { @@ -483,7 +281,6 @@ fdb_cursor_if_t *fdb_sqlstat_cache_cursor_open(struct sqlclntstate *clnt, fdbc_if->tbl_has_partidx = fdb_sqlstat_cursor_has_partidx; fdbc_if->tbl_has_expridx = fdb_sqlstat_cursor_has_expridx; fdbc_if->dbname = fdb_sqlstat_cursor_dbname; - fdbc_if->table_entry = fdb_sqlstat_cursor_table_entry; fdbc_if->access = fdb_sqlstat_cursor_access; fdbc_if->move = fdb_sqlstat_cursor_move; fdbc_if->find = fdb_sqlstat_cursor_find; @@ -648,13 +445,6 @@ static char *fdb_sqlstat_cursor_dbname(BtCursor *pCur) return (char *)fdb_dbname_name(fdbc->fdb); } -static fdb_tbl_ent_t *fdb_sqlstat_cursor_table_entry(BtCursor *pCur) -{ - fdb_sqlstat_cursor_t *fdbc = (fdb_sqlstat_cursor_t *)pCur->fdbc->impl; - - return fdb_table_entry_by_name(fdbc->fdb, fdbc->name); -} - static int fdb_sqlstat_cursor_access(BtCursor *pCur, int how) { return 0; } static int fdb_sqlstat_cursor_insert(BtCursor *pCur, struct sqlclntstate *clnt, diff --git a/db/fdb_fend_cache.h b/db/fdb_fend_cache.h index 7601f32352..c21348b104 100644 --- a/db/fdb_fend_cache.h +++ b/db/fdb_fend_cache.h @@ -25,8 +25,8 @@ */ /* open a cursor to the sqlite_stat cache */ -fdb_cursor_if_t 
*fdb_sqlstat_cache_cursor_open(struct sqlclntstate *clnt, - fdb_t *fdb, const char *name); +fdb_cursor_if_t *fdb_sqlstat_cache_cursor_open(struct sqlclntstate *clnt, fdb_t *fdb, const char *name, + fdb_sqlstat_cache_t *cache); /* create a cache for a table diff --git a/db/fdb_push.c b/db/fdb_push.c index 9a58fe6a06..4c2cbc1a6e 100644 --- a/db/fdb_push.c +++ b/db/fdb_push.c @@ -20,6 +20,7 @@ extern char *gbl_cdb2api_policy_override; extern int gbl_fdb_auth_enabled; +extern int gbl_debug_disttxn_trace; struct fdb_push_connector { enum ast_type type; /* what type of request we override */ @@ -572,6 +573,7 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, int created; int rc; int set_intrans = 0; + const char *noverify = "SET VeRiFyReTRy OFF"; if (!push) return -2; @@ -579,7 +581,7 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, /* this was handled back here through an "error"; clear it */ clnt->had_errors = 0; - fdb = get_fdb(push->remotedb); + fdb = get_fdb(push->remotedb, FDB_GET_LOCK); if (!fdb) { logmsg(LOGMSG_ERROR, "FDB push missing fdb %s\n", push->remotedb); rc = -2; @@ -592,18 +594,24 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, rc = -2; goto free_push; } - /* fdb is the remote db we want, and it supports remote writes */ /* begin/join the transaction */ fdb_tran_t *tran = fdb_trans_begin_or_join(clnt, fdb, 0/*TODO*/, &created); - if (!tran) - return -1; + if (!tran) { + rc = -1; + goto put; + } assert(tran->is_cdb2api); if (created) { /* get a connection */ tran->is_cdb2api = 1; + + if (!n_extra_sets) { + n_extra_sets = 1; + sets = &noverify; + } tran->fcon.hndl = hndl = _hndl_open(clnt, NULL, 0 /* no sqlite rows for writes */, err, n_extra_sets, sets); if (!tran->fcon.hndl) { @@ -634,6 +642,7 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, if (rc != CDB2_OK_DONE) { goto hndl_err; } + tran->writes_status = FDB_TRAN_BEGIN; } } hndl = tran->fcon.hndl; @@ -644,8 +653,6 @@ 
int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, rc = _run_statement(clnt, hndl, err); if (rc != CDB2_OK) { goto hndl_err; - } else { - tran->nwrites++; } /* drain the socket */ @@ -655,13 +662,39 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, goto hndl_err; } - cdb2_effects_tp effects; - if (!clnt->in_client_trans || clnt->verifyretry_off) { - if ((rc = cdb2_get_effects(hndl, &effects))) { - logmsg(LOGMSG_ERROR, "%s:%d failed to get effects rc %d sql \"%s\"\n", - __func__, __LINE__, rc, clnt->sql); + cdb2_effects_tp effects = {0}; + if (!clnt->in_client_trans || clnt->verifyretry_off || clnt->use_2pc) { + rc = cdb2_get_effects(hndl, &effects); + if (rc) { + logmsg(LOGMSG_ERROR, "%s:%d failed to get effects rc %d sql \"%s\"\n", __func__, __LINE__, rc, clnt->sql); goto hndl_err; } + tran->nwrites += effects.num_inserted + effects.num_deleted + effects.num_updated; + if (tran->nwrites) + tran->writes_status = FDB_TRAN_WRITES; + + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, + "DISTTXN %s:%d %s use_2pc %d uuid=%s status %d writes %d ins %d del %d upd %d sel %d aff %d\n", + __func__, __LINE__, clnt->dist_txnid ? 
clnt->dist_txnid : "(nodisttxn)", clnt->use_2pc, + comdb2uuidstr(clnt->osql.uuid, us), tran->writes_status, tran->nwrites, effects.num_inserted, + effects.num_deleted, effects.num_updated, effects.num_selected, effects.num_affected); + } + } else { + /* this is non-2pc statements that are in a client transaction and have verify retry on + * we do not know if we have writes, we only marked the status as FDB_TRAN_BEGIN so we + * know we need to send back a commit/rollback + */ + } + + if (clnt->use_2pc && tran->writes_status == FDB_TRAN_WRITES) { + /* if this is 2pc, make sure we have a local transaction to coordinate remote writes */ + int rc = osql_sock_start_deferred(clnt); + if (rc) { + logmsg(LOGMSG_ERROR, "%s: failed to start sosql, rc=%d\n", __func__, rc); + return rc; + } } int ncols = cdb2_numcolumns(hndl); @@ -718,6 +751,14 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, sql_set_sqlengine_state(clnt, __FILE__, __LINE__, SQLENG_INTRANS_STATE); } + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, "DISTTXN %s:%d %s use_2pc %d uuid=%s ins %d del %d upd %d sel %d aff %d\n", __func__, + __LINE__, clnt->dist_txnid ? 
clnt->dist_txnid : "(nodisttxn)", clnt->use_2pc, + comdb2uuidstr(clnt->osql.uuid, us), clnt->effects.num_inserted, clnt->effects.num_deleted, + clnt->effects.num_updated, clnt->effects.num_selected, clnt->effects.num_affected); + } + if (clnt->get_cost) { rc = _get_remote_cost(clnt, hndl, 1); if (rc) { @@ -725,10 +766,12 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, } } - if (!clnt->in_client_trans) + if (!clnt->in_client_trans) { goto free; + } - return 0; + rc = 0; + goto put; hndl_err: errstr = cdb2_errstr(hndl); @@ -745,8 +788,8 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, goto free; } errstat_set_rcstrf(err, rc, "%s", errstr); - rc = write_response(clnt, RESPONSE_ERROR, (void*)errstr, rc); - if (rc) { + int irc = write_response(clnt, RESPONSE_ERROR, (void *)errstr, rc); + if (irc) { logmsg(LOGMSG_DEBUG, "Failed to write error to client"); } free: @@ -769,8 +812,10 @@ int handle_fdb_push_write(sqlclntstate *clnt, struct errstat *err, */ clnt->intrans = 0; } - } + +put: + put_fdb(fdb, FDB_PUT_NOFREE); /* this could be reused */ return rc; } diff --git a/db/osqlblockproc.c b/db/osqlblockproc.c index b68f54013d..a63cf4184a 100644 --- a/db/osqlblockproc.c +++ b/db/osqlblockproc.c @@ -574,6 +574,12 @@ static int _pre_process_saveop(osql_sess_t *sess, blocksql_tran_t *tran, get_dist_txnid_from_dist_txn_rpl(tran->is_uuid, rpl, rplen, &sess->dist_txnid, &sess->dist_timestamp); assert(sess->dist_timestamp > 0); sess->is_coordinator = 1; + extern int gbl_debug_disttxn_trace; + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, "DISTTXN %s %s new coordinator from %s uuid=%s\n", __func__, sess->dist_txnid, + sess->target.host ? 
sess->target.host : "(nohost)", comdb2uuidstr(sess->uuid, us)); + } break; case OSQL_PARTICIPANT: sess_save_participant(sess, tran->is_uuid, rpl, rplen); diff --git a/db/osqlsqlthr.c b/db/osqlsqlthr.c index a22482403f..7cfda38ec6 100644 --- a/db/osqlsqlthr.c +++ b/db/osqlsqlthr.c @@ -220,7 +220,14 @@ static int osql_sock_start_int(struct sqlclntstate *clnt, int type, comdb2uuid(osql->uuid); if (gbl_debug_disttxn_trace) { uuidstr_t us; - logmsg(LOGMSG_USER, "%s starting uuid %s\n", __func__, comdb2uuidstr(osql->uuid, us)); + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s starting uuid %s\n", __func__, + clnt->dist_txnid ? clnt->dist_txnid : "(nodisttxn)", comdb2uuidstr(osql->uuid, us)); + } + } else { + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s reusing uuid %s\n", __func__, + clnt->dist_txnid ? clnt->dist_txnid : "(nodisttxn)", comdb2uuidstr(osql->uuid, us)); } } @@ -406,7 +413,8 @@ static int osql_wait(struct sqlclntstate *clnt) int endms = comdb2_time_epochms(); if (gbl_debug_disttxn_trace) { uuidstr_t us; - logmsg(LOGMSG_USER, "%s took %d ms to commit rqid=%llu uuid=%s\n", __func__, (endms - startms), osql->rqid, + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s took %d ms to commit rqid=%llu uuid=%s\n", __func__, + clnt->dist_txnid ? clnt->dist_txnid : "(nodisttxn)", (endms - startms), osql->rqid, comdb2uuidstr(osql->uuid, us)); } return rc; @@ -870,8 +878,8 @@ static int osql_sock_restart(struct sqlclntstate *clnt, int maxretries, int keep int sentops = 0; if (gbl_debug_disttxn_trace) { - logmsg(LOGMSG_USER, "%s restarting rqid=%llx uuid=%s keep-session=%d\n", __func__, clnt->osql.rqid, - comdb2uuidstr(clnt->osql.uuid, us), keep_session); + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s restarting uuid=%s keep-session=%d\n", __func__, + clnt->dist_txnid ? 
clnt->dist_txnid : "(nodisttxn)", comdb2uuidstr(clnt->osql.uuid, us), keep_session); } if (!thd) { @@ -1008,6 +1016,7 @@ int osql_sock_commit(struct sqlclntstate *clnt, int type, enum trans_clntcomm si int rcout = 0; int retries = 0; int bdberr = 0; + int is_distributed = 0; if (gbl_is_physical_replicant) { logmsg(LOGMSG_ERROR, "%s attempted write against physical replicant\n", __func__); @@ -1015,11 +1024,18 @@ int osql_sock_commit(struct sqlclntstate *clnt, int type, enum trans_clntcomm si return SQLITE_READONLY; } - /* temp hook for sql transactions */ + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s use_2pc %d %s uuid=%s\n", __func__, + clnt->dist_txnid ? clnt->dist_txnid : "(nodisttxn)", clnt->use_2pc, + clnt->dbtran.dtran ? " has remote writes" : " no remote writes", comdb2uuidstr(clnt->osql.uuid, us)); + } + /* is it distributed? */ + if (clnt->dbtran.mode == TRANLEVEL_SOSQL && clnt->dbtran.dtran) { - rc = fdb_trans_commit(clnt, sideeffects); + rc = fdb_trans_commit(clnt, sideeffects, &is_distributed); if (rc) { logmsg(LOGMSG_ERROR, "%s distributed failure rc=%d\n", __func__, rc); @@ -1033,6 +1049,16 @@ int osql_sock_commit(struct sqlclntstate *clnt, int type, enum trans_clntcomm si return SQLITE_ABORT; } } + if (!is_distributed) { + /* we have no remote writes, disable 2pc */ + clnt->use_2pc = 0; + } + + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s after fdb_commit use_2pc %d uuid=%s\n", __func__, + clnt->dist_txnid ? clnt->dist_txnid : "(nodisttxn)", clnt->use_2pc, comdb2uuidstr(clnt->osql.uuid, us)); + } osql->timings.commit_start = osql_log_time(); @@ -1206,6 +1232,12 @@ int osql_sock_commit(struct sqlclntstate *clnt, int type, enum trans_clntcomm si } done: + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s rc %d uuid=%s\n", __func__, + clnt->dist_txnid ? 
clnt->dist_txnid : "(nodisttxn)", rc, comdb2uuidstr(clnt->osql.uuid, us)); + } + osql->timings.commit_end = osql_log_time(); /* mark socksql as non-retriable if seletv are present @@ -1255,6 +1287,19 @@ int osql_sock_commit(struct sqlclntstate *clnt, int type, enum trans_clntcomm si clnt->effects.num_updated += clnt->remote_effects.num_updated; bzero(&clnt->remote_effects, sizeof(clnt->remote_effects)); + /* lets reset here the dist txn info so it will not be reused with partial + * next info + */ + free(clnt->dist_txnid); + clnt->dist_txnid = NULL; + free(clnt->coordinator_dbname); + clnt->coordinator_dbname = NULL; + free(clnt->coordinator_tier); + clnt->coordinator_tier = NULL; + free(clnt->coordinator_master); + clnt->coordinator_master = NULL; + clnt->dist_timestamp = 0ULL; + return rcout; } @@ -1327,6 +1372,12 @@ int osql_sock_abort(struct sqlclntstate *clnt, int type) clnt->osql.tablenamelen = 0; } + if (gbl_debug_disttxn_trace) { + uuidstr_t us; + logmsg(LOGMSG_USER, "DISTTXN REPL %s %s rc %d uuid=%s\n", __func__, + clnt->dist_txnid ? 
clnt->dist_txnid : "(nodisttxn)", rc, comdb2uuidstr(clnt->osql.uuid, us)); + } + return rcout; } @@ -1546,7 +1597,7 @@ static int osql_send_commit_logic(struct sqlclntstate *clnt, int is_retry, do { rc = 0; - if (clnt->use_2pc && clnt->dist_txnid && clnt->sent_fdb_commit) { + if (clnt->use_2pc && clnt->dist_txnid) { assert((clnt->is_coordinator + clnt->is_participant) == 1); if (clnt->is_coordinator) { struct participant *p; diff --git a/db/sql.h b/db/sql.h index 52743e084c..386ea93a72 100644 --- a/db/sql.h +++ b/db/sql.h @@ -699,6 +699,8 @@ struct features { unsigned queue_me : 1; }; +struct clnt_fdb_cache; + /* Client specific sql state */ struct sqlclntstate { struct thdpool *pPool; /* When null, the default SQL thread pool is @@ -1060,7 +1062,6 @@ struct sqlclntstate { int use_2pc; int is_participant; int is_coordinator; - int sent_fdb_commit; char *dist_txnid; int64_t dist_timestamp; @@ -1088,6 +1089,7 @@ struct sqlclntstate { char *origin_argv0; unsigned blocking_tranlog : 1; + struct clnt_fdb_cache *remoteFdbCache; }; typedef struct sqlclntstate sqlclntstate; @@ -1615,7 +1617,7 @@ struct query_plan_item { }; int free_query_plan_hash(hash_t *query_plan_hash); int clear_query_plans(); -struct string_ref *form_query_plan(sqlite3_stmt *stmt); +struct string_ref *form_query_plan(struct sqlclntstate *clnt, sqlite3_stmt *stmt); void add_query_plan(int64_t cost, int64_t nrows, struct fingerprint_track *t, struct string_ref *zSql_ref, struct string_ref *query_plan_ref, unsigned char *plan_fingerprint, char *params); diff --git a/db/sqlexplain.c b/db/sqlexplain.c index 72ffdbeb80..6b72e12ec0 100644 --- a/db/sqlexplain.c +++ b/db/sqlexplain.c @@ -75,11 +75,10 @@ static char opcode_to_sign(int opcode) } } -static void print_field(Vdbe *v, struct cursor_info *cinfo, int num, char *buf) +static void print_field(struct sqlclntstate *clnt, Vdbe *v, struct cursor_info *cinfo, int num, char *buf) { if (cinfo->remote) { - sprintf(buf, "%s", fdb_sqlexplain_get_field_name(v, 
cinfo->rootpage, - cinfo->ix, num)); + sprintf(buf, "%s", fdb_sqlexplain_get_field_name(clnt, v, cinfo->rootpage, cinfo->ix, num)); return; } @@ -165,14 +164,14 @@ static void print_field(Vdbe *v, struct cursor_info *cinfo, int num, char *buf) } } -int print_cursor_description(strbuf *out, struct cursor_info *cinfo, int append_space) +int print_cursor_description(struct sqlclntstate *clnt, strbuf *out, struct cursor_info *cinfo, int append_space) { struct schema *sc; char scname[100]; int is_index = 0; if (cinfo->remote == 1) { - char *desc = fdb_sqlexplain_get_name(cinfo->rootpage); + char *desc = fdb_sqlexplain_get_name(clnt, cinfo->rootpage); strbuf_appendf(out, "remote %s", (desc) ? desc : "???"); free(desc); } else if (cinfo->istemp && cinfo->rootpage == 1) { @@ -392,6 +391,7 @@ void describe_cursor(Vdbe *v, int pc, struct cursor_info *cur) /* remote */ cur->rootpage = op->p2; cur->remote = 1; + cur->dbid = op->p3; } } @@ -521,9 +521,8 @@ void explain_data_delete(IndentInfo *p) p->nIndent = 0; } -void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, - int largestwidth, int pc, struct cursor_info *cur, - int *pSkipCount) +void get_one_explain_line(struct sqlclntstate *clnt, sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, int largestwidth, + int pc, struct cursor_info *cur, int *pSkipCount) { char str[2]; Op *op = &v->aOp[pc]; @@ -751,10 +750,10 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, strbuf_appendf(out, "%3d [%*s]: ", pc, largestwidth, sqlite3OpcodeName(op->opcode)); strbuf_appendf(out, "%*s", indent * 4, ""); - print_field(v, &cur[op->p1], op->p2, buf); /* field name into buf */ + print_field(clnt, v, &cur[op->p1], op->p2, buf); /* field name into buf */ strbuf_appendf(out, "R%d = %s from cursor [%d] on ", op->p3, buf, op->p1); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); } else { Op *colOp = &v->aOp[pc]; op = &v->aOp[pc_]; @@ -774,7 +773,7 
@@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, strbuf_appendf(out, " (from cursor %d)", cursor); cursor = op->p1; } - print_field(v, &cur[op->p1], op->p2, buf); + print_field(clnt, v, &cur[op->p1], op->p2, buf); strbuf_appendf(out, "%s", buf); if (pc < pc_ - 1) { strbuf_append(out, ", "); @@ -805,7 +804,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, case OP_Count: strbuf_appendf(out, "R%d = select count(*) from cursor [%d] on ", op->p2, op->p1); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); break; case OP_Savepoint: strbuf_appendf( @@ -835,7 +834,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, describe_cursor(v, pc, &cur[op->p1]); strbuf_appendf(out, "Open read cursor [%d] if not already open on ", op->p1); - int is_index = print_cursor_description(out, &cur[op->p1], 1); + int is_index = print_cursor_description(clnt, out, &cur[op->p1], 1); if (is_index && op->opcode == OP_OpenRead) { if (op->p5 == 0xff) { strbuf_append(out, "(not a covering index)"); @@ -850,7 +849,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, describe_cursor(v, pc, &cur[op->p1]); strbuf_appendf(out, "Open %s cursor [%d] on ", (op->opcode != OP_OpenWrite ? 
"read" : "write"), op->p1); - is_index = print_cursor_description(out, &cur[op->p1], 1); + is_index = print_cursor_description(clnt, out, &cur[op->p1], 1); if (is_index && op->opcode == OP_OpenRead) { if (op->p5 == 0xff) { strbuf_append(out, "(not a covering index)"); @@ -963,13 +962,13 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, break; case OP_Insert: strbuf_appendf(out, "Write record in R%d into ", op->p2); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); strbuf_appendf(out, " using cursor [%d]", op->p1); break; case OP_Delete: strbuf_appendf(out, "Delete current record from cursor [%d] on", op->p1); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); break; case OP_ResetCount: strbuf_append( @@ -978,7 +977,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, case OP_RowData: strbuf_appendf(out, "R%d = key or data from cursor [%d] on ", op->p2, op->p1); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); break; case OP_Rowid: strbuf_appendf(out, "R%d = genid of row pointed by cursor [%d]", op->p2, @@ -989,7 +988,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, break; case OP_Last: strbuf_appendf(out, "Move cursor [%d] on ", op->p1); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); strbuf_append(out, "to last entry. 
"); if (op->p2) strbuf_appendf(out, "If no entries exist, go to %d", op->p2); @@ -1020,7 +1019,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, break; case OP_IdxInsert: strbuf_appendf(out, "Write key in R%d into ", op->p2); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); strbuf_appendf(out, "using cursor [%d]", op->p1); break; case OP_IdxDelete: @@ -1031,7 +1030,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, strbuf_appendf(out, "R%d", op->p2); } strbuf_appendf(out, " from "); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); strbuf_appendf(out, "using cursor [%d]", op->p1); break; case OP_IdxRowid: @@ -1054,11 +1053,11 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, break; case OP_Destroy: strbuf_append(out, "Destroy "); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); break; case OP_Clear: strbuf_append(out, "Delete all rows from "); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); break; case OP_RowSetAdd: strbuf_appendf(out, "Insert R%d into boolean index in R%d", op->p2, @@ -1118,7 +1117,7 @@ void get_one_explain_line(sqlite3 *hndl, strbuf *out, Vdbe *v, int indent, break; case OP_CursorHint: strbuf_appendf(out, "Cursor [%d] table ", op->p1); - print_cursor_description(out, &cur[op->p1], 1); + print_cursor_description(clnt, out, &cur[op->p1], 1); char *descr = sqlite3ExprDescribe(hndl->pVdbe, op->p4.pExpr); strbuf_appendf(out, " hint \"%s\"", (descr) ? 
descr : "(expression not parseable, see 592)"); @@ -1313,9 +1312,7 @@ int newsql_dump_query_plan(struct sqlclntstate *clnt, sqlite3 *hndl) if (indent < 0) indent = 0; int skipCount = 0; - get_one_explain_line( - hndl, out, v, indent, maxwidth, pc, cur, &skipCount - ); + get_one_explain_line(clnt, hndl, out, v, indent, maxwidth, pc, cur, &skipCount); if (skipCount != 0) pc += skipCount; char *row[] = {(char*)strbuf_buf(out)}; write_response(clnt, RESPONSE_ROW_STR, row, 1); diff --git a/db/sqlexplain.h b/db/sqlexplain.h index d59e6f324a..44145860eb 100644 --- a/db/sqlexplain.h +++ b/db/sqlexplain.h @@ -39,7 +39,7 @@ typedef struct { void explain_data_prepare(IndentInfo *p, Vdbe *v); void explain_data_delete(IndentInfo *p); -int print_cursor_description(strbuf *out, struct cursor_info *cinfo, int append_space); +int print_cursor_description(struct sqlclntstate *clnt, strbuf *out, struct cursor_info *cinfo, int append_space); void describe_cursor(Vdbe *v, int pc, struct cursor_info *cur); #endif /* _SQLEXPLAIN_H_ */ diff --git a/db/sqlglue.c b/db/sqlglue.c index ba5cc09fd3..19166b03fa 100644 --- a/db/sqlglue.c +++ b/db/sqlglue.c @@ -3645,12 +3645,8 @@ int sqlite3BtreeReopen( if (gbl_fdb_track) logmsg(LOGMSG_USER, "XXXXXXXXXXXXX ReOpening \"%s\"\n", zFilename); - pBtree->fdb = get_fdb(zFilename); - if (!pBtree->fdb) { - logmsg(LOGMSG_ERROR, "%s: fdb not available for %s ?\n", __func__, - zFilename); - rc = SQLITE_ERROR; - } + pBtree->fdb = get_fdb(zFilename, FDB_GET_NOLOCK); + assert(pBtree->fdb); reqlog_logf(pBtree->reqlogger, REQL_TRACE, "ReOpen(file %s, tree %d) = %s\n", @@ -3724,23 +3720,21 @@ int sqlite3BtreeOpen( thd->bttmp = bt; *ppBtree = bt; } else if (zFilename) { - /* TODO: maybe we should enforce unicity ? 
when attaching same dbs from - * multiple sql threads */ - /* remote database */ - bt->reqlogger = logger; bt->btreeid = id++; listc_init(&bt->cursors, offsetof(BtCursor, lnk)); bt->is_remote = 1; - /* NOTE: this is a lockless pointer; at the time of setting this, we got - a lock in sqlite3AddAndLockTable, so it should be good. The sqlite - engine will keep this structure around after fdb tables are changed. - While fdb will NOT go away, its tables can dissapear or change schema. - Cached schema in Table object needs to be matched against fdb->tbl and - make sure they are consistent before doing anything on the attached fdb - */ - bt->fdb = get_fdb(zFilename); + /* NOTE: this is done during attaching a new fdb to populate schema + * of sqlite with remote tables (inside comdb2_dynamic_attach) + * At this point we do have a live read lock acquired by sqlite3AddAndLockTable. + * It is safe to use this bt->fdb pointer until we call fdbUnlock + * During query execution, when we get table locks, a live lock is acquired again + * and at that point we update this link, which will be valid until table locks + * are released + * + */ + bt->fdb = get_fdb(zFilename, FDB_GET_NOLOCK); if (!bt->fdb) { logmsg(LOGMSG_ERROR, "%s: fdb not available for %s ?\n", __func__, zFilename); @@ -7710,18 +7704,8 @@ static inline int has_compressed_index(int iTable, BtCursor *cur, return rc; } -static int rootpcompare(const void *p1, const void *p2) +static int tablenamecompare(const void *p1, const void *p2) { -#if 0 - int i = *(int *)p1; - int j = *(int *)p2; - - if (i>j) - return 1; - if (iremoteFdbCache && fdb_clnt_cache_get_ent(clnt, iTable))) { logmsg(LOGMSG_ERROR, "%s: no such table: %s\n", __func__, tab->zName); sqlite3_mutex_enter(sqlite3_db_mutex(p->db)); @@ -8147,7 +8131,7 @@ int sqlite3UnlockStmtTablesRemotes(struct sqlclntstate *clnt) continue; /* this is a remote table; we need to release remote locks */ - rc = fdb_unlock_table(clnt->dbtran.lockedRemTables[i]); + rc = 
fdb_unlock_table(clnt, clnt->dbtran.lockedRemTables[i]); if (rc) { logmsg(LOGMSG_ERROR, "Failed to unlock remote table cache for \"%s\"\n", fdb_table_entry_tblname(clnt->dbtran.lockedRemTables[i])); @@ -8181,12 +8165,8 @@ sqlite3BtreeCursor_remote(Btree *pBt, /* The btree */ /* this doesn't get a lock, by this time we have acquired a table lock here */ - fdb = get_fdb(pBt->zFilename); - if (!pBt->fdb) { - logmsg(LOGMSG_ERROR, "%s: failed to retrieve db \"%s\"\n", __func__, - pBt->zFilename); - return SQLITE_INTERNAL; - } + fdb = get_fdb(pBt->zFilename, FDB_GET_NOLOCK); + assert(fdb); assert(fdb == pBt->fdb); cur->cursor_class = CURSORCLASS_REMOTE; @@ -8205,7 +8185,7 @@ sqlite3BtreeCursor_remote(Btree *pBt, /* The btree */ } /* set a transaction id if none is set yet */ - if ((iTable >= RTPAGE_START) && !fdb_is_sqlite_stat(fdb, cur->rootpage)) { + if ((iTable >= RTPAGE_START) && !fdb_is_sqlite_stat(clnt, cur->rootpage)) { /* I would like to open here a transaction if this is an actual update */ if (!clnt->isselect /* TODO: maybe only create one if we write to remote && fdb_write_is_remote()*/) { @@ -8869,7 +8849,10 @@ static int chunk_transaction(BtCursor *pCur, struct sqlclntstate *clnt, */ rdlock_schema_lk(); assert(pCur->vdbe == (Vdbe*)clnt->dbtran.pStmt); - /* remote table locks aren't berkeleydb locks. release them here */ + /* remote table locks aren't berkeleydb locks. release them here + * TODO: why are we releasing remote locks, these are needed + * to protect remote access further calls + */ if (clnt->dbtran.nLockedRemTables > 0) newlocks_rc = sqlite3UnlockStmtTablesRemotes(clnt); if (newlocks_rc == 0) @@ -13076,14 +13059,9 @@ void comdb2_set_verify_remote_schemas(void) } } -int comdb2_get_verify_remote_schemas(void) +int comdb2_get_verify_remote_schemas(struct sqlclntstate *clnt) { - struct sql_thread *thd = pthread_getspecific(query_info_key); - - if (thd && thd->clnt) - return thd->clnt->verify_remote_schemas == 1; - - return 0; + return clnt ? 
clnt->verify_remote_schemas == 1 : 0; } uint16_t stmt_num_tbls(sqlite3_stmt *stmt) diff --git a/db/sqlinterfaces.c b/db/sqlinterfaces.c index e88ac55ffc..58042cf817 100644 --- a/db/sqlinterfaces.c +++ b/db/sqlinterfaces.c @@ -1813,6 +1813,8 @@ int handle_sql_begin(struct sqlthdstate *thd, struct sqlclntstate *clnt, reqlog_logf(thd->logger, REQL_QUERY, "\"%s\" new transaction\n", (clnt->sql) ? clnt->sql : "(???.)"); + clnt->use_2pc = gbl_2pc; + /* Latch the last commit LSN */ assert(!clnt->modsnap_in_progress); if (clnt->dbtran.mode == TRANLEVEL_MODSNAP && (populate_modsnap_state(clnt) != 0)) { @@ -3330,7 +3332,7 @@ static int get_prepared_stmt_int(struct sqlthdstate *thd, clnt->in_sqlite_init = 0; if (rc == SQLITE_OK) { if (!prepareOnly) rc = sqlite3LockStmtTables(rec->stmt); - } else if (rc == SQLITE_ERROR && comdb2_get_verify_remote_schemas()) { + } else if (rc == SQLITE_ERROR && comdb2_get_verify_remote_schemas(clnt)) { sqlite3ResetFdbSchemas(thd->sqldb); return SQLITE_SCHEMA_REMOTE; } diff --git a/db/sqloffload.c b/db/sqloffload.c index 5f889fb463..92bec3f967 100644 --- a/db/sqloffload.c +++ b/db/sqloffload.c @@ -402,14 +402,13 @@ int recom_commit(struct sqlclntstate *clnt, struct sql_thread *thd, /* temp hook for sql transactions */ if (clnt->dbtran.dtran) { - rc = fdb_trans_commit(clnt, TRANS_CLNTCOMM_NORMAL); + rc = fdb_trans_commit(clnt, TRANS_CLNTCOMM_NORMAL, &is_distributed_tran); if (rc) { logmsg(LOGMSG_ERROR, "%s distributed failure rc=%d\n", __func__, rc); return rc; } } - is_distributed_tran = (is_distributed_tran && clnt->sent_fdb_commit); return rese_commit(clnt, thd, tzname, OSQL_RECOM_REQ, is_distributed_tran); } diff --git a/db/toblock.c b/db/toblock.c index 778990a7ca..84ee5b2d56 100644 --- a/db/toblock.c +++ b/db/toblock.c @@ -5025,20 +5025,23 @@ static int toblock_main_int(struct javasp_trans_state *javasp_trans_handle, stru } /* Fake a verify error */ + /* disable this for now; we will reenable once fdb_push can retry on + * verify error + 
extern int gbl_toblock_random_verify_error; if (!rc && gbl_toblock_random_verify_error && (rand() % 100) == 0) { logmsg(LOGMSG_USER, "%s throwing random verify error\n", __func__); outrc = ERR_BLOCK_FAILED; rc = ERR_VERIFY; check_serializability = 1; - opnum = blkpos; /* so we report the failed blockop accurately */ + opnum = blkpos; // so we report the failed blockop accurately err.blockop_num = blkpos; err.errcode = ERR_VERIFY; err.ixnum = ixout; numerrs = 1; reqlog_set_error(iq->reqlogger, "Debug random verify error", rc); GOTOBACKOUT; - } + }*/ } /* end delayed */ else { ++gbl_delayed_skip; diff --git a/sqlite/src/attach.c b/sqlite/src/attach.c index 49d1b22c0c..8dc25374f1 100644 --- a/sqlite/src/attach.c +++ b/sqlite/src/attach.c @@ -354,8 +354,17 @@ static void attachFunc( #if defined(SQLITE_BUILDING_FOR_COMDB2) Table *p; int savedBusy = db->init.busy; + void *savedLockedtable = db->init.locked_table; + void *savedLockedstat1 = db->init.locked_stat1; + void *savedLockedstat4 = db->init.locked_stat4; + void *savedfdb = db->init.fdb; db->init.busy = 0; /* TODO: prevent assert (?) 
*/ + db->init.locked_table = NULL; + db->init.locked_stat1 = NULL; + db->init.locked_stat4 = NULL; + db->init.fdb = NULL; + char *zTmp = (char*)zName; /* we need to take care of override and local */ if (local) { @@ -366,6 +375,10 @@ static void attachFunc( } rc = sqlite3InitTable(db, &zErrDyn, zTmp); db->init.busy = savedBusy; + db->init.locked_table = savedLockedtable; + db->init.locked_stat1 = savedLockedstat1; + db->init.locked_stat4 = savedLockedstat4; + db->init.fdb = savedfdb; if (zTmp != zName) sqlite3DbFree(db, zTmp); diff --git a/sqlite/src/build.c b/sqlite/src/build.c index 1f2d9d7304..52f2f1885a 100644 --- a/sqlite/src/build.c +++ b/sqlite/src/build.c @@ -34,7 +34,7 @@ int has_comdb2_index_for_sqlite(Table *pTab); int is_comdb2_index_unique(const char *dbname, char *idx); const char* fdb_parse_comdb2_remote_dbname(const char *zDatabase, const char **fqDbname); -int fdb_validate_existing_table(const char *zDatabase); +int fdb_validate_existing(const char *zDatabase); char *fdb_get_alias(const char **p_tablename); int comdb2_check_parallel(Parse*); int comdb2_check_push_remote(Parse*); @@ -491,7 +491,7 @@ Table *sqlite3FindTable(sqlite3 *db, const char *zName, const char *zDatabase){ if( unlikely(zDatabase) && !db->init.busy ){ /* we need to validate class here, before shortcutting to "local table" mode */ - if( fdb_validate_existing_table(zDatabase) ){ + if( fdb_validate_existing(zDatabase) ){ logmsg(LOGMSG_USER, "Remote db table exists and class mismatches \"%s:%s\"\n", fqDbname, zName); @@ -551,8 +551,9 @@ Table *sqlite3FindTable(sqlite3 *db, const char *zName, const char *zDatabase){ } int lvl, local, lvl_override; - rc = sqlite3AddAndLockTable(db, fqDbname, zName, &version, - in_analysis_load, &lvl, &local, &lvl_override, &server_version); + /* this will get us a read lock fdb object */ + rc = sqlite3AddAndLockTable(&db->init, fqDbname, zName, &version, + &lvl, &local, &lvl_override, &server_version); if( rc ){ if( gbl_fdb_track ) logmsg(LOGMSG_USER, 
"No foreign table \"%s:%s\"\n", fqDbname, zName); @@ -574,10 +575,7 @@ Table *sqlite3FindTable(sqlite3 *db, const char *zName, const char *zDatabase){ rc = comdb2_dynamic_attach(db, NULL, 0, NULL, uri, dbName, &zErrDyn, version, lvl, local, lvl_override, server_version); - if( sqlite3UnlockTable(dbName, zName) ){ - logmsg(LOGMSG_ERROR, "%s: failed to unlock %s.%s\n", __func__, - fqDbname, zName); - } + fdbUnlock(&db->init); if( rc || zErrDyn ) { logmsg(LOGMSG_ERROR, "%s: failed to find table %s rc=%d %s\n", diff --git a/sqlite/src/sqliteInt.h b/sqlite/src/sqliteInt.h index 4d521f052c..3975ffa1b3 100644 --- a/sqlite/src/sqliteInt.h +++ b/sqlite/src/sqliteInt.h @@ -1436,6 +1436,23 @@ void sqlite3CryptFunc(sqlite3_context*,int,sqlite3_value**); #define SQLITE_TRACE_NONLEGACY_MASK 0x0f /* Normal flags */ +struct sqlite3InitInfo { /* Information used during initialization */ +#if defined(SQLITE_BUILDING_FOR_COMDB2) + char *zTblName; /* Optional table name for attachments */ +#endif /* defined(SQLITE_BUILDING_FOR_COMDB2) */ + int newTnum; /* Rootpage of table being initialized */ + u8 iDb; /* Which db file is being initialized */ + u8 busy; /* TRUE if currently initializing */ + unsigned orphanTrigger : 1; /* Last statement is orphaned TEMP trigger */ + unsigned imposterTable : 1; /* Building an imposter table */ + unsigned reopenMemdb : 1; /* ATTACH is really a reopen using MemDB */ + void *locked_table; /* transient transfer info from sqlite3AddAndLockTable to fdbUnlock */ + void *locked_stat1; + void *locked_stat4; + void *fdb; +}; +typedef struct sqlite3InitInfo sqlite3InitInfo; + /* ** Each database connection is an instance of the following structure. 
*/ @@ -1478,17 +1495,7 @@ struct sqlite3 { int nTotalChange; /* Value returned by sqlite3_total_changes() */ int aLimit[SQLITE_N_LIMIT]; /* Limits */ int nMaxSorterMmap; /* Maximum size of regions mapped by sorter */ - struct sqlite3InitInfo { /* Information used during initialization */ -#if defined(SQLITE_BUILDING_FOR_COMDB2) - char *zTblName; /* Optional table name for attachments */ -#endif /* defined(SQLITE_BUILDING_FOR_COMDB2) */ - int newTnum; /* Rootpage of table being initialized */ - u8 iDb; /* Which db file is being initialized */ - u8 busy; /* TRUE if currently initializing */ - unsigned orphanTrigger : 1; /* Last statement is orphaned TEMP trigger */ - unsigned imposterTable : 1; /* Building an imposter table */ - unsigned reopenMemdb : 1; /* ATTACH is really a reopen using MemDB */ - } init; + sqlite3InitInfo init; /* Information used during initialization */ int nVdbeActive; /* Number of VDBEs currently running */ int nVdbeRead; /* Number of active VDBEs that read or write */ int nVdbeWrite; /* Number of active VDBEs that read and write */ @@ -5036,16 +5043,15 @@ void sqlite3_tunables_init(void); void sqlite3_dump_tunables(void); void sqlite3_set_tunable_by_name(char *tname, char *val); -extern int sqlite3AddAndLockTable(sqlite3 *db, const char *dbname, - const char *table, int *version, int in_analysis_load, +extern void fdbUnlock(sqlite3InitInfo *init); +extern int sqlite3AddAndLockTable(sqlite3InitInfo *init, const char *dbname, + const char *table, int *version, int *out_class, int *out_local, int *out_class_override, int *proto_version); -extern int sqlite3UnlockTable(const char *dbname, const char *table); extern int comdb2_dynamic_attach(sqlite3 *db, sqlite3_context *context, int argc, sqlite3_value **argv, const char *zName, const char *zFile, char **pzErrDyn, int version, int class, int local, int class_override, int proto_version); extern void comdb2_dynamic_detach(sqlite3 *db, int idx); -extern int comdb2_fdb_check_class(const char 
*dbname); int sqlite3InitTable(sqlite3 *db, char **pzErrMsg, const char *zName); extern int sqlite3UpdateMemCollAttr(BtCursor *pCur, int idx, Mem *mem); char* sqlite3ExprDescribe(Vdbe *v, const Expr *pExpr); diff --git a/tests/disttxn.test/lrl.options b/tests/disttxn.test/lrl.options index df88bb4e66..bd721d6e5c 100644 --- a/tests/disttxn.test/lrl.options +++ b/tests/disttxn.test/lrl.options @@ -81,8 +81,4 @@ fdbdebg 1 allow_coordinator foobardb/prod allow_coordinator dbeefdb/prod -#Disable push modes -foreign_db_push_remote 0 -foreign_db_push_remote_writes 0 -fdb_remsql_cdb2api 0 - +#setattr SOSQL_MAX_COMMIT_WAIT_SEC 30 diff --git a/tests/misstable_remsql.test/test_missing.sh b/tests/misstable_remsql.test/test_missing.sh index 841a1aa670..7170b2b240 100755 --- a/tests/misstable_remsql.test/test_missing.sh +++ b/tests/misstable_remsql.test/test_missing.sh @@ -80,7 +80,7 @@ output=run.2.out #purge local information #comdb2sc $a_dbname send fdb clear schema $a_remdbname > $output 2>&1 -cdb2sql ${SRC_CDB2_OPTIONS} --tabs --host $mach $a_dbname "exec procedure sys.cmd.send(\"fdb clear schema $a_remdbname\")" > $output 2>&1 +cdb2sql ${SRC_CDB2_OPTIONS} --tabs --host $mach $a_dbname "exec procedure sys.cmd.send(\"fdb force clear schema $a_remdbname\")" > $output 2>&1 # get the version V2 #comdb2sc $a_dbname send fdb info db >> $output 2>&1 diff --git a/tests/quarantine.csv b/tests/quarantine.csv index 41d4ca3a80..75cf199425 100644 --- a/tests/quarantine.csv +++ b/tests/quarantine.csv @@ -37,5 +37,3 @@ sc_resume_logicalsc_generated,UNKNOWN,181484807 consumer_non_atomic_default_consumer_generated,DB_BUG,181573461 unifiedcancel,UNKNOWN,181657289 autoanalyze,UNKNOWN,181657299 -remsql_locks,DB_BUG,182504318 -remsql_locks_rte_connect_generated,DB_BUG,182504318 diff --git a/tests/remsql_locks.test/runit b/tests/remsql_locks.test/runit index 6cfc349fa0..24cddaa6e9 100755 --- a/tests/remsql_locks.test/runit +++ b/tests/remsql_locks.test/runit @@ -23,14 +23,14 @@ if [[ ! "$?" 
-eq "0" ]] ; then echo "Failed to run the query on t2" failed=1 fi -if [[ ! "$found_cnt" -eq "60" ]] ; then +if (( $found_cnt != 60 )) ; then echo "Failed to return all rows" failed=1 fi wait -if [[ "$failed" = "1" ]] ; then +if (( $failed == 1 )) ; then echo "FAILURE" exit 1 fi