diff --git a/bdb/bdb_cursor.h b/bdb/bdb_cursor.h
index 5a08a0c8a7..a98ac9a3b4 100644
--- a/bdb/bdb_cursor.h
+++ b/bdb/bdb_cursor.h
@@ -199,6 +199,17 @@ int bdb_get_lsn_context_from_timestamp(bdb_state_type *bdb_state,
 int bdb_get_context_from_lsn(bdb_state_type *bdb_state, void *lsnp,
                              unsigned long long *ret_context, int *bdberr);
 
-int bdb_direct_count(bdb_cursor_ifn_t *, int ixnum, int64_t *count, int is_snapcur, uint32_t last_commit_lsn_file, uint32_t last_commit_lsn_offset, uint32_t last_checkpoint_lsn_file, uint32_t last_checkpoint_lsn_offset);
+/* Count the rows of a table straight from its btrees, without a cursor handle.
+ * ixnum < 0 selects the data file.  parallel_count chooses between the
+ * threaded (non-zero) and inline (zero) per-stripe count.  Returns 0 and
+ * stores the row count in *rcnt on success. */
+int bdb_direct_count_int(bdb_state_type *state, int ixnum, int64_t *rcnt, int is_snapcur,
+                         uint32_t modsnap_start_lsn_file, uint32_t modsnap_start_lsn_offset,
+                         uint32_t last_checkpoint_lsn_file, uint32_t last_checkpoint_lsn_offset, int parallel_count);
+/* Same as bdb_direct_count_int, using the cursor's bdb state and the global
+ * gbl_parallel_count setting. */
+int bdb_direct_count(bdb_cursor_ifn_t *cur, int ixnum, int64_t *rcnt, int is_snapcur, uint32_t modsnap_start_lsn_file,
+                     uint32_t modsnap_start_lsn_offset, uint32_t last_checkpoint_lsn_file,
+                     uint32_t last_checkpoint_lsn_offset);
 
 #endif
diff --git a/bdb/count.c b/bdb/count.c
index 0231c30aaf..d6cce4ffc4 100644
--- a/bdb/count.c
+++ b/bdb/count.c
@@ -208,6 +208,7 @@ int bdb_count_int(bdb_state_type *bdb_state, int *bdberr)
         return -1;
     }
 
+    // TODO: this always scans ix0 rather than the smallest index -- find out why
     rc = bdb_state->dbp_ix[0]->cursor(bdb_state->dbp_ix[0], 0, &dbcp, 0);
     if (rc != 0) {
         myfree(buffer);
@@ -300,7 +301,14 @@ int bdb_count(bdb_state_type *bdb_state, int *bdberr)
     int ret;
 
     BDB_READLOCK("bdb_count");
-    ret = bdb_count_int(bdb_state, bdberr);
+    int64_t count = 0;
+    // bdb_count_int would use ix 0 if available, do the same thing here:
+    // no snapshot, no LSN bounds, serial (non-parallel) count
+    ret = bdb_direct_count_int(bdb_state, bdb_state->numix > 0 ? 0 : -1, &count, 0, 0, 0, 0, 0, 0);
     BDB_RELLOCK();
-    return ret;
+    // NOTE(review): the int64_t count is narrowed to int on return -- confirm no table can exceed INT_MAX rows
+    if (ret == 0)
+        return count;
+    *bdberr = (ret == BDBERR_DEADLOCK ? ret : BDBERR_MISC);
+    return -1;
 }
diff --git a/bdb/cursor.c b/bdb/cursor.c
index c5ef873d9d..f402a3036b 100644
--- a/bdb/cursor.c
+++ b/bdb/cursor.c
@@ -7454,19 +7454,102 @@ static void *db_count(void *varg)
     return NULL;
 }
 
-int gbl_parallel_count = 0;
-int bdb_direct_count(bdb_cursor_ifn_t *cur, int ixnum, int64_t *rcnt, int is_snapcur, uint32_t modsnap_start_lsn_file, uint32_t modsnap_start_lsn_offset, uint32_t last_checkpoint_lsn_file, uint32_t last_checkpoint_lsn_offset)
+/* Position dbcp on the first (last == 0) or last (last != 0) record and store
+ * that record's number in *rnum.  Returns 0 on success, otherwise the c_get
+ * return code with *rnum left untouched. */
+static int find_recnum_first_last(DBC *dbcp, int *rnum, int last)
+{
+    DBT dbt_key, dbt_data;
+    int recnum;
+    int rc;
+    memset(&dbt_key, 0, sizeof(dbt_key));
+    memset(&dbt_data, 0, sizeof(dbt_data));
+    dbt_key.flags |= DB_DBT_MALLOC;
+    dbt_data.flags |= DB_DBT_MALLOC;
+    rc = dbcp->c_get(dbcp, &dbt_key, &dbt_data, last ? DB_LAST : DB_FIRST);
+    if (rc)
+        return rc;
+    // Only the cursor position is needed; drop the returned key and payload.
+    free(dbt_key.data);
+    free(dbt_data.data);
+
+    // Don't hand the freed key buffer back to c_get.
+    memset(&dbt_key, 0, sizeof(dbt_key));
+    dbt_key.flags |= DB_DBT_MALLOC;
+    memset(&dbt_data, 0, sizeof(dbt_data));
+    dbt_data.data = &recnum;
+    dbt_data.ulen = sizeof(recnum);
+    dbt_data.flags |= DB_DBT_USERMEM;
+    rc = dbcp->c_get(dbcp, &dbt_key, &dbt_data, DB_GET_RECNO);
+    if (rc)
+        return rc;
+    *rnum = recnum;
+    return 0;
+}
+
+/* Derive the table's row count from the first index that maintains record
+ * numbers: last record number - first record number + 1.  Returns 0 with the
+ * count in *rcnt, -1 if no such index exists, or the Berkeley DB return code
+ * of the failing cursor call (*rcnt is then left untouched). */
+static int find_count_recnums(bdb_state_type *state, int64_t *rcnt)
+{
+    int lowrecnum;
+    int highrecnum;
+    int rc;
+    DBC *dbcp;
+    if (!state->have_recnums)
+        return -1;
+    for (int i = 0; i < state->numix; i++) {
+        if (!state->ixrecnum[i])
+            continue;
+        rc = state->dbp_ix[i]->cursor(state->dbp_ix[i], 0, &dbcp, 0);
+        if (rc)
+            return rc;
+        rc = find_recnum_first_last(dbcp, &lowrecnum, 0);
+        if (rc == 0)
+            rc = find_recnum_first_last(dbcp, &highrecnum, 1);
+        dbcp->c_close(dbcp);
+        if (rc == 0)
+            *rcnt = (int64_t)highrecnum - lowrecnum + 1; // widen before the arithmetic
+        return rc;
+    }
+    return -1;
+}
+
+/* Count rows without a cursor handle: use the record-number shortcut when the
+ * table has one and no snapshot is requested, otherwise fall through to the
+ * per-stripe count below.  Returns 0 and sets *rcnt on success. */
+int bdb_direct_count_int(bdb_state_type *state, int ixnum, int64_t *rcnt, int is_snapcur,
+                         uint32_t modsnap_start_lsn_file, uint32_t modsnap_start_lsn_offset,
+                         uint32_t last_checkpoint_lsn_file, uint32_t last_checkpoint_lsn_offset, int parallel_count)
 {
     int64_t count = 0;
-    int parallel_count;
-    bdb_state_type *state = cur->impl->state;
+    int rc = 0;
     DB **db;
     int stripes;
     pthread_attr_t attr;
+
+    // First try the recnums optimization.  It reads the live btree, so it is
+    // only used when the caller is not counting as of a snapshot.
+    if (state->have_recnums && !is_snapcur) {
+        rc = find_count_recnums(state, &count);
+        if (rc == DB_LOCK_DEADLOCK) {
+            rc = BDBERR_DEADLOCK;
+        } else if (rc == DB_NOTFOUND) {
+            // empty index: count is still 0
+            rc = 0;
+        } else if (rc != 0) {
+            rc = -1;
+        }
+
+        if (rc == 0)
+            *rcnt = count;
+        return rc;
+    }
+
     if (ixnum < 0) { // data
         db = state->dbp_data[0];
         stripes = state->attr->dtastripe;
-        parallel_count = gbl_parallel_count;
         Pthread_attr_init(&attr);
 #ifdef PTHREAD_STACK_MIN
         Pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 512 * 1024);
@@ -7493,7 +7576,6 @@ int bdb_direct_count(bdb_cursor_ifn_t *cur, int ixnum, int64_t *rcnt, int is_sna
             db_count(&args[i]);
         }
     }
-    int rc = 0;
     void *ret;
     for (int i = 0; i < stripes; ++i) {
         if (parallel_count) {
@@ -7516,3 +7598,16 @@ int bdb_direct_count(bdb_cursor_ifn_t *cur, int ixnum, int64_t *rcnt, int is_sna
     if (rc == 0) *rcnt = count;
     return rc;
 }
+
+int gbl_parallel_count = 0;
+
+/* Cursor-facing entry point: counts via the cursor's bdb state, with the
+ * threading choice taken from gbl_parallel_count. */
+int bdb_direct_count(bdb_cursor_ifn_t *cur, int ixnum, int64_t *rcnt, int is_snapcur, uint32_t modsnap_start_lsn_file,
+                     uint32_t modsnap_start_lsn_offset, uint32_t last_checkpoint_lsn_file,
+                     uint32_t last_checkpoint_lsn_offset)
+{
+    return bdb_direct_count_int(cur->impl->state, ixnum, rcnt, is_snapcur, modsnap_start_lsn_file,
+                                modsnap_start_lsn_offset, last_checkpoint_lsn_file, last_checkpoint_lsn_offset,
+                                gbl_parallel_count);
+}
diff --git a/tests/recnum.test/runit b/tests/recnum.test/runit
index 416caddd7a..989000ee4d 100755
--- a/tests/recnum.test/runit
+++ b/tests/recnum.test/runit
@@ -36,4 +36,16 @@ if [[ $leaked_pages -lt 0 ]]; then
     cat out.res
     exit 1
 fi
+count=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "select count(*) from t")
+if [[ $count != 10000 ]]; then
+    echo "Expected count 10000, got count $count"
+    exit 1
+fi
+
+cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "truncate t"
+count=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "select count(*) from t")
+if [[ $count != 0 ]]; then
+    echo "Expected count 0, got count $count"
+    exit 1
+fi
 exit 0