diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 90e6d698..585f7b38 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -239,6 +239,7 @@ datalen DATANAME DATASIGN datasync +dataview datetime DBC dbenv diff --git a/CMakeLists.txt b/CMakeLists.txt index 20a50a45..d41686bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -340,10 +340,12 @@ if(NOT DEFINED MDBX_CXX_STANDARD) set(MDBX_CXX_STANDARD 98) endif() endif() -if(NOT HAS_C11 LESS 0) - set(MDBX_C_STANDARD 11) -else() +# MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! +# It unable process Windows SDK headers in the C11 mode! +if(HAS_C11 LESS 0 OR (MSVC AND MSVC_VERSION GREATER 1927)) set(MDBX_C_STANDARD 99) +else() + set(MDBX_C_STANDARD 11) endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") @@ -756,14 +758,21 @@ if(NOT CMAKE_CONFIGURATION_TYPES) endif() endif() +# choice target to fetch definitions and options +if(MDBX_BUILD_SHARED_LIBRARY) + set(target4fetch mdbx) +else() + set(target4fetch mdbx-static) +endif() + # get definitions -get_target_property(defs_list mdbx-static COMPILE_DEFINITIONS) +get_target_property(defs_list ${target4fetch} COMPILE_DEFINITIONS) if(defs_list) list(APPEND MDBX_BUILD_FLAGS ${defs_list}) endif() # get target compile options -get_target_property(options_list mdbx-static COMPILE_OPTIONS) +get_target_property(options_list ${target4fetch} COMPILE_OPTIONS) if(options_list) list(APPEND MDBX_BUILD_FLAGS ${options_list}) endif() diff --git a/mdbx.h b/mdbx.h index 03fa4afa..39f304f2 100644 --- a/mdbx.h +++ b/mdbx.h @@ -483,9 +483,18 @@ typedef mode_t mdbx_mode_t; MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, ENUM b) { \ return ENUM(std::size_t(a) & std::size_t(b)); \ } \ + MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, size_t b) { \ + return ENUM(std::size_t(a) & b); \ + } \ + MDBX_CXX01_CONSTEXPR ENUM operator&(size_t a, ENUM b) { \ + return ENUM(a & 
std::size_t(b)); \ + } \ MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, ENUM b) { return a = a & b; } \ - MDBX_CXX01_CONSTEXPR ENUM operator~(ENUM a) { \ - return ENUM(~std::size_t(a)); \ + MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, size_t b) { \ + return a = a & b; \ + } \ + MDBX_CXX01_CONSTEXPR std::size_t operator~(ENUM a) { \ + return ~std::size_t(a); \ } \ MDBX_CXX01_CONSTEXPR ENUM operator^(ENUM a, ENUM b) { \ return ENUM(std::size_t(a) ^ std::size_t(b)); \ @@ -1505,7 +1514,20 @@ enum MDBX_cursor_op { /** \ref MDBX_DUPFIXED -only: Position at previous page and return up to * a page of duplicate data items. */ - MDBX_PREV_MULTIPLE + MDBX_PREV_MULTIPLE, + + /** Position at first key-value pair greater than or equal to specified, + * return both key and data, and the return code depends on a exact match. + * + * For non DUPSORT-ed collections this work the same to \ref MDBX_SET_RANGE, + * but returns \ref MDBX_SUCCESS if key found exactly and + * \ref MDBX_RESULT_TRUE if greater key was found. + * + * For DUPSORT-ed a data value is taken into account for duplicates, + * i.e. for a pairs/tuples of a key and an each data value of duplicates. + * Returns \ref MDBX_SUCCESS if key-value pair found exactly and + * \ref MDBX_RESULT_TRUE if the next pair was returned. */ + MDBX_SET_LOWERBOUND }; #ifndef __cplusplus /** \ingroup c_cursors */ @@ -3861,6 +3883,18 @@ mdbx_cursor_txn(const MDBX_cursor *cursor); * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). */ LIBMDBX_API MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *cursor); +/** \brief Copy cursor position and state. + * \ingroup c_cursors + * + * \param [in] src A source cursor handle returned + * by \ref mdbx_cursor_create() or \ref mdbx_cursor_open(). + * + * \param [in,out] dest A destination cursor handle returned + * by \ref mdbx_cursor_create() or \ref mdbx_cursor_open(). + * + * \returns A non-zero error value on failure and 0 on success. 
*/ +LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest); + /** \brief Retrieve by cursor. * \ingroup c_crud * diff --git a/src/core.c b/src/core.c index cfeeb0b3..d9ddad50 100644 --- a/src/core.c +++ b/src/core.c @@ -3240,7 +3240,7 @@ static int __must_check_result mdbx_xcursor_init1(MDBX_cursor *mc, static int __must_check_result mdbx_xcursor_init2(MDBX_cursor *mc, MDBX_xcursor *src_mx, bool new_dupdata); -static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst); +static void cursor_copy_internal(const MDBX_cursor *csrc, MDBX_cursor *cdst); static int __must_check_result mdbx_drop0(MDBX_cursor *mc, int subs); static int __must_check_result mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi); @@ -3599,16 +3599,26 @@ static __maybe_unused void mdbx_page_list(MDBX_page *mp) { (mc)->mc_xcursor->mx_cursor.mc_pg[0] = node_data(xr_node); \ } while (0) +static __maybe_unused bool cursor_is_tracked(const MDBX_cursor *mc) { + for (MDBX_cursor *scan = mc->mc_txn->tw.cursors[mc->mc_dbi]; scan; + scan = scan->mc_next) + if (mc == ((mc->mc_flags & C_SUB) ? 
&scan->mc_xcursor->mx_cursor : scan)) + return true; + return false; +} + /* Perform act while tracking temporary cursor mn */ #define WITH_CURSOR_TRACKING(mn, act) \ do { \ mdbx_cassert(&(mn), \ - mn.mc_txn->mt_cursors != NULL /* must be not rdonly txt */); \ + mn.mc_txn->tw.cursors != NULL /* must be not rdonly txt */); \ + mdbx_cassert(&(mn), !cursor_is_tracked(&(mn))); \ MDBX_cursor mc_dummy; \ - MDBX_cursor **tracking_head = &(mn).mc_txn->mt_cursors[mn.mc_dbi]; \ + MDBX_cursor **tracking_head = &(mn).mc_txn->tw.cursors[mn.mc_dbi]; \ MDBX_cursor *tracked = &(mn); \ if ((mn).mc_flags & C_SUB) { \ mc_dummy.mc_flags = C_INITIALIZED; \ + mc_dummy.mc_top = 0; \ mc_dummy.mc_xcursor = (MDBX_xcursor *)&(mn); \ tracked = &mc_dummy; \ } \ @@ -4191,7 +4201,7 @@ static int mdbx_pages_xkeep(MDBX_cursor *mc, unsigned pflags, bool all) { } } mc = mc->mc_next; - for (; !mc || mc == m0; mc = txn->mt_cursors[--i]) + for (; !mc || mc == m0; mc = txn->tw.cursors[--i]) if (i == 0) goto mark_done; } @@ -5063,7 +5073,7 @@ __hot static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num, if (unlikely(mc->mc_flags & C_RECLAIMING)) { /* If mc is updating the GC, then the retired-list cannot play * catch-up with itself by growing while trying to save it. */ - flags &= ~(MDBX_ALLOC_GC | MDBX_COALESCE | MDBX_LIFORECLAIM); + flags &= ~MDBX_ALLOC_GC; } else if (unlikely(txn->mt_dbs[FREE_DBI].md_entries == 0)) { /* avoid (recursive) search inside empty tree and while tree is updating, * https://github.com/erthink/libmdbx/issues/31 */ @@ -5270,8 +5280,8 @@ skip_cache: /* Stop reclaiming to avoid overflow the page list. * This is a rare case while search for a continuously multi-page region * in a large database. 
https://github.com/erthink/libmdbx/issues/123 */ - flags -= MDBX_ALLOC_GC; - if (unlikely(flags == 0)) { + flags &= ~MDBX_ALLOC_GC; + if (unlikely((flags & MDBX_ALLOC_ALL) == 0)) { /* Oh, we can't do anything */ rc = MDBX_TXN_FULL; goto fail; @@ -5692,7 +5702,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) { done: /* Adjust cursors pointing to mp */ mc->mc_pg[mc->mc_top] = np; - m2 = txn->mt_cursors[mc->mc_dbi]; + m2 = txn->tw.cursors[mc->mc_dbi]; if (mc->mc_flags & C_SUB) { for (; m2; m2 = m2->mc_next) { m3 = &m2->mc_xcursor->mx_cursor; @@ -5851,34 +5861,35 @@ __cold int mdbx_env_sync_poll(MDBX_env *env) { } /* Back up parent txn's cursors, then grab the originals for tracking */ -static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { - MDBX_cursor *mc, *bk; - MDBX_xcursor *mx; - - for (int i = src->mt_numdbs; --i >= 0;) { - dst->mt_cursors[i] = NULL; - if ((mc = src->mt_cursors[i]) != NULL) { - size_t size = sizeof(MDBX_cursor); - if (mc->mc_xcursor) - size += sizeof(MDBX_xcursor); - for (; mc; mc = bk->mc_next) { +static int mdbx_cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) { + for (int i = parent->mt_numdbs; --i >= 0;) { + nested->tw.cursors[i] = NULL; + MDBX_cursor *mc = parent->tw.cursors[i]; + if (mc != NULL) { + size_t size = mc->mc_xcursor ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor) + : sizeof(MDBX_cursor); + for (MDBX_cursor *bk; mc; mc = bk->mc_next) { + bk = mc; + if (mc->mc_signature != MDBX_MC_LIVE) + continue; bk = mdbx_malloc(size); if (unlikely(!bk)) return MDBX_ENOMEM; *bk = *mc; mc->mc_backup = bk; - mc->mc_db = &dst->mt_dbs[i]; /* Kill pointers into src to reduce abuse: The * user may not use mc until dst ends. But we need a valid * txn pointer here for cursor fixups to keep working. 
*/ - mc->mc_txn = dst; - mc->mc_dbistate = &dst->mt_dbistate[i]; - if ((mx = mc->mc_xcursor) != NULL) { + mc->mc_txn = nested; + mc->mc_db = &nested->mt_dbs[i]; + mc->mc_dbistate = &nested->mt_dbistate[i]; + MDBX_xcursor *mx = mc->mc_xcursor; + if (mx != NULL) { *(MDBX_xcursor *)(bk + 1) = *mx; - mx->mx_cursor.mc_txn = dst; + mx->mx_cursor.mc_txn = nested; } - mc->mc_next = dst->mt_cursors[i]; - dst->mt_cursors[i] = mc; + mc->mc_next = nested->tw.cursors[i]; + nested->tw.cursors[i] = mc; } } } @@ -5891,47 +5902,57 @@ static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { * [in] merge true to keep changes to parent cursors, false to revert. * * Returns 0 on success, non-zero on failure. */ -static void mdbx_cursors_eot(MDBX_txn *txn, unsigned merge) { - MDBX_cursor **cursors = txn->mt_cursors, *mc, *next, *bk; - MDBX_xcursor *mx; - int i; - - for (i = txn->mt_numdbs; --i >= 0;) { - for (mc = cursors[i]; mc; mc = next) { - unsigned stage = mc->mc_signature; - mdbx_ensure(txn->mt_env, - stage == MDBX_MC_LIVE || stage == MDBX_MC_WAIT4EOT); +static void mdbx_cursors_eot(MDBX_txn *txn, const bool merge) { + mdbx_tassert(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); + for (int i = txn->mt_numdbs; --i >= 0;) { + MDBX_cursor *next, *mc = txn->tw.cursors[i]; + if (!mc) + continue; + txn->tw.cursors[i] = NULL; + do { + const unsigned stage = mc->mc_signature; + MDBX_cursor *bk = mc->mc_backup; next = mc->mc_next; - mdbx_tassert(txn, !next || next->mc_signature == MDBX_MC_LIVE || - next->mc_signature == MDBX_MC_WAIT4EOT); - if ((bk = mc->mc_backup) != NULL) { - if (merge) { - /* Commit changes to parent txn */ + mdbx_ensure(txn->mt_env, + stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk)); + mdbx_cassert(mc, mc->mc_dbi == (unsigned)i); + if (bk) { + MDBX_xcursor *mx = mc->mc_xcursor; + mdbx_cassert(mc, mx == bk->mc_xcursor); + mdbx_tassert(txn, txn->mt_parent != NULL); + mdbx_ensure(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE); + if (stage == MDBX_MC_WAIT4EOT 
/* Cursor was closed by user */) + mc->mc_signature = stage /* Promote closed state to parent txn */; + else if (merge) { + /* Preserve changes from nested to parent txn */ mc->mc_next = bk->mc_next; mc->mc_backup = bk->mc_backup; mc->mc_txn = bk->mc_txn; + *bk->mc_db = *mc->mc_db; mc->mc_db = bk->mc_db; + *bk->mc_dbistate = *mc->mc_dbistate; mc->mc_dbistate = bk->mc_dbistate; - if ((mx = mc->mc_xcursor) != NULL) + if (mx) { + if (mx != bk->mc_xcursor) { + *bk->mc_xcursor = *mx; + mx = bk->mc_xcursor; + } mx->mx_cursor.mc_txn = bk->mc_txn; + } } else { - /* Abort nested txn */ + /* Restore from backup, i.e. rollback/abort nested txn */ *mc = *bk; - if ((mx = mc->mc_xcursor) != NULL) + if (mx) *mx = *(MDBX_xcursor *)(bk + 1); } bk->mc_signature = 0; mdbx_free(bk); - } - if (stage == MDBX_MC_WAIT4EOT) { - mc->mc_signature = 0; - mdbx_free(mc); } else { - mc->mc_signature = MDBX_MC_READY4CLOSE; + mdbx_ensure(txn->mt_env, stage == MDBX_MC_LIVE); + mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */; mc->mc_flags = 0 /* reset C_UNTRACK */; } - } - cursors[i] = NULL; + } while ((mc = next) != NULL); } } @@ -6576,7 +6597,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, if (parent) { mdbx_tassert(txn, mdbx_dirtylist_check(parent)); - txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); + txn->tw.cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); txn->mt_dbiseqs = parent->mt_dbiseqs; txn->tw.dirtylist = mdbx_malloc(sizeof(MDBX_DP) * (MDBX_DPL_TXNFULL + 1)); txn->tw.reclaimed_pglist = @@ -6798,6 +6819,7 @@ int mdbx_txn_flags(const MDBX_txn *txn) { /* Export or close DBI handles opened in this txn. 
*/ static void mdbx_dbis_update(MDBX_txn *txn, int keep) { + mdbx_tassert(txn, !txn->mt_parent && txn == txn->mt_env->me_txn0); MDBX_dbi n = txn->mt_numdbs; if (n) { bool locked = false; @@ -6899,10 +6921,8 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { if (txn == env->me_txn0) mdbx_txn_valgrind(env, nullptr); #endif - /* Export or close DBI handles created in this txn */ - mdbx_dbis_update(txn, mode & MDBX_END_UPDATE); if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */ - mdbx_cursors_eot(txn, 0); + mdbx_cursors_eot(txn, false); if (!(env->me_flags & MDBX_WRITEMAP)) mdbx_dlist_free(txn); @@ -6911,6 +6931,8 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { env->me_txn = txn->mt_parent; if (txn == env->me_txn0) { mdbx_assert(env, txn->mt_parent == NULL); + /* Export or close DBI handles created in this txn */ + mdbx_dbis_update(txn, mode & MDBX_END_UPDATE); mdbx_pnl_shrink(&txn->tw.retired_pages); mdbx_pnl_shrink(&txn->tw.reclaimed_pglist); /* The writer mutex was locked in mdbx_txn_begin. */ @@ -7225,8 +7247,8 @@ static int mdbx_update_gc(MDBX_txn *txn) { goto bailout_notracking; couple.outer.mc_flags |= C_RECLAIMING; - couple.outer.mc_next = txn->mt_cursors[FREE_DBI]; - txn->mt_cursors[FREE_DBI] = &couple.outer; + couple.outer.mc_next = txn->tw.cursors[FREE_DBI]; + txn->tw.cursors[FREE_DBI] = &couple.outer; retry: ++loop; @@ -7878,7 +7900,7 @@ retry_noaccount: cleaned_gc_slot == MDBX_PNL_SIZE(txn->tw.lifo_reclaimed)); bailout: - txn->mt_cursors[FREE_DBI] = couple.outer.mc_next; + txn->tw.cursors[FREE_DBI] = couple.outer.mc_next; bailout_notracking: MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) = 0; @@ -8143,7 +8165,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { parent->mt_flags |= txn->mt_flags & MDBX_TXN_DIRTY; /* Merge our cursors into parent's and close them */ - mdbx_cursors_eot(txn, 1); + mdbx_cursors_eot(txn, true); + end_mode |= MDBX_END_EOTDONE; /* Update parent's DB table. 
*/ memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); @@ -8369,7 +8392,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == MDBX_DPL_TXNFULL); - mdbx_cursors_eot(txn, 0); + mdbx_cursors_eot(txn, false); end_mode |= MDBX_END_EOTDONE; if (txn->tw.dirtylist->length == 0 && @@ -10863,8 +10886,8 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname, MDBX_txn *txn = mdbx_calloc(1, size); if (txn) { txn->mt_dbs = (MDBX_db *)((char *)txn + tsize); - txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); - txn->mt_dbiseqs = (unsigned *)(txn->mt_cursors + env->me_maxdbs); + txn->tw.cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); + txn->mt_dbiseqs = (unsigned *)(txn->tw.cursors + env->me_maxdbs); txn->mt_dbistate = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs); txn->mt_env = env; txn->mt_dbxs = env->me_dbxs; @@ -11255,7 +11278,7 @@ static MDBX_node *__hot mdbx_node_search(MDBX_cursor *mc, const MDBX_val *key, static void mdbx_cursor_adjust(MDBX_cursor *mc, func) { MDBX_cursor *m2; - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) { func(mc, m2); } @@ -11264,13 +11287,11 @@ static void mdbx_cursor_adjust(MDBX_cursor *mc, func) { #endif /* Pop a page off the top of the cursor's stack. */ -static void mdbx_cursor_pop(MDBX_cursor *mc) { +static __inline void mdbx_cursor_pop(MDBX_cursor *mc) { if (mc->mc_snum) { mdbx_debug("popped page %" PRIaPGNO " off db %d cursor %p", mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *)mc); - - mc->mc_snum--; - if (mc->mc_snum) { + if (--mc->mc_snum) { mc->mc_top--; } else { mc->mc_flags &= ~C_INITIALIZED; @@ -11280,7 +11301,7 @@ static void mdbx_cursor_pop(MDBX_cursor *mc) { /* Push a page onto the top of the cursor's stack. * Set MDBX_TXN_ERROR on failure. 
*/ -static int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) { +static __inline int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) { mdbx_debug("pushing page %" PRIaPGNO " on db %d cursor %p", mp->mp_pgno, DDBI(mc), (void *)mc); @@ -11712,20 +11733,7 @@ int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, if (unlikely(rc != MDBX_SUCCESS)) return rc; - MDBX_val save_data = *data; - int exact = 0; - rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_SET_RANGE, &exact); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (exact && (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) != 0) { - *data = save_data; - exact = 0; - rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_GET_BOTH_RANGE, &exact); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - return exact ? MDBX_SUCCESS : MDBX_RESULT_TRUE; + return mdbx_cursor_get(&cx.outer, key, data, MDBX_SET_LOWERBOUND); } int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, @@ -11799,7 +11807,7 @@ static int mdbx_cursor_sibling(MDBX_cursor *mc, int dir) { if ((dir == SIBLING_RIGHT) ? (mc->mc_ki[mc->mc_top] + 1u >= page_numkeys(mc->mc_pg[mc->mc_top])) : (mc->mc_ki[mc->mc_top] == 0)) { - mdbx_debug("no more keys left, moving to %s sibling", + mdbx_debug("no more keys aside, moving to next %s sibling", dir ? "right" : "left"); if (unlikely((rc = mdbx_cursor_sibling(mc, dir)) != MDBX_SUCCESS)) { /* undo cursor_pop before returning */ @@ -11827,9 +11835,9 @@ static int mdbx_cursor_sibling(MDBX_cursor *mc, int dir) { rc = mdbx_cursor_push(mc, mp); if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (dir == SIBLING_LEFT) - mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mp) - 1; + mc->mc_ki[mc->mc_top] = + (indx_t)((dir == SIBLING_LEFT) ? 
page_numkeys(mp) - 1 : 0); return MDBX_SUCCESS; } @@ -11879,8 +11887,12 @@ static int mdbx_cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, goto skip; } - if (mc->mc_ki[mc->mc_top] + 1u >= page_numkeys(mp)) { + int ki = mc->mc_ki[mc->mc_top]; + mc->mc_ki[mc->mc_top] = (indx_t)++ki; + const int numkeys = page_numkeys(mp); + if (unlikely(ki >= numkeys)) { mdbx_debug("%s", "=====> move to next sibling page"); + mc->mc_ki[mc->mc_top] = numkeys - 1; if (unlikely((rc = mdbx_cursor_sibling(mc, SIBLING_RIGHT)) != MDBX_SUCCESS)) { mc->mc_flags |= C_EOF; @@ -11889,8 +11901,7 @@ static int mdbx_cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mp = mc->mc_pg[mc->mc_top]; mdbx_debug("next page is %" PRIaPGNO ", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); - } else - mc->mc_ki[mc->mc_top]++; + } skip: mdbx_debug("==> cursor points to page %" PRIaPGNO @@ -11972,18 +11983,17 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mc->mc_flags &= ~(C_EOF | C_DEL); - if (mc->mc_ki[mc->mc_top] == 0) { + int ki = mc->mc_ki[mc->mc_top]; + mc->mc_ki[mc->mc_top] = (indx_t)--ki; + if (unlikely(ki < 0)) { + mc->mc_ki[mc->mc_top] = 0; mdbx_debug("%s", "=====> move to prev sibling page"); - if ((rc = mdbx_cursor_sibling(mc, SIBLING_LEFT)) != MDBX_SUCCESS) { + if ((rc = mdbx_cursor_sibling(mc, SIBLING_LEFT)) != MDBX_SUCCESS) return rc; - } mp = mc->mc_pg[mc->mc_top]; - mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mp) - 1; mdbx_debug("prev page is %" PRIaPGNO ", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); - } else - mc->mc_ki[mc->mc_top]--; - + } mdbx_debug("==> cursor points to page %" PRIaPGNO " with %u keys, key index %u", mp->mp_pgno, page_numkeys(mp), mc->mc_ki[mc->mc_top]); @@ -12067,8 +12077,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); mp = mc->mc_pg[mc->mc_top]; - if (!page_numkeys(mp)) { + if (unlikely(!page_numkeys(mp))) { 
mc->mc_ki[mc->mc_top] = 0; + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } if (IS_LEAF2(mp)) { @@ -12084,6 +12095,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, * was the one we wanted. */ mc->mc_ki[mc->mc_top] = 0; *exactp = 1; + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); goto set1; } if (rc > 0) { @@ -12102,6 +12116,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mdbx_cassert(mc, nkeys >= 1 && nkeys <= UINT16_MAX + 1); mc->mc_ki[mc->mc_top] = (indx_t)(nkeys - 1); *exactp = 1; + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); goto set1; } if (rc < 0) { @@ -12118,6 +12135,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (rc == 0) { /* current node was the one we wanted */ *exactp = 1; + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); goto set1; } } @@ -12135,6 +12155,7 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, /* There are no other pages */ mdbx_cassert(mc, nkeys <= UINT16_MAX); mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } } @@ -12144,8 +12165,12 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (op == MDBX_SET_RANGE) { rc = 0; goto set1; - } else + } else { + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); return MDBX_NOTFOUND; + } } } else { mc->mc_pg[0] = 0; @@ -12162,6 +12187,8 @@ set2: node = mdbx_node_search(mc, &aligned_key, exactp); if (!*exactp && op != MDBX_SET_RANGE) { /* MDBX_SET specified and not an exact match. 
*/ + if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top]))) + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } @@ -12174,8 +12201,12 @@ set2: } mp = mc->mc_pg[mc->mc_top]; mdbx_cassert(mc, IS_LEAF(mp)); - node = page_node(mp, 0); + if (!IS_LEAF2(mp)) + node = page_node(mp, 0); } + mdbx_cassert(mc, + mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); set1: mc->mc_flags |= C_INITIALIZED; @@ -12237,21 +12268,20 @@ set1: return rc; rc = mc->mc_dbx->md_dcmp(&aligned_data, &olddata); if (rc) { + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); if (op != MDBX_GET_BOTH_RANGE || rc > 0) return MDBX_NOTFOUND; *exactp = 0; rc = 0; } *data = olddata; - } else { - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - if (unlikely((rc = mdbx_node_read( - mc, node, data, - pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) != - MDBX_SUCCESS)) - return rc; - } + } else if (unlikely((rc = mdbx_node_read(mc, node, data, + pp_txnid4chk(mc->mc_pg[mc->mc_top], + mc->mc_txn))) != + MDBX_SUCCESS)) + return rc; } /* The key already matches in all other cases */ @@ -12364,7 +12394,8 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -12375,12 +12406,13 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, switch (op) { case MDBX_GET_CURRENT: { if (unlikely(!(mc->mc_flags & C_INITIALIZED))) - return MDBX_EINVAL; + return MDBX_ENODATA; MDBX_page *mp = mc->mc_pg[mc->mc_top]; const unsigned nkeys = page_numkeys(mp); if (mc->mc_ki[mc->mc_top] >= nkeys) { mdbx_cassert(mc, nkeys <= UINT16_MAX); mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } mdbx_cassert(mc, nkeys > 0); @@ -12430,6 +12462,12 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (unlikely(key == NULL)) return MDBX_EINVAL; rc = mdbx_cursor_set(mc, key, data, op, &exact); + if (mc->mc_flags & C_INITIALIZED) { + mdbx_cassert(mc, mc->mc_snum > 0 && mc->mc_top < mc->mc_snum); + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); + } break; case MDBX_GET_MULTIPLE: if (unlikely(data == NULL || !(mc->mc_flags & C_INITIALIZED))) @@ -12502,6 +12540,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return MDBX_INCOMPATIBLE; if (mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])) { mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mc->mc_pg[mc->mc_top]); + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } { @@ -12523,6 +12562,33 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, case MDBX_LAST_DUP: mfunc = mdbx_cursor_last; goto mmove; + case MDBX_SET_LOWERBOUND: { + if (unlikely(key == NULL || data == NULL)) + return MDBX_EINVAL; + MDBX_val save_data = *data; + rc = mdbx_cursor_set(mc, key, data, MDBX_SET_RANGE, &exact); + if (rc == MDBX_SUCCESS && exact && mc->mc_xcursor) { + mc->mc_flags &= ~C_DEL; + if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + *data = save_data; + exact = 0; + rc = mdbx_cursor_set(&mc->mc_xcursor->mx_cursor, data, 
NULL, + MDBX_SET_RANGE, &exact); + if (rc == MDBX_NOTFOUND) { + mdbx_cassert(mc, !exact); + rc = mdbx_cursor_next(mc, key, data, MDBX_NEXT_NODUP); + } + } else { + int cmp = mc->mc_dbx->md_dcmp(&save_data, data); + exact = (cmp == 0); + if (cmp > 0) + rc = mdbx_cursor_next(mc, key, data, MDBX_NEXT_NODUP); + } + } + if (rc == MDBX_SUCCESS && !exact) + rc = MDBX_RESULT_TRUE; + break; + } default: mdbx_debug("unhandled/unimplemented cursor operation %u", op); return MDBX_EINVAL; @@ -12577,12 +12643,14 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; + mdbx_cassert(mc, cursor_is_tracked(mc)); env = mc->mc_txn->mt_env; /* Check this first so counter will always be zero on any early failures. */ @@ -13241,18 +13309,17 @@ new_sub:; rc = mdbx_node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, rdata, nflags); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2, *m3; - MDBX_dbi dbi = mc->mc_dbi; - unsigned i = mc->mc_top; - MDBX_page *mp = mc->mc_pg[i]; - - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + const MDBX_dbi dbi = mc->mc_dbi; + const unsigned i = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[i]; + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; + m2 = m2->mc_next) { + MDBX_cursor *m3 = + (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[i] != mp) continue; - if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) { - m3->mc_ki[i]++; - } + if (m3->mc_ki[i] >= mc->mc_ki[i]) + m3->mc_ki[i] += insert_key; if (XCURSOR_INITED(m3)) XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); } @@ -13302,7 +13369,7 @@ new_sub:; MDBX_page *mp = mc->mc_pg[i]; const int nkeys = page_numkeys(mp); - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) @@ -13375,7 +13442,8 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -13427,7 +13495,7 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { node = page_node(mp, mc->mc_ki[mc->mc_top]); mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); /* fix other sub-DB cursors pointed at fake pages on this page */ - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) @@ -14039,7 +14107,7 @@ int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) { if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE && mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EINVAL; + return MDBX_EBADSIGN; MDBX_cursor_couple *couple = container_of(mc, MDBX_cursor_couple, outer); couple->mc_userctx = ctx; @@ -14062,26 +14130,9 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) return MDBX_EINVAL; - if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE)) { - if 
(unlikely(mc->mc_signature != MDBX_MC_LIVE || mc->mc_backup)) - return MDBX_EINVAL; - if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) - return MDBX_PROBLEM; - if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { - MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; - while (*prev && *prev != mc) - prev = &(*prev)->mc_next; - if (*prev == mc) - *prev = mc->mc_next; - } - mc->mc_signature = MDBX_MC_READY4CLOSE; - mc->mc_flags = 0; - mc->mc_dbi = UINT_MAX; - } - - assert(!mc->mc_backup && !mc->mc_flags); - if (unlikely(mc->mc_backup || mc->mc_flags)) - return MDBX_PROBLEM; + if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE && + mc->mc_signature != MDBX_MC_LIVE)) + return MDBX_EBADSIGN; int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -14093,13 +14144,52 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(dbi == FREE_DBI && !F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY))) return MDBX_EACCESS; + if (unlikely(mc->mc_backup)) /* Cursor from parent transaction */ { + mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE); + if (unlikely(mc->mc_dbi != dbi || + /* paranoia */ mc->mc_signature != MDBX_MC_LIVE || + mc->mc_txn != txn)) + return MDBX_EINVAL; + + assert(mc->mc_db == &txn->mt_dbs[dbi]); + assert(mc->mc_dbx == &txn->mt_dbxs[dbi]); + assert(mc->mc_dbi == dbi); + assert(mc->mc_dbistate == &txn->mt_dbistate[dbi]); + return likely(mc->mc_dbi == dbi && + /* paranoia */ mc->mc_signature == MDBX_MC_LIVE && + mc->mc_txn == txn) + ? 
MDBX_SUCCESS + : MDBX_EINVAL /* Disallow change DBI in nested transactions */; + } + + if (mc->mc_signature == MDBX_MC_LIVE) { + if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) + return MDBX_PROBLEM; + if (mc->mc_flags & C_UNTRACK) { + mdbx_cassert(mc, !(mc->mc_txn->mt_flags & MDBX_TXN_RDONLY)); + MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi]; + while (*prev && *prev != mc) + prev = &(*prev)->mc_next; + mdbx_cassert(mc, *prev == mc); + *prev = mc->mc_next; + } + mc->mc_signature = MDBX_MC_READY4CLOSE; + mc->mc_flags = 0; + mc->mc_dbi = UINT_MAX; + mc->mc_next = NULL; + mc->mc_db = NULL; + mc->mc_dbx = NULL; + mc->mc_dbistate = NULL; + } + mdbx_cassert(mc, !(mc->mc_flags & C_UNTRACK)); + rc = mdbx_cursor_init(mc, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (txn->mt_cursors) { - mc->mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = mc; + if (!(txn->mt_flags & MDBX_TXN_RDONLY)) { + mc->mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = mc; mc->mc_flags |= C_UNTRACK; } @@ -14129,13 +14219,94 @@ int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) { return likely(mc) ? mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL; } +int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { + if (unlikely(!src)) + return MDBX_EINVAL; + if (unlikely(src->mc_signature != MDBX_MC_LIVE)) + return (src->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = mdbx_cursor_bind(src->mc_txn, dest, src->mc_dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + assert(dest->mc_db == src->mc_db); + assert(dest->mc_dbi == src->mc_dbi); + assert(dest->mc_dbx == src->mc_dbx); + assert(dest->mc_dbistate == src->mc_dbistate); +again: + assert(dest->mc_txn == src->mc_txn); + dest->mc_flags ^= (dest->mc_flags ^ src->mc_flags) & ~C_UNTRACK; + dest->mc_top = src->mc_top; + dest->mc_snum = src->mc_snum; + for (unsigned i = 0; i < src->mc_snum; ++i) { + dest->mc_ki[i] = src->mc_ki[i]; + dest->mc_pg[i] = src->mc_pg[i]; + } + + if (src->mc_xcursor) { + dest->mc_xcursor->mx_db = src->mc_xcursor->mx_db; + dest->mc_xcursor->mx_dbx = src->mc_xcursor->mx_dbx; + src = &src->mc_xcursor->mx_cursor; + dest = &dest->mc_xcursor->mx_cursor; + goto again; + } + + return MDBX_SUCCESS; +} + +void mdbx_cursor_close(MDBX_cursor *mc) { + if (mc) { + mdbx_ensure(NULL, mc->mc_signature == MDBX_MC_LIVE || + mc->mc_signature == MDBX_MC_READY4CLOSE); + if (!mc->mc_backup) { + /* Remove from txn, if tracked. + * A read-only txn (!C_UNTRACK) may have been freed already, + * so do not peek inside it. Only write txns track cursors. 
*/ + if (mc->mc_flags & C_UNTRACK) { + mdbx_cassert(mc, !(mc->mc_txn->mt_flags & MDBX_TXN_RDONLY)); + MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi]; + while (*prev && *prev != mc) + prev = &(*prev)->mc_next; + mdbx_cassert(mc, *prev == mc); + *prev = mc->mc_next; + } + mc->mc_signature = 0; + mc->mc_next = mc; + mdbx_free(mc); + } else { + /* Cursor closed before nested txn ends */ + mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE); + mc->mc_signature = MDBX_MC_WAIT4EOT; + } + } +} + +MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) { + if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) + return NULL; + MDBX_txn *txn = mc->mc_txn; + if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) + return NULL; + if (unlikely(txn->mt_flags & MDBX_TXN_FINISHED)) + return NULL; + return txn; +} + +MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { + if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) + return UINT_MAX; + return mc->mc_dbi; +} + /* Return the count of duplicate data items for the current key */ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { if (unlikely(mc == NULL)) return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -14169,48 +14340,6 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { return MDBX_SUCCESS; } -void mdbx_cursor_close(MDBX_cursor *mc) { - if (mc) { - mdbx_ensure(NULL, mc->mc_signature == MDBX_MC_LIVE || - mc->mc_signature == MDBX_MC_READY4CLOSE); - if (!mc->mc_backup) { - /* Remove from txn, if tracked. - * A read-only txn (!C_UNTRACK) may have been freed already, - * so do not peek inside it. Only write txns track cursors. 
*/ - if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { - MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; - while (*prev && *prev != mc) - prev = &(*prev)->mc_next; - if (*prev == mc) - *prev = mc->mc_next; - } - mc->mc_signature = 0; - mdbx_free(mc); - } else { - /* cursor closed before nested txn ends */ - mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE); - mc->mc_signature = MDBX_MC_WAIT4EOT; - } - } -} - -MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) { - if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) - return NULL; - MDBX_txn *txn = mc->mc_txn; - if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) - return NULL; - if (unlikely(txn->mt_flags & MDBX_TXN_FINISHED)) - return NULL; - return txn; -} - -MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { - if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) - return UINT_MAX; - return mc->mc_dbi; -} - /* Replace the key for a branch node with a new key. * Set MDBX_TXN_ERROR on failure. * [in] mc Cursor pointing to the node to operate on. 
@@ -14225,6 +14354,7 @@ static int mdbx_update_key(MDBX_cursor *mc, const MDBX_val *key) { int ptr, i, nkeys, indx; DKBUF; + mdbx_cassert(mc, cursor_is_tracked(mc)); indx = mc->mc_ki[mc->mc_top]; mp = mc->mc_pg[mc->mc_top]; node = page_node(mp, indx); @@ -14343,7 +14473,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { const unsigned snum = cdst->mc_snum; mdbx_cassert(csrc, snum > 0); MDBX_cursor mn; - mdbx_cursor_copy(cdst, &mn); + cursor_copy_internal(cdst, &mn); mn.mc_xcursor = NULL; /* must find the lowest key below dst */ rc = mdbx_page_search_lowest(&mn); @@ -14382,7 +14512,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { psrc = csrc->mc_pg[csrc->mc_top]; pdst = cdst->mc_pg[cdst->mc_top]; - rc = mdbx_update_key(&mn, &key); + WITH_CURSOR_TRACKING(mn, rc = mdbx_update_key(&mn, &key)); if (unlikely(rc)) return rc; } else { @@ -14466,7 +14596,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { mdbx_cassert(csrc, csrc->mc_top == cdst->mc_top); if (fromleft) { /* If we're adding on the left, bump others up */ - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (!(m3->mc_flags & C_INITIALIZED) || m3->mc_top < csrc->mc_top) continue; @@ -14486,7 +14616,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { } } else { /* Adding on the right, bump others down */ - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (csrc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; if (m3 == csrc) continue; @@ -14525,7 +14655,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { mdbx_debug("update separator for source page %" PRIaPGNO " to [%s]", psrc->mp_pgno, DKEY(&key)); MDBX_cursor mn; - mdbx_cursor_copy(csrc, &mn); + cursor_copy_internal(csrc, &mn); mn.mc_xcursor = NULL; mdbx_cassert(csrc, mn.mc_snum > 0); mn.mc_snum--; @@ -14560,7 +14690,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { mdbx_debug("update separator for destination page %" PRIaPGNO " to [%s]", pdst->mp_pgno, DKEY(&key)); MDBX_cursor mn; - mdbx_cursor_copy(cdst, &mn); + cursor_copy_internal(cdst, &mn); mn.mc_xcursor = NULL; mdbx_cassert(cdst, mn.mc_snum > 0); mn.mc_snum--; @@ -14597,6 +14727,8 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { int rc; mdbx_cassert(csrc, csrc != cdst); + mdbx_cassert(csrc, cursor_is_tracked(csrc)); + mdbx_cassert(cdst, cursor_is_tracked(cdst)); const MDBX_page *const psrc = csrc->mc_pg[csrc->mc_top]; MDBX_page *pdst = cdst->mc_pg[cdst->mc_top]; mdbx_debug("merging page %" PRIaPGNO " into %" PRIaPGNO, psrc->mp_pgno, @@ -14641,7 +14773,7 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { key.iov_base = node_key(srcnode); if (pagetype & P_BRANCH) { MDBX_cursor mn; - mdbx_cursor_copy(csrc, &mn); + cursor_copy_internal(csrc, &mn); mn.mc_xcursor = NULL; /* must find the lowest key below src */ rc = mdbx_page_search_lowest(&mn); @@ -14726,7 +14858,7 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { const MDBX_dbi dbi = csrc->mc_dbi; const unsigned top = csrc->mc_top; - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (m3 == csrc || top >= m3->mc_snum) continue; @@ -14837,7 +14969,7 @@ bailout: /* Copy the contents of a cursor. 
* [in] csrc The cursor to copy from. * [out] cdst The cursor to copy to. */ -static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) { +static void cursor_copy_internal(const MDBX_cursor *csrc, MDBX_cursor *cdst) { mdbx_cassert(csrc, csrc->mc_txn->mt_txnid >= *csrc->mc_txn->mt_env->me_oldest); cdst->mc_txn = csrc->mc_txn; @@ -14858,6 +14990,7 @@ static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) { * [in] mc Cursor pointing to the page where rebalancing should begin. * Returns 0 on success, non-zero on failure. */ static int mdbx_rebalance(MDBX_cursor *mc) { + mdbx_cassert(mc, cursor_is_tracked(mc)); mdbx_cassert(mc, mc->mc_snum > 0); mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth || IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1])); @@ -14915,8 +15048,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mc->mc_db->md_overflow_pages == 0 && mc->mc_db->md_leaf_pages == 1); /* Adjust cursors pointing to mp */ - const MDBX_dbi dbi = mc->mc_dbi; - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; @@ -14950,10 +15082,10 @@ static int mdbx_rebalance(MDBX_cursor *mc) { } /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2, *m3; - MDBX_dbi dbi = mc->mc_dbi; - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; + m2 = m2->mc_next) { + MDBX_cursor *m3 = + (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (m3 == mc || !(m3->mc_flags & C_INITIALIZED)) continue; if (m3->mc_pg[0] == mp) { @@ -14994,7 +15126,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { /* Find neighbors. 
*/ MDBX_cursor mn; - mdbx_cursor_copy(mc, &mn); + cursor_copy_internal(mc, &mn); mn.mc_xcursor = NULL; MDBX_page *left = nullptr, *right = nullptr; @@ -15031,7 +15163,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { /* We want mdbx_rebalance to find mn when doing fixups */ WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn)); if (likely(rc != MDBX_RESULT_TRUE)) { - mdbx_cursor_copy(&mn, mc); + cursor_copy_internal(&mn, mc); mc->mc_ki[mc->mc_top] = new_ki; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); return rc; @@ -15044,7 +15176,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1; mn.mc_ki[mn.mc_top] = 0; mc->mc_ki[mc->mc_top] = nkeys; - rc = mdbx_page_merge(&mn, mc); + WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); @@ -15059,7 +15191,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1; mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1); mc->mc_ki[mc->mc_top] = 0; - rc = mdbx_node_move(&mn, mc, true); + WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, true)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top + 1; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); @@ -15072,7 +15204,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1; mn.mc_ki[mn.mc_top] = 0; mc->mc_ki[mc->mc_top] = nkeys; - rc = mdbx_node_move(&mn, mc, false); + WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, false)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); @@ -15100,19 +15232,20 @@ static int mdbx_rebalance(MDBX_cursor *mc) { /* We want mdbx_rebalance to find mn when doing fixups */ WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn)); if 
(likely(rc != MDBX_RESULT_TRUE)) { - mdbx_cursor_copy(&mn, mc); + cursor_copy_internal(&mn, mc); mc->mc_ki[mc->mc_top] = new_ki; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); return rc; } - } else if (likely(right)) { + } + if (likely(right)) { /* try merge with right */ mdbx_cassert(mc, page_numkeys(right) >= minkeys); mn.mc_pg[mn.mc_top] = right; mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1; mn.mc_ki[mn.mc_top] = 0; mc->mc_ki[mc->mc_top] = nkeys; - rc = mdbx_page_merge(&mn, mc); + WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); @@ -15472,6 +15605,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) { unsigned nkeys; MDBX_dbi dbi = mc->mc_dbi; + mdbx_cassert(mc, cursor_is_tracked(mc)); mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); ki = mc->mc_ki[mc->mc_top]; mp = mc->mc_pg[mc->mc_top]; @@ -15479,7 +15613,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) { mc->mc_db->md_entries--; /* Adjust other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (m3 == mc || !(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) continue; @@ -15524,7 +15658,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) { nkeys == 0)); /* Adjust this and other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) continue; @@ -15632,10 +15766,10 @@ static int mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * is larger than the current one, the parent page may * run out of space, triggering a split. We need this * cursor to be consistent until the end of the rebalance. */ - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; rc = mdbx_cursor_del(&cx.outer, flags); - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; } return rc; } @@ -15722,7 +15856,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey, mdbx_debug("parent branch page is %" PRIaPGNO, mc->mc_pg[ptop]->mp_pgno); } - mdbx_cursor_copy(mc, &mn); + cursor_copy_internal(mc, &mn); mn.mc_xcursor = NULL; mn.mc_pg[mn.mc_top] = rp; mn.mc_ki[mn.mc_top] = 0; @@ -16071,7 +16205,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey, MDBX_dbi dbi = mc->mc_dbi; nkeys = page_numkeys(mp); - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; if (m3 == mc) continue; @@ -16147,8 +16281,8 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, rc = mdbx_cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; /* LY: support for update (explicit overwrite) */ if (flags & MDBX_CURRENT) { @@ -16169,7 +16303,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, if (likely(rc == MDBX_SUCCESS)) rc = mdbx_cursor_put(&cx.outer, key, data, flags); - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; return rc; } @@ -17454,6 +17588,8 @@ static int dbi_open(MDBX_txn *txn, const char *table_name, unsigned user_flags, txn->mt_dbistate[slot] = (uint8_t)dbiflags; txn->mt_dbxs[slot].md_name.iov_base = namedup; txn->mt_dbxs[slot].md_name.iov_len = len; + if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) + txn->tw.cursors[slot] = NULL; txn->mt_numdbs += (slot == txn->mt_numdbs); if ((dbiflags & DBI_CREAT) == 0) { env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; @@ -17591,7 +17727,7 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) { if (unlikely(rc)) goto done; - mdbx_cursor_copy(mc, &mx); + cursor_copy_internal(mc, &mx); while (mc->mc_snum > 0) { MDBX_page *mp = mc->mc_pg[mc->mc_top]; unsigned n = page_numkeys(mp); @@ -17688,7 +17824,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { rc = mdbx_drop0(mc, mc->mc_db->md_flags & MDBX_DUPSORT); /* Invalidate the dropped DB's cursors */ - for (MDBX_cursor *m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) + for (MDBX_cursor *m2 = txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) m2->mc_flags &= ~(C_INITIALIZED | C_EOF); if (unlikely(rc)) goto bailout; @@ -18519,10 +18655,11 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) { return MDBX_EINVAL; if 
(unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) - return MDBX_RESULT_FALSE; + return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; for (unsigned i = 0; i < mc->mc_snum; ++i) { if (mc->mc_ki[i]) @@ -18537,10 +18674,11 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) - return MDBX_RESULT_FALSE; + return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; for (unsigned i = 0; i < mc->mc_snum; ++i) { unsigned nkeys = page_numkeys(mc->mc_pg[i]); @@ -18556,7 +18694,8 @@ int mdbx_cursor_eof(const MDBX_cursor *mc) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; if ((mc->mc_flags & C_INITIALIZED) == 0) return MDBX_RESULT_TRUE; @@ -18587,9 +18726,13 @@ __hot static int cursor_diff(const MDBX_cursor *const __restrict x, r->level = 0; r->root_nkeys = 0; - if (unlikely(y->mc_signature != MDBX_MC_LIVE || - x->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + if (unlikely(x->mc_signature != MDBX_MC_LIVE)) + return (x->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (unlikely(y->mc_signature != MDBX_MC_LIVE)) + return (y->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(x->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -18755,7 +18898,8 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return MDBX_EINVAL; if (unlikely(cursor->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (cursor->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(cursor->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -18765,7 +18909,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return MDBX_ENODATA; MDBX_cursor_couple next; - mdbx_cursor_copy(cursor, &next.outer); + cursor_copy_internal(cursor, &next.outer); next.outer.mc_xcursor = NULL; if (cursor->mc_db->md_flags & MDBX_DUPSORT) { next.outer.mc_xcursor = &next.inner; @@ -18773,7 +18917,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, if (unlikely(rc != MDBX_SUCCESS)) return rc; MDBX_xcursor *mx = &container_of(cursor, MDBX_cursor_couple, outer)->inner; - mdbx_cursor_copy(&mx->mx_cursor, &next.inner.mx_cursor); + cursor_copy_internal(&mx->mx_cursor, &next.inner.mx_cursor); } MDBX_val stub = {0, 0}; @@ -19010,8 +19154,8 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, rc = mdbx_cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; MDBX_val present_key = *key; if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) { @@ -19087,7 +19231,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, rc = mdbx_cursor_del(&cx.outer, flags & MDBX_ALLDUPS); bailout: - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; return rc; } @@ -19611,10 +19755,10 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, rc = mdbx_cursor_set(&cx.outer, key, &old_data, MDBX_SET, NULL); if (unlikely(rc != MDBX_SUCCESS)) { if (rc == MDBX_NOTFOUND && data) { - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; rc = mdbx_cursor_put_attr(&cx.outer, key, data, attr, 0); - 
txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; } return rc; } @@ -19629,11 +19773,11 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, old_data.iov_len) == 0))) return MDBX_SUCCESS; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; rc = mdbx_cursor_put_attr(&cx.outer, key, data ? data : &old_data, attr, MDBX_CURRENT); - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; return rc; } #endif /* MDBX_NEXENTA_ATTRS */ diff --git a/src/internals.h b/src/internals.h index 72b62f60..85c00493 100644 --- a/src/internals.h +++ b/src/internals.h @@ -778,8 +778,6 @@ struct MDBX_txn { MDBX_db *mt_dbs; /* Array of sequence numbers for each DB handle */ unsigned *mt_dbiseqs; - /* In write txns, array of cursors for each DB */ - MDBX_cursor **mt_cursors; /* Transaction DBI Flags */ #define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */ @@ -806,6 +804,8 @@ struct MDBX_txn { MDBX_reader *reader; } to; struct { + /* In write txns, array of cursors for each DB */ + MDBX_cursor **cursors; pgno_t *reclaimed_pglist; /* Reclaimed GC pages */ txnid_t last_reclaimed; /* ID of last used record */ pgno_t loose_refund_wl /* FIXME: describe */; diff --git a/test/keygen.cc b/test/keygen.cc index 411bf623..a941e3ae 100644 --- a/test/keygen.cc +++ b/test/keygen.cc @@ -115,7 +115,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, serial_t key_serial = serial; serial_t value_serial = value_age << mapping.split; if (mapping.split) { - if (key_essentials.flags & MDBX_DUPSORT) { + if (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) { key_serial >>= mapping.split; value_serial += serial & mask(mapping.split); } else { @@ -203,7 +203,7 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, #endif key_essentials.flags = 
actor.table_flags & - uint16_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT); + MDBX_db_flags_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT); assert(actor.keylen_min <= UINT16_MAX); key_essentials.minlen = (uint16_t)actor.keylen_min; assert(actor.keylen_max <= UINT32_MAX); @@ -213,7 +213,7 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, actor.pagesize, MDBX_db_flags_t(key_essentials.flags)))); value_essentials.flags = - actor.table_flags & uint16_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP); + actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP); assert(actor.datalen_min <= UINT16_MAX); value_essentials.minlen = (uint16_t)actor.datalen_min; assert(actor.datalen_max <= UINT32_MAX); @@ -236,35 +236,41 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, } void maker::make_linear() { - mapping.mesh = (key_essentials.flags & MDBX_DUPSORT) ? 0 : mapping.split; + mapping.mesh = (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) + ? 0 + : mapping.split; mapping.rotate = 0; mapping.offset = 0; const auto max_serial = mask(mapping.width) + base; const auto max_key_serial = - (mapping.split && (key_essentials.flags & MDBX_DUPSORT)) + (mapping.split && (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT)) ? max_serial >> mapping.split : max_serial; const auto max_value_serial = - (mapping.split && (key_essentials.flags & MDBX_DUPSORT)) + (mapping.split && (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT)) ? mask(mapping.split) : 0; while (key_essentials.minlen < 8 && (key_essentials.minlen == 0 || mask(key_essentials.minlen * 8) < max_key_serial)) { - key_essentials.minlen += - (key_essentials.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) ? 4 : 1; + key_essentials.minlen += (MDBX_db_flags_t(key_essentials.flags) & + (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) + ? 
4 + : 1; if (key_essentials.maxlen < key_essentials.minlen) key_essentials.maxlen = key_essentials.minlen; } - if ((key_essentials.flags | value_essentials.flags) & MDBX_DUPSORT) + if (MDBX_db_flags_t(key_essentials.flags | value_essentials.flags) & + MDBX_DUPSORT) while (value_essentials.minlen < 8 && (value_essentials.minlen == 0 || mask(value_essentials.minlen * 8) < max_value_serial)) { - value_essentials.minlen += - (value_essentials.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) ? 4 - : 1; + value_essentials.minlen += (MDBX_db_flags_t(value_essentials.flags) & + (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) + ? 4 + : 1; if (value_essentials.maxlen < value_essentials.minlen) value_essentials.maxlen = value_essentials.minlen; } @@ -272,8 +278,9 @@ void maker::make_linear() { bool maker::is_unordered() const { return mapping.rotate || - mapping.mesh > - ((key_essentials.flags & MDBX_DUPSORT) ? 0 : mapping.split); + mapping.mesh > ((MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) + ? 0 + : mapping.split); } bool maker::increment(serial_t &serial, int delta) const { @@ -374,9 +381,9 @@ void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, #endif assert(length(serial) <= out.value.iov_len); out.value.iov_base = out.bytes; - if (params.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) { + if (MDBX_db_flags_t(params.flags) & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) { assert(params.maxlen == params.minlen); - if (params.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) + if (MDBX_db_flags_t(params.flags) & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) assert(params.minlen == 4 || params.minlen == 8); out.u64 = serial; if (!is_byteorder_le() && out.value.iov_len != 8) @@ -393,7 +400,8 @@ void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, } else memset(out.bytes + 8, '\0', out.value.iov_len - prefix); } - if (unlikely(params.flags & (MDBX_REVERSEKEY | MDBX_REVERSEDUP))) + if (unlikely(MDBX_db_flags_t(params.flags) & + (MDBX_REVERSEKEY | 
MDBX_REVERSEDUP))) std::reverse((char *)out.value.iov_base, (char *)out.value.iov_base + out.value.iov_len); } diff --git a/test/nested.cc b/test/nested.cc index d26382bf..1952780d 100644 --- a/test/nested.cc +++ b/test/nested.cc @@ -80,8 +80,8 @@ bool testcase_nested::teardown() { void testcase_nested::push_txn() { MDBX_txn *txn; - MDBX_txn_flags_t flags = - MDBX_txn_flags_t(prng32() & (MDBX_TXN_NOSYNC | MDBX_TXN_NOMETASYNC)); + MDBX_txn_flags_t flags = MDBX_txn_flags_t( + prng32() & uint32_t(MDBX_TXN_NOSYNC | MDBX_TXN_NOMETASYNC)); int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &txn); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_begin(nested)", err); diff --git a/test/osal-unix.cc b/test/osal-unix.cc index 57643667..7c0bedbc 100644 --- a/test/osal-unix.cc +++ b/test/osal-unix.cc @@ -414,6 +414,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { while (sigalarm_tail == sigalarm_head) { int status; pid = waitpid(0, &status, options); + const int err = errno; if (pid > 0) { if (WIFEXITED(status)) @@ -437,20 +438,19 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { if (sigusr1_tail != sigusr1_head) { sigusr1_tail = sigusr1_head; logging::progress_canary(true); - if (pid < 0 && errno == EINTR) + if (pid < 0 && err == EINTR) continue; } if (sigusr2_tail != sigusr2_head) { sigusr2_tail = sigusr2_head; logging::progress_canary(false); - if (pid < 0 && errno == EINTR) + if (pid < 0 && err == EINTR) continue; } if (pid == 0) break; - int err = errno; if (err != EINTR) return err; } diff --git a/test/test.cc b/test/test.cc index 6a91a35a..f23858f8 100644 --- a/test/test.cc +++ b/test/test.cc @@ -310,7 +310,7 @@ void testcase::txn_inject_writefault(void) { void testcase::txn_inject_writefault(MDBX_txn *txn) { if (config.params.inject_writefaultn && txn) { if (config.params.inject_writefaultn <= nops_completed && - (mdbx_txn_flags(txn) & MDBX_RDONLY) == 0) { + (MDBX_txn_flags_t(mdbx_txn_flags(txn)) & MDBX_TXN_RDONLY) == 0) 
{ log_verbose( "== txn_inject_writefault(): got %u nops or more, inject FAULT", config.params.inject_writefaultn); @@ -645,46 +645,389 @@ bool test_execute(const actor_config &config_const) { //----------------------------------------------------------------------------- +enum speculum_cursors : int { + lowerbound = 0, + prev = 1, + prev_prev = 2, + next = 3, + next_next = 4 +}; + +bool testcase::is_same(const Item &a, const Item &b) const { + if (!is_samedata(dataview2iov(a.first), dataview2iov(b.first))) + return false; + if ((config.params.table_flags & MDBX_DUPSORT) && + !is_samedata(dataview2iov(a.second), dataview2iov(b.second))) + return false; + return true; +} + +bool testcase::is_same(const testcase::SET::const_iterator &it, + const MDBX_val &k, const MDBX_val &v) const { + + return is_samedata(dataview2iov(it->first), k) && + is_samedata(dataview2iov(it->second), v); +} + +void testcase::verbose(const char *where, const char *stage, + const testcase::SET::const_iterator &it) const { + if (it == speculum.end()) + log_verbose("speculum-%s: %s expect END", where, stage); + else { + char dump_key[32], dump_value[32]; + MDBX_val it_key = dataview2iov(it->first); + MDBX_val it_data = dataview2iov(it->second); + log_verbose("speculum-%s: %s expect {%s, %s}", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); + } +} + +void testcase::verbose(const char *where, const char *stage, + const MDBX_val &key, const MDBX_val &data, + int err) const { + char dump_key[32], dump_value[32]; + if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE) + log_verbose("speculum-%s: %s cursor {%d, %s}", where, stage, err, + mdbx_strerror(err)); + else + log_verbose("speculum-%s: %s cursor {%s, %s}", where, stage, + mdbx_dump_val(&key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&data, dump_value, sizeof(dump_value))); +} + +void testcase::speculum_check_iterator(const char *where, const char *stage, + const 
testcase::SET::const_iterator &it, + const MDBX_val &key, + const MDBX_val &data) const { + char dump_key[32], dump_value[32]; + MDBX_val it_key = dataview2iov(it->first); + MDBX_val it_data = dataview2iov(it->second); + // log_verbose("speculum-%s: %s expect {%s, %s}", where, stage, + // mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + // mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); + if (!is_samedata(it_key, key)) + failure("speculum-%s: %s key mismatch %s (must) != %s", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&key, dump_value, sizeof(dump_value))); + if (!is_samedata(it_data, data)) + failure("speculum-%s: %s data mismatch %s (must) != %s", where, stage, + mdbx_dump_val(&it_data, dump_key, sizeof(dump_key)), + mdbx_dump_val(&data, dump_value, sizeof(dump_value))); +} + +void testcase::speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + int cursor_err, const MDBX_val &cursor_key, + const MDBX_val &cursor_data) const { + // verbose(where, stage, cursor_key, cursor_data, cursor_err); + // verbose(where, stage, it); + if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND && + cursor_err != MDBX_RESULT_TRUE) + failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", cursor_err, + mdbx_strerror(cursor_err)); + + char dump_key[32], dump_value[32]; + if (it == speculum.end() && cursor_err != MDBX_NOTFOUND) + failure("speculum-%s: %s extra pair {%s, %s}", where, stage, + mdbx_dump_val(&cursor_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&cursor_data, dump_value, sizeof(dump_value))); + else if (it != speculum.end() && cursor_err == MDBX_NOTFOUND) { + MDBX_val it_key = dataview2iov(it->first); + MDBX_val it_data = dataview2iov(it->second); + failure("speculum-%s: %s lack pair {%s, %s}", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); + } else if (cursor_err == 
MDBX_SUCCESS || cursor_err == MDBX_RESULT_TRUE) + speculum_check_iterator(where, stage, it, cursor_key, cursor_data); +} + +void testcase::speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + MDBX_cursor *cursor, + const MDBX_cursor_op op) const { + MDBX_val cursor_key = {}; + MDBX_val cursor_data = {}; + int err; + if (std::next(it) == speculum.end() && op == MDBX_PREV && + (config.params.table_flags & MDBX_DUPSORT)) { + /* Workaround for MDBX/LMDB flaw */ + err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, MDBX_LAST); + if (err == MDBX_SUCCESS) + err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, MDBX_LAST_DUP); + } else + err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, op); + return speculum_check_cursor(where, stage, it, err, cursor_key, cursor_data); +} + +void testcase::speculum_prepare_cursors(const Item &item) { + int err; + assert(config.params.speculum); + if (speculum_cursors[lowerbound]) + for (auto &guard : speculum_cursors) { + if (txn_guard.get() != mdbx_cursor_txn(guard.get()) || + dbi != mdbx_cursor_dbi(guard.get())) { + err = mdbx_cursor_bind(txn_guard.get(), guard.get(), dbi); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_cursor_bind()", err); + } + } + else + for (auto &guard : speculum_cursors) { + MDBX_cursor *cursor = nullptr; + err = mdbx_cursor_open(txn_guard.get(), dbi, &cursor); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_cursor_open()", err); + guard.reset(cursor); + } + + const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); + const MDBX_val item_key = dataview2iov(item.first), + item_data = dataview2iov(item.second); + MDBX_val lowerbound_key = item_key; + MDBX_val lowerbound_data = item_data; + // verbose("prepare-cursors", "item", item_key, item_data); + err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, &lowerbound_data, + MDBX_SET_LOWERBOUND); + // verbose("prepare-cursors", "lowerbound", lowerbound_key, 
lowerbound_data, + // err); + if (unlikely(err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE && + err != MDBX_NOTFOUND)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "lowerbound", + "cursor-get", err, mdbx_strerror(err)); + + auto it_lowerbound = speculum.lower_bound(item); + // verbose("prepare-cursors", "lowerbound", it_lowerbound); + speculum_check_cursor("prepare-cursors", "lowerbound", it_lowerbound, err, + lowerbound_key, lowerbound_data); + + const auto cursor_prev = speculum_cursors[prev].get(); + err = mdbx_cursor_copy(cursor_lowerbound, cursor_prev); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev", + "cursor-copy", err, mdbx_strerror(err)); + auto it_prev = it_lowerbound; + if (it_prev != speculum.begin()) { + speculum_check_cursor("prepare-cursors", "prev", --it_prev, cursor_prev, + MDBX_PREV); + } else if ((err = mdbx_cursor_on_first(cursor_prev)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-first %d %s", "prepare-cursors", "prev", err, + mdbx_strerror(err)); + + const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); + err = mdbx_cursor_copy(cursor_prev, cursor_prev_prev); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev-prev", + "cursor-copy", err, mdbx_strerror(err)); + auto it_prev_prev = it_prev; + if (it_prev_prev != speculum.begin()) { + speculum_check_cursor("prepare-cursors", "prev-prev", --it_prev_prev, + cursor_prev_prev, MDBX_PREV); + } else if ((err = mdbx_cursor_on_first(cursor_prev_prev)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-first %d %s", "prepare-cursors", "prev-prev", + err, mdbx_strerror(err)); + + const auto cursor_next = speculum_cursors[next].get(); + err = mdbx_cursor_copy(cursor_lowerbound, cursor_next); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next", + "cursor-copy", err, mdbx_strerror(err)); + auto it_next = it_lowerbound; + if (it_next 
!= speculum.end()) { + speculum_check_cursor("prepare-cursors", "next", ++it_next, cursor_next, + MDBX_NEXT); + } else if ((err = mdbx_cursor_on_last(cursor_next)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next", err, + mdbx_strerror(err)); + + const auto cursor_next_next = speculum_cursors[next_next].get(); + err = mdbx_cursor_copy(cursor_next, cursor_next_next); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next-next", + "cursor-copy", err, mdbx_strerror(err)); + auto it_next_next = it_next; + if (it_next_next != speculum.end()) { + speculum_check_cursor("prepare-cursors", "next-next", ++it_next_next, + cursor_next_next, MDBX_NEXT); + } else if ((err = mdbx_cursor_on_last(cursor_next_next)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next-next", + err, mdbx_strerror(err)); +} + int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags) { - int err = mdbx_put(txn_guard.get(), dbi, &akey->value, &adata->value, flags); - if (err == MDBX_SUCCESS && config.params.speculum) { - const auto S_key = S(akey); - const auto S_data = S(adata); - const bool inserted = speculum.emplace(S_key, S_data).second; - assert(inserted); - (void)inserted; + int err; + bool rc = true; + Item item; + if (config.params.speculum) { + item.first = iov2dataview(akey); + item.second = iov2dataview(adata); + speculum_prepare_cursors(item); } - return err; + + err = mdbx_put(txn_guard.get(), dbi, &akey->value, &adata->value, flags); + if (err != MDBX_SUCCESS && err != MDBX_KEYEXIST) + return err; + + if (config.params.speculum) { + char dump_key[32], dump_value[32]; + const auto insertion_result = speculum.insert(item); + if (err == MDBX_KEYEXIST && insertion_result.second) { + log_error("speculum.insert: unexpected %s {%s, %s}", "MDBX_KEYEXIST", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + 
mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + if (err == MDBX_SUCCESS && !insertion_result.second) { + log_error("speculum.insert: unexpected %s {%s, %s}", "MDBX_SUCCESS", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + + if (insertion_result.first != speculum.begin()) { + const auto cursor_prev = speculum_cursors[prev].get(); + auto it_prev = insertion_result.first; + speculum_check_cursor("after-insert", "prev", --it_prev, cursor_prev, + MDBX_GET_CURRENT); + if (it_prev != speculum.begin()) { + const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); + auto it_prev_prev = it_prev; + speculum_check_cursor("after-insert", "prev-prev", --it_prev_prev, + cursor_prev_prev, MDBX_GET_CURRENT); + } + } + + auto it_lowerbound = insertion_result.first; + if (++it_lowerbound != speculum.end()) { + const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); + speculum_check_cursor("after-insert", "lowerbound", it_lowerbound, + cursor_lowerbound, MDBX_GET_CURRENT); + + auto it_next = it_lowerbound; + if (++it_next != speculum.end()) { + const auto cursor_next = speculum_cursors[next].get(); + speculum_check_cursor("after-insert", "next", it_next, cursor_next, + MDBX_GET_CURRENT); + + auto it_next_next = it_next; + if (++it_next_next != speculum.end()) { + const auto cursor_next_next = speculum_cursors[next_next].get(); + speculum_check_cursor("after-insert", "next-next", it_next_next, + cursor_next_next, MDBX_GET_CURRENT); + } + } + } + } + + return rc ? 
MDBX_SUCCESS : MDBX_RESULT_TRUE; } int testcase::replace(const keygen::buffer &akey, const keygen::buffer &new_data, const keygen::buffer &old_data, MDBX_put_flags_t flags) { if (config.params.speculum) { - const auto S_key = S(akey); - const auto S_old = S(old_data); - const auto S_new = S(new_data); + const auto S_key = iov2dataview(akey); + const auto S_old = iov2dataview(old_data); + const auto S_new = iov2dataview(new_data); const auto removed = speculum.erase(SET::key_type(S_key, S_old)); - assert(removed == 1); - (void)removed; - const bool inserted = speculum.emplace(S_key, S_new).second; - assert(inserted); - (void)inserted; + if (unlikely(removed != 1)) { + char dump_key[128], dump_value[128]; + log_error( + "speculum-%s: %s old value {%s, %s}", "replace", + (removed > 1) ? "multi" : "no", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&old_data->value, dump_value, sizeof(dump_value))); + } + if (unlikely(!speculum.emplace(S_key, S_new).second)) { + char dump_key[128], dump_value[128]; + log_error( + "speculum-replace: new pair not inserted {%s, %s}", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&new_data->value, dump_value, sizeof(dump_value))); + } } return mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, &old_data->value, flags); } int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { + int err; + bool rc = true; + Item item; if (config.params.speculum) { - const auto S_key = S(akey); - const auto S_data = S(adata); - const auto removed = speculum.erase(SET::key_type(S_key, S_data)); - assert(removed == 1); - (void)removed; + item.first = iov2dataview(akey); + item.second = iov2dataview(adata); + speculum_prepare_cursors(item); } - return mdbx_del(txn_guard.get(), dbi, &akey->value, &adata->value); + + err = mdbx_del(txn_guard.get(), dbi, &akey->value, &adata->value); + if (err != MDBX_NOTFOUND && err != MDBX_SUCCESS) + return err; + + if 
(config.params.speculum) { + char dump_key[32], dump_value[32]; + const auto it_found = speculum.find(item); + if (it_found == speculum.end()) { + if (err != MDBX_NOTFOUND) { + log_error("speculum.remove: unexpected %s {%s, %s}", "MDBX_SUCCESS", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + } else { + if (err != MDBX_SUCCESS) { + log_error("speculum.remove: unexpected %s {%s, %s}", "MDBX_NOTFOUND", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + + if (it_found != speculum.begin()) { + const auto cursor_prev = speculum_cursors[prev].get(); + auto it_prev = it_found; + speculum_check_cursor("after-remove", "prev", --it_prev, cursor_prev, + MDBX_GET_CURRENT); + if (it_prev != speculum.begin()) { + const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); + auto it_prev_prev = it_prev; + speculum_check_cursor("after-remove", "prev-prev", --it_prev_prev, + cursor_prev_prev, MDBX_GET_CURRENT); + } + } + + auto it_next = it_found; + const auto cursor_next = speculum_cursors[next].get(); + const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); + if (++it_next != speculum.end()) { + speculum_check_cursor("after-remove", "next", it_next, cursor_next, + MDBX_GET_CURRENT); + speculum_check_cursor("after-remove", "lowerbound", it_next, + cursor_lowerbound, MDBX_NEXT); + + auto it_next_next = it_next; + const auto cursor_next_next = speculum_cursors[next_next].get(); + if (++it_next_next != speculum.end()) { + speculum_check_cursor("after-remove", "next-next", it_next_next, + cursor_next_next, MDBX_GET_CURRENT); + } else if ((err = mdbx_cursor_on_last(cursor_next_next)) != + MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "after-remove", "next-next", + err, mdbx_strerror(err)); + } else { + if ((err = mdbx_cursor_on_last(cursor_next)) != MDBX_RESULT_TRUE) + 
failure("speculum-%s: %s on-last %d %s", "after-remove", "next", err, + mdbx_strerror(err)); + if ((err = mdbx_cursor_on_last(cursor_lowerbound)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "after-remove", "lowerbound", + err, mdbx_strerror(err)); + } + + speculum.erase(it_found); + } + } + + return rc ? MDBX_SUCCESS : MDBX_RESULT_TRUE; } bool testcase::speculum_verify() { @@ -715,8 +1058,8 @@ bool testcase::speculum_verify() { akey.iov_len = avalue.iov_len = 0; akey.iov_base = avalue.iov_base = nullptr; } - const auto S_key = S(akey); - const auto S_data = S(avalue); + const auto S_key = iov2dataview(akey); + const auto S_data = iov2dataview(avalue); if (it != speculum.cend()) { mkey.iov_base = (void *)it->first.c_str(); mkey.iov_len = it->first.size(); diff --git a/test/test.h b/test/test.h index 1a765143..bcc33209 100644 --- a/test/test.h +++ b/test/test.h @@ -103,31 +103,33 @@ protected: #else using data_view = std::string; #endif - static inline data_view S(const MDBX_val &v) { + static inline data_view iov2dataview(const MDBX_val &v) { return (v.iov_base && v.iov_len) ? 
data_view(static_cast<const char *>(v.iov_base), v.iov_len) : data_view(); } - static inline data_view S(const keygen::buffer &b) { return S(b->value); } + static inline data_view iov2dataview(const keygen::buffer &b) { + return iov2dataview(b->value); + } using Item = std::pair<std::string, std::string>; + static MDBX_val dataview2iov(const data_view &v) { + MDBX_val r; + r.iov_base = (void *)v.data(); + r.iov_len = v.size(); + return r; + } struct ItemCompare { const testcase *context; ItemCompare(const testcase *owner) : context(owner) {} bool operator()(const Item &a, const Item &b) const { - MDBX_val va, vb; - va.iov_base = (void *)a.first.data(); - va.iov_len = a.first.size(); - vb.iov_base = (void *)b.first.data(); - vb.iov_len = b.first.size(); + MDBX_val va = dataview2iov(a.first), vb = dataview2iov(b.first); int cmp = mdbx_cmp(context->txn_guard.get(), context->dbi, &va, &vb); if (cmp == 0 && (context->config.params.table_flags & MDBX_DUPSORT) != 0) { - va.iov_base = (void *)a.second.data(); - va.iov_len = a.second.size(); - vb.iov_base = (void *)b.second.data(); - vb.iov_len = b.second.size(); + va = dataview2iov(a.second); + vb = dataview2iov(b.second); cmp = mdbx_dcmp(context->txn_guard.get(), context->dbi, &va, &vb); } return cmp < 0; @@ -159,6 +161,29 @@ protected: } last; SET speculum{ItemCompare(this)}, speculum_committed{ItemCompare(this)}; + scoped_cursor_guard speculum_cursors[5]; + void speculum_prepare_cursors(const Item &item); + void speculum_check_iterator(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + const MDBX_val &key, const MDBX_val &data) const; + void speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + int cursor_err, const MDBX_val &cursor_key, + const MDBX_val &cursor_data) const; + void speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + MDBX_cursor *cursor, + const MDBX_cursor_op op) const; + + void verbose(const char *where,
const char *stage, + const testcase::SET::const_iterator &it) const; + void verbose(const char *where, const char *stage, const MDBX_val &key, + const MDBX_val &data, int err = MDBX_SUCCESS) const; + + bool is_same(const Item &a, const Item &b) const; + bool is_same(const SET::const_iterator &it, const MDBX_val &k, + const MDBX_val &v) const; + bool speculum_verify(); int insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags); diff --git a/test/utils.h b/test/utils.h index f00f34d1..d982df35 100644 --- a/test/utils.h +++ b/test/utils.h @@ -287,6 +287,9 @@ std::string data2hex(const void *ptr, size_t bytes, simple_checksum &checksum); bool hex2data(const char *hex_begin, const char *hex_end, void *ptr, size_t bytes, simple_checksum &checksum); bool is_samedata(const MDBX_val *a, const MDBX_val *b); +inline bool is_samedata(const MDBX_val &a, const MDBX_val &b) { + return is_samedata(&a, &b); +} std::string format(const char *fmt, ...); uint64_t entropy_ticks(void);