From 10681a53ae43f9caa415293672912b6febbabf18 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Sun, 1 Nov 2020 10:01:19 +0300 Subject: [PATCH 01/28] mdbx: cleanup/refine GC-flags inside `mdbx_page_alloc()`. Change-Id: Ie81c703815a7b3fbe59ad9b406811d5ed2913d02 --- src/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core.c b/src/core.c index cfeeb0b3..919a8ce0 100644 --- a/src/core.c +++ b/src/core.c @@ -5063,7 +5063,7 @@ __hot static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num, if (unlikely(mc->mc_flags & C_RECLAIMING)) { /* If mc is updating the GC, then the retired-list cannot play * catch-up with itself by growing while trying to save it. */ - flags &= ~(MDBX_ALLOC_GC | MDBX_COALESCE | MDBX_LIFORECLAIM); + flags &= ~MDBX_ALLOC_GC; } else if (unlikely(txn->mt_dbs[FREE_DBI].md_entries == 0)) { /* avoid (recursive) search inside empty tree and while tree is updating, * https://github.com/erthink/libmdbx/issues/31 */ @@ -5270,8 +5270,8 @@ skip_cache: /* Stop reclaiming to avoid overflow the page list. * This is a rare case while search for a continuously multi-page region * in a large database. https://github.com/erthink/libmdbx/issues/123 */ - flags -= MDBX_ALLOC_GC; - if (unlikely(flags == 0)) { + flags &= ~MDBX_ALLOC_GC; + if (unlikely((flags & MDBX_ALLOC_ALL) == 0)) { /* Oh, we can't do anything */ rc = MDBX_TXN_FULL; goto fail; From 0c78da99b079a7bcf67ff0a95f032011b58cd1ca Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Mon, 2 Nov 2020 19:10:01 +0300 Subject: [PATCH 02/28] mdbx: add `cursor_is_tracked()` and assertions. 
Change-Id: I83db1a714597b134aa30e4ca7019bb7f541a2428 --- src/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/core.c b/src/core.c index 919a8ce0..19cac114 100644 --- a/src/core.c +++ b/src/core.c @@ -3599,11 +3599,20 @@ static __maybe_unused void mdbx_page_list(MDBX_page *mp) { (mc)->mc_xcursor->mx_cursor.mc_pg[0] = node_data(xr_node); \ } while (0) +static __maybe_unused bool cursor_is_tracked(const MDBX_cursor *mc) { + for (MDBX_cursor *scan = mc->mc_txn->mt_cursors[mc->mc_dbi]; scan; + scan = scan->mc_next) + if (mc == ((mc->mc_flags & C_SUB) ? &scan->mc_xcursor->mx_cursor : scan)) + return true; + return false; +} + /* Perform act while tracking temporary cursor mn */ #define WITH_CURSOR_TRACKING(mn, act) \ do { \ mdbx_cassert(&(mn), \ mn.mc_txn->mt_cursors != NULL /* must be not rdonly txt */); \ + mdbx_cassert(&(mn), !cursor_is_tracked(&(mn))); \ MDBX_cursor mc_dummy; \ MDBX_cursor **tracking_head = &(mn).mc_txn->mt_cursors[mn.mc_dbi]; \ MDBX_cursor *tracked = &(mn); \ @@ -12583,6 +12592,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if (unlikely(rc != MDBX_SUCCESS)) return rc; + mdbx_cassert(mc, cursor_is_tracked(mc)); env = mc->mc_txn->mt_env; /* Check this first so counter will always be zero on any early failures. */ @@ -15472,6 +15482,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) { unsigned nkeys; MDBX_dbi dbi = mc->mc_dbi; + mdbx_cassert(mc, cursor_is_tracked(mc)); mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); ki = mc->mc_ki[mc->mc_top]; mp = mc->mc_pg[mc->mc_top]; From b209e91ad411a63434ffe4feb256fb3f2908a2db Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Mon, 2 Nov 2020 23:54:13 +0300 Subject: [PATCH 03/28] mdbx: zeroed `mc_top` while tracking nested cursors. This avoids scanning dummy cursors during tracking. 
Change-Id: Id4edc665010215b4a946f9438475691f1387313c --- src/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core.c b/src/core.c index 19cac114..633fa98d 100644 --- a/src/core.c +++ b/src/core.c @@ -3618,6 +3618,7 @@ static __maybe_unused bool cursor_is_tracked(const MDBX_cursor *mc) { MDBX_cursor *tracked = &(mn); \ if ((mn).mc_flags & C_SUB) { \ mc_dummy.mc_flags = C_INITIALIZED; \ + mc_dummy.mc_top = 0; \ mc_dummy.mc_xcursor = (MDBX_xcursor *)&(mn); \ tracked = &mc_dummy; \ } \ From 37a1d546b70700ab57ad9e316709b13f5f3794e7 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 3 Nov 2020 03:01:02 +0300 Subject: [PATCH 04/28] mdbx: minor refine `mdbx_rebalance()`. Change-Id: I10a6a208b173bdee1d325aa7b2624de879814647 --- src/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core.c b/src/core.c index 633fa98d..d8ccac49 100644 --- a/src/core.c +++ b/src/core.c @@ -14926,8 +14926,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mc->mc_db->md_overflow_pages == 0 && mc->mc_db->md_leaf_pages == 1); /* Adjust cursors pointing to mp */ - const MDBX_dbi dbi = mc->mc_dbi; - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; + for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; @@ -14961,10 +14960,10 @@ static int mdbx_rebalance(MDBX_cursor *mc) { } /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2, *m3; - MDBX_dbi dbi = mc->mc_dbi; - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; + m2 = m2->mc_next) { + MDBX_cursor *m3 = + (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; if (m3 == mc || !(m3->mc_flags & C_INITIALIZED)) continue; if (m3->mc_pg[0] == mp) { @@ -15116,7 +15115,8 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); return rc; } - } else if (likely(right)) { + } + if (likely(right)) { /* try merge with right */ mdbx_cassert(mc, page_numkeys(right) >= minkeys); mn.mc_pg[mn.mc_top] = right; From 459e7698443773173fd7e5a488f06ba1975632d9 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 3 Nov 2020 20:31:12 +0300 Subject: [PATCH 05/28] mdbx: engage cursor tracking for all merge/rebalance operations. Assume this resolves https://github.com/erthink/libmdbx/issues/131. Change-Id: I5111618a34d91d083b81245ac959b744a86c4495 --- src/core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/core.c b/src/core.c index d8ccac49..86d967a4 100644 --- a/src/core.c +++ b/src/core.c @@ -14236,6 +14236,7 @@ static int mdbx_update_key(MDBX_cursor *mc, const MDBX_val *key) { int ptr, i, nkeys, indx; DKBUF; + mdbx_cassert(mc, cursor_is_tracked(mc)); indx = mc->mc_ki[mc->mc_top]; mp = mc->mc_pg[mc->mc_top]; node = page_node(mp, indx); @@ -14393,7 +14394,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { psrc = csrc->mc_pg[csrc->mc_top]; pdst = cdst->mc_pg[cdst->mc_top]; - rc = mdbx_update_key(&mn, &key); + WITH_CURSOR_TRACKING(mn, rc = mdbx_update_key(&mn, &key)); if (unlikely(rc)) return rc; } else { @@ -14608,6 +14609,8 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { int rc; mdbx_cassert(csrc, csrc != cdst); + mdbx_cassert(csrc, cursor_is_tracked(csrc)); + mdbx_cassert(cdst, cursor_is_tracked(cdst)); const MDBX_page *const psrc = csrc->mc_pg[csrc->mc_top]; MDBX_page *pdst = cdst->mc_pg[cdst->mc_top]; mdbx_debug("merging page %" PRIaPGNO " into %" PRIaPGNO, psrc->mp_pgno, @@ -14869,6 +14872,7 @@ static void mdbx_cursor_copy(const MDBX_cursor *csrc, 
MDBX_cursor *cdst) { * [in] mc Cursor pointing to the page where rebalancing should begin. * Returns 0 on success, non-zero on failure. */ static int mdbx_rebalance(MDBX_cursor *mc) { + mdbx_cassert(mc, cursor_is_tracked(mc)); mdbx_cassert(mc, mc->mc_snum > 0); mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth || IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1])); @@ -15054,7 +15058,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1; mn.mc_ki[mn.mc_top] = 0; mc->mc_ki[mc->mc_top] = nkeys; - rc = mdbx_page_merge(&mn, mc); + WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); @@ -15069,7 +15073,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1; mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1); mc->mc_ki[mc->mc_top] = 0; - rc = mdbx_node_move(&mn, mc, true); + WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, true)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top + 1; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); @@ -15082,7 +15086,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1; mn.mc_ki[mn.mc_top] = 0; mc->mc_ki[mc->mc_top] = nkeys; - rc = mdbx_node_move(&mn, mc, false); + WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, false)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); @@ -15123,7 +15127,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1; mn.mc_ki[mn.mc_top] = 0; mc->mc_ki[mc->mc_top] = nkeys; - rc = mdbx_page_merge(&mn, mc); + WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc)); if (likely(rc != MDBX_RESULT_TRUE)) { mc->mc_ki[mc->mc_top] = ki_top; mdbx_cassert(mc, rc || 
page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); From 96143a9bb2f51b0a4acf78bb7d39ea561c6cd82c Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Wed, 4 Nov 2020 02:05:22 +0300 Subject: [PATCH 06/28] mdbx-test: always check speculum its own operations. Change-Id: I38e668ce70f0e1ccb7bf2692a3b406fed9f87f53 --- test/test.cc | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/test/test.cc b/test/test.cc index 6a91a35a..c05f38d5 100644 --- a/test/test.cc +++ b/test/test.cc @@ -651,9 +651,12 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, if (err == MDBX_SUCCESS && config.params.speculum) { const auto S_key = S(akey); const auto S_data = S(adata); - const bool inserted = speculum.emplace(S_key, S_data).second; - assert(inserted); - (void)inserted; + if (unlikely(!speculum.emplace(S_key, S_data).second)) { + char dump_key[128], dump_value[128]; + log_error("speculum-insert: pair not inserted {%s, %s}", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + } } return err; } @@ -666,11 +669,21 @@ int testcase::replace(const keygen::buffer &akey, const auto S_old = S(old_data); const auto S_new = S(new_data); const auto removed = speculum.erase(SET::key_type(S_key, S_old)); - assert(removed == 1); - (void)removed; - const bool inserted = speculum.emplace(S_key, S_new).second; - assert(inserted); - (void)inserted; + if (unlikely(removed != 1)) { + char dump_key[128], dump_value[128]; + log_error( + "speculum-%s: %s old value {%s, %s}", "replace", + (removed > 1) ? 
"multi" : "no", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&old_data->value, dump_value, sizeof(dump_value))); + } + if (unlikely(!speculum.emplace(S_key, S_new).second)) { + char dump_key[128], dump_value[128]; + log_error( + "speculum-replace: new pair not inserted {%s, %s}", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&new_data->value, dump_value, sizeof(dump_value))); + } } return mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, &old_data->value, flags); @@ -681,8 +694,13 @@ int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { const auto S_key = S(akey); const auto S_data = S(adata); const auto removed = speculum.erase(SET::key_type(S_key, S_data)); - assert(removed == 1); - (void)removed; + if (unlikely(removed != 1)) { + char dump_key[128], dump_value[128]; + log_error("speculum-%s: %s old value {%s, %s}", "remove", + (removed > 1) ? "multi" : "no", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + } } return mdbx_del(txn_guard.get(), dbi, &akey->value, &adata->value); } From 2489e0ba6e9a8ea16d591ce4e0ced6522d1c0ba0 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Wed, 4 Nov 2020 12:13:54 +0300 Subject: [PATCH 07/28] mdbx: refactor/rename `cursor_copy_internal()`. 
Change-Id: I334e68d3d424ef15cafcaafd23d61f80611fce01 --- src/core.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/core.c b/src/core.c index 86d967a4..6e585be0 100644 --- a/src/core.c +++ b/src/core.c @@ -3240,7 +3240,7 @@ static int __must_check_result mdbx_xcursor_init1(MDBX_cursor *mc, static int __must_check_result mdbx_xcursor_init2(MDBX_cursor *mc, MDBX_xcursor *src_mx, bool new_dupdata); -static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst); +static void cursor_copy_internal(const MDBX_cursor *csrc, MDBX_cursor *cdst); static int __must_check_result mdbx_drop0(MDBX_cursor *mc, int subs); static int __must_check_result mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi); @@ -14355,7 +14355,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { const unsigned snum = cdst->mc_snum; mdbx_cassert(csrc, snum > 0); MDBX_cursor mn; - mdbx_cursor_copy(cdst, &mn); + cursor_copy_internal(cdst, &mn); mn.mc_xcursor = NULL; /* must find the lowest key below dst */ rc = mdbx_page_search_lowest(&mn); @@ -14537,7 +14537,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { mdbx_debug("update separator for source page %" PRIaPGNO " to [%s]", psrc->mp_pgno, DKEY(&key)); MDBX_cursor mn; - mdbx_cursor_copy(csrc, &mn); + cursor_copy_internal(csrc, &mn); mn.mc_xcursor = NULL; mdbx_cassert(csrc, mn.mc_snum > 0); mn.mc_snum--; @@ -14572,7 +14572,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { mdbx_debug("update separator for destination page %" PRIaPGNO " to [%s]", pdst->mp_pgno, DKEY(&key)); MDBX_cursor mn; - mdbx_cursor_copy(cdst, &mn); + cursor_copy_internal(cdst, &mn); mn.mc_xcursor = NULL; mdbx_cassert(cdst, mn.mc_snum > 0); mn.mc_snum--; @@ -14655,7 +14655,7 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { key.iov_base = node_key(srcnode); if (pagetype & P_BRANCH) { MDBX_cursor mn; - 
mdbx_cursor_copy(csrc, &mn); + cursor_copy_internal(csrc, &mn); mn.mc_xcursor = NULL; /* must find the lowest key below src */ rc = mdbx_page_search_lowest(&mn); @@ -14851,7 +14851,7 @@ bailout: /* Copy the contents of a cursor. * [in] csrc The cursor to copy from. * [out] cdst The cursor to copy to. */ -static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) { +static void cursor_copy_internal(const MDBX_cursor *csrc, MDBX_cursor *cdst) { mdbx_cassert(csrc, csrc->mc_txn->mt_txnid >= *csrc->mc_txn->mt_env->me_oldest); cdst->mc_txn = csrc->mc_txn; @@ -15008,7 +15008,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { /* Find neighbors. */ MDBX_cursor mn; - mdbx_cursor_copy(mc, &mn); + cursor_copy_internal(mc, &mn); mn.mc_xcursor = NULL; MDBX_page *left = nullptr, *right = nullptr; @@ -15045,7 +15045,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { /* We want mdbx_rebalance to find mn when doing fixups */ WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn)); if (likely(rc != MDBX_RESULT_TRUE)) { - mdbx_cursor_copy(&mn, mc); + cursor_copy_internal(&mn, mc); mc->mc_ki[mc->mc_top] = new_ki; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); return rc; @@ -15114,7 +15114,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { /* We want mdbx_rebalance to find mn when doing fixups */ WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn)); if (likely(rc != MDBX_RESULT_TRUE)) { - mdbx_cursor_copy(&mn, mc); + cursor_copy_internal(&mn, mc); mc->mc_ki[mc->mc_top] = new_ki; mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys); return rc; @@ -15738,7 +15738,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey, mdbx_debug("parent branch page is %" PRIaPGNO, mc->mc_pg[ptop]->mp_pgno); } - mdbx_cursor_copy(mc, &mn); + cursor_copy_internal(mc, &mn); mn.mc_xcursor = NULL; mn.mc_pg[mn.mc_top] = rp; mn.mc_ki[mn.mc_top] = 0; @@ -17607,7 +17607,7 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) { if (unlikely(rc)) 
goto done; - mdbx_cursor_copy(mc, &mx); + cursor_copy_internal(mc, &mx); while (mc->mc_snum > 0) { MDBX_page *mp = mc->mc_pg[mc->mc_top]; unsigned n = page_numkeys(mp); @@ -18781,7 +18781,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return MDBX_ENODATA; MDBX_cursor_couple next; - mdbx_cursor_copy(cursor, &next.outer); + cursor_copy_internal(cursor, &next.outer); next.outer.mc_xcursor = NULL; if (cursor->mc_db->md_flags & MDBX_DUPSORT) { next.outer.mc_xcursor = &next.inner; @@ -18789,7 +18789,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, if (unlikely(rc != MDBX_SUCCESS)) return rc; MDBX_xcursor *mx = &container_of(cursor, MDBX_cursor_couple, outer)->inner; - mdbx_cursor_copy(&mx->mx_cursor, &next.inner.mx_cursor); + cursor_copy_internal(&mx->mx_cursor, &next.inner.mx_cursor); } MDBX_val stub = {0, 0}; From b6f0070f85787209359469f5f7d8e83ffc5cd4f7 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Wed, 4 Nov 2020 18:15:14 +0300 Subject: [PATCH 08/28] mdbx: add `mdbx_cursor_copy()`. Change-Id: I476f1230beec6a550897bef236745530154c1079 --- mdbx.h | 12 ++++++++++++ src/core.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/mdbx.h b/mdbx.h index 03fa4afa..79a6ba7c 100644 --- a/mdbx.h +++ b/mdbx.h @@ -3861,6 +3861,18 @@ mdbx_cursor_txn(const MDBX_cursor *cursor); * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). */ LIBMDBX_API MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *cursor); +/** \brief Copy cursor position and state. + * \ingroup c_cursors + * + * \param [in] src A source cursor handle returned + * by \ref mdbx_cursor_create() or \ref mdbx_cursor_open(). + * + * \param [in,out] dest A destination cursor handle returned + * by \ref mdbx_cursor_create() or \ref mdbx_cursor_open(). + * + * \returns A non-zero error value on failure and 0 on success. 
*/ +LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest); + /** \brief Retrieve by cursor. * \ingroup c_crud * diff --git a/src/core.c b/src/core.c index 6e585be0..dff3b258 100644 --- a/src/core.c +++ b/src/core.c @@ -14140,6 +14140,34 @@ int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) { return likely(mc) ? mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL; } +int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { + if (unlikely(!src)) + return MDBX_EINVAL; + if (unlikely(src->mc_signature != MDBX_MC_LIVE)) + return MDBX_EBADSIGN; + + int rc = mdbx_cursor_bind(src->mc_txn, dest, src->mc_dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + +again: + dest->mc_flags ^= (dest->mc_flags ^ src->mc_flags) & ~C_UNTRACK; + dest->mc_top = src->mc_top; + dest->mc_snum = src->mc_snum; + for (unsigned i = 0; i < src->mc_snum; ++i) { + dest->mc_ki[i] = src->mc_ki[i]; + dest->mc_pg[i] = src->mc_pg[i]; + } + + if (src->mc_xcursor) { + src = &src->mc_xcursor->mx_cursor; + dest = &dest->mc_xcursor->mx_cursor; + goto again; + } + + return MDBX_SUCCESS; +} + /* Return the count of duplicate data items for the current key */ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { if (unlikely(mc == NULL)) From b1446b77521789b2748bba16b4d14d9146461857 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Thu, 5 Nov 2020 08:25:20 +0300 Subject: [PATCH 09/28] mdbx: minor fix/distinction `MDBX_EBADSIGN` and `MDBX_EINVAL` errors for cursors. Change-Id: I6d72638a69ff0f793156fe5e0e7ca5531a97c7cf --- src/core.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/src/core.c b/src/core.c index dff3b258..2bb7f6ee 100644 --- a/src/core.c +++ b/src/core.c @@ -12374,7 +12374,8 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -12587,7 +12588,8 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -13386,7 +13388,8 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -14050,7 +14053,7 @@ int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) { if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE && mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EINVAL; + return MDBX_EBADSIGN; MDBX_cursor_couple *couple = container_of(mc, MDBX_cursor_couple, outer); couple->mc_userctx = ctx; @@ -14074,7 +14077,9 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE)) { - if (unlikely(mc->mc_signature != MDBX_MC_LIVE || mc->mc_backup)) + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return MDBX_EBADSIGN; + if (unlikely(mc->mc_backup)) return MDBX_EINVAL; if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) return MDBX_PROBLEM; @@ -14144,7 +14149,8 @@ int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { if (unlikely(!src)) return MDBX_EINVAL; if (unlikely(src->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (src->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; int rc = mdbx_cursor_bind(src->mc_txn, dest, src->mc_dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -14174,7 +14180,8 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -18563,7 +18570,8 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) return MDBX_RESULT_FALSE; @@ -18581,7 +18589,8 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) return MDBX_RESULT_FALSE; @@ -18600,7 +18609,8 @@ int mdbx_cursor_eof(const MDBX_cursor *mc) { return MDBX_EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; if ((mc->mc_flags & C_INITIALIZED) == 0) return MDBX_RESULT_TRUE; @@ -18631,9 +18641,13 @@ __hot static int cursor_diff(const MDBX_cursor *const __restrict x, r->level = 0; r->root_nkeys = 0; - if (unlikely(y->mc_signature != MDBX_MC_LIVE || - x->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + if (unlikely(x->mc_signature != MDBX_MC_LIVE)) + return (x->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (unlikely(y->mc_signature != MDBX_MC_LIVE)) + return (y->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(x->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -18799,7 +18813,8 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return MDBX_EINVAL; if (unlikely(cursor->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; + return (cursor->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; int rc = check_txn(cursor->mc_txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) From c55f99073b1990457a2e93876cb8b51b09ce534e Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Fri, 6 Nov 2020 14:22:06 +0300 Subject: [PATCH 10/28] mdbx: fix `MDBX_NEXT` for EOF case. Change-Id: I84c033a24d2215a4867affab5b148bf6ba5d57dc --- src/core.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/core.c b/src/core.c index 2bb7f6ee..431395ce 100644 --- a/src/core.c +++ b/src/core.c @@ -11889,7 +11889,7 @@ static int mdbx_cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, goto skip; } - if (mc->mc_ki[mc->mc_top] + 1u >= page_numkeys(mp)) { + if (++mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) { mdbx_debug("%s", "=====> move to next sibling page"); if (unlikely((rc = mdbx_cursor_sibling(mc, SIBLING_RIGHT)) != MDBX_SUCCESS)) { @@ -11899,8 +11899,7 @@ static int mdbx_cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mp = mc->mc_pg[mc->mc_top]; mdbx_debug("next page is %" PRIaPGNO ", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); - } else - mc->mc_ki[mc->mc_top]++; + } skip: mdbx_debug("==> cursor points to page %" PRIaPGNO @@ -11984,9 +11983,8 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (mc->mc_ki[mc->mc_top] == 0) { mdbx_debug("%s", "=====> move to prev sibling page"); - if ((rc = mdbx_cursor_sibling(mc, SIBLING_LEFT)) != MDBX_SUCCESS) { + if ((rc = mdbx_cursor_sibling(mc, SIBLING_LEFT)) != MDBX_SUCCESS) return rc; - } mp = mc->mc_pg[mc->mc_top]; mc->mc_ki[mc->mc_top] = 
(indx_t)page_numkeys(mp) - 1; mdbx_debug("prev page is %" PRIaPGNO ", key index %u", mp->mp_pgno, @@ -13254,18 +13252,17 @@ new_sub:; rc = mdbx_node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, rdata, nflags); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2, *m3; - MDBX_dbi dbi = mc->mc_dbi; - unsigned i = mc->mc_top; - MDBX_page *mp = mc->mc_pg[i]; - - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { - m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + const MDBX_dbi dbi = mc->mc_dbi; + const unsigned i = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[i]; + for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; + m2 = m2->mc_next) { + MDBX_cursor *m3 = + (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[i] != mp) continue; - if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) { - m3->mc_ki[i]++; - } + if (m3->mc_ki[i] >= mc->mc_ki[i]) + m3->mc_ki[i] += insert_key; if (XCURSOR_INITED(m3)) XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); } From 5f09ec73c99845122fe95e221e240f72ed8878ec Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Sat, 7 Nov 2020 16:23:50 +0300 Subject: [PATCH 11/28] mdbx: return `MDBX_ENODATA` instead of `MDBX_EINVAL` for non-positioned/EOF cursor and `MDBX_GET_CURRENT`. 
Change-Id: I2adf76f8f662e77e5a6aa077344fab6b430975c4 --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 431395ce..57f58759 100644 --- a/src/core.c +++ b/src/core.c @@ -12384,7 +12384,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, switch (op) { case MDBX_GET_CURRENT: { if (unlikely(!(mc->mc_flags & C_INITIALIZED))) - return MDBX_EINVAL; + return MDBX_ENODATA; MDBX_page *mp = mc->mc_pg[mc->mc_top]; const unsigned nkeys = page_numkeys(mp); if (mc->mc_ki[mc->mc_top] >= nkeys) { From 9a2dbb845cf2d9c6a75cd72d5ce3d3292fd90cc9 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Sun, 8 Nov 2020 16:15:34 +0300 Subject: [PATCH 12/28] mdbx: refactor/refine cursor next/prev. Change-Id: I0bfe0fed62e137e933027ba90cec23dbf1a6310b --- src/core.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/core.c b/src/core.c index 57f58759..74fe91f8 100644 --- a/src/core.c +++ b/src/core.c @@ -11274,13 +11274,11 @@ static void mdbx_cursor_adjust(MDBX_cursor *mc, func) { #endif /* Pop a page off the top of the cursor's stack. */ -static void mdbx_cursor_pop(MDBX_cursor *mc) { +static __inline void mdbx_cursor_pop(MDBX_cursor *mc) { if (mc->mc_snum) { mdbx_debug("popped page %" PRIaPGNO " off db %d cursor %p", mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *)mc); - - mc->mc_snum--; - if (mc->mc_snum) { + if (--mc->mc_snum) { mc->mc_top--; } else { mc->mc_flags &= ~C_INITIALIZED; @@ -11290,7 +11288,7 @@ static void mdbx_cursor_pop(MDBX_cursor *mc) { /* Push a page onto the top of the cursor's stack. * Set MDBX_TXN_ERROR on failure. 
*/ -static int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) { +static __inline int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) { mdbx_debug("pushing page %" PRIaPGNO " on db %d cursor %p", mp->mp_pgno, DDBI(mc), (void *)mc); @@ -11809,7 +11807,7 @@ static int mdbx_cursor_sibling(MDBX_cursor *mc, int dir) { if ((dir == SIBLING_RIGHT) ? (mc->mc_ki[mc->mc_top] + 1u >= page_numkeys(mc->mc_pg[mc->mc_top])) : (mc->mc_ki[mc->mc_top] == 0)) { - mdbx_debug("no more keys left, moving to %s sibling", + mdbx_debug("no more keys aside, moving to next %s sibling", dir ? "right" : "left"); if (unlikely((rc = mdbx_cursor_sibling(mc, dir)) != MDBX_SUCCESS)) { /* undo cursor_pop before returning */ @@ -11837,9 +11835,9 @@ static int mdbx_cursor_sibling(MDBX_cursor *mc, int dir) { rc = mdbx_cursor_push(mc, mp); if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (dir == SIBLING_LEFT) - mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mp) - 1; + mc->mc_ki[mc->mc_top] = + (indx_t)((dir == SIBLING_LEFT) ? 
page_numkeys(mp) - 1 : 0); return MDBX_SUCCESS; } @@ -11889,8 +11887,12 @@ static int mdbx_cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, goto skip; } - if (++mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) { + int ki = mc->mc_ki[mc->mc_top]; + mc->mc_ki[mc->mc_top] = (indx_t)++ki; + const int numkeys = page_numkeys(mp); + if (unlikely(ki >= numkeys)) { mdbx_debug("%s", "=====> move to next sibling page"); + mc->mc_ki[mc->mc_top] = numkeys - 1; if (unlikely((rc = mdbx_cursor_sibling(mc, SIBLING_RIGHT)) != MDBX_SUCCESS)) { mc->mc_flags |= C_EOF; @@ -11981,17 +11983,17 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mc->mc_flags &= ~(C_EOF | C_DEL); - if (mc->mc_ki[mc->mc_top] == 0) { + int ki = mc->mc_ki[mc->mc_top]; + mc->mc_ki[mc->mc_top] = (indx_t)--ki; + if (unlikely(ki < 0)) { + mc->mc_ki[mc->mc_top] = 0; mdbx_debug("%s", "=====> move to prev sibling page"); if ((rc = mdbx_cursor_sibling(mc, SIBLING_LEFT)) != MDBX_SUCCESS) return rc; mp = mc->mc_pg[mc->mc_top]; - mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mp) - 1; mdbx_debug("prev page is %" PRIaPGNO ", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); - } else - mc->mc_ki[mc->mc_top]--; - + } mdbx_debug("==> cursor points to page %" PRIaPGNO " with %u keys, key index %u", mp->mp_pgno, page_numkeys(mp), mc->mc_ki[mc->mc_top]); From ca115dd6a4a6221d677f617ee34b9860743272fe Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Mon, 9 Nov 2020 01:38:46 +0300 Subject: [PATCH 13/28] mdbx: refactor/rename/refine `tw.cursors` internals. 
Change-Id: Ie46d15f52a9d7365b52534a630754a31d3005a69 --- src/core.c | 112 +++++++++++++++++++++++++----------------------- src/internals.h | 4 +- 2 files changed, 60 insertions(+), 56 deletions(-) diff --git a/src/core.c b/src/core.c index 74fe91f8..187b0593 100644 --- a/src/core.c +++ b/src/core.c @@ -3600,7 +3600,7 @@ static __maybe_unused void mdbx_page_list(MDBX_page *mp) { } while (0) static __maybe_unused bool cursor_is_tracked(const MDBX_cursor *mc) { - for (MDBX_cursor *scan = mc->mc_txn->mt_cursors[mc->mc_dbi]; scan; + for (MDBX_cursor *scan = mc->mc_txn->tw.cursors[mc->mc_dbi]; scan; scan = scan->mc_next) if (mc == ((mc->mc_flags & C_SUB) ? &scan->mc_xcursor->mx_cursor : scan)) return true; @@ -3611,10 +3611,10 @@ static __maybe_unused bool cursor_is_tracked(const MDBX_cursor *mc) { #define WITH_CURSOR_TRACKING(mn, act) \ do { \ mdbx_cassert(&(mn), \ - mn.mc_txn->mt_cursors != NULL /* must be not rdonly txt */); \ + mn.mc_txn->tw.cursors != NULL /* must be not rdonly txt */); \ mdbx_cassert(&(mn), !cursor_is_tracked(&(mn))); \ MDBX_cursor mc_dummy; \ - MDBX_cursor **tracking_head = &(mn).mc_txn->mt_cursors[mn.mc_dbi]; \ + MDBX_cursor **tracking_head = &(mn).mc_txn->tw.cursors[mn.mc_dbi]; \ MDBX_cursor *tracked = &(mn); \ if ((mn).mc_flags & C_SUB) { \ mc_dummy.mc_flags = C_INITIALIZED; \ @@ -4201,7 +4201,7 @@ static int mdbx_pages_xkeep(MDBX_cursor *mc, unsigned pflags, bool all) { } } mc = mc->mc_next; - for (; !mc || mc == m0; mc = txn->mt_cursors[--i]) + for (; !mc || mc == m0; mc = txn->tw.cursors[--i]) if (i == 0) goto mark_done; } @@ -5702,7 +5702,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) { done: /* Adjust cursors pointing to mp */ mc->mc_pg[mc->mc_top] = np; - m2 = txn->mt_cursors[mc->mc_dbi]; + m2 = txn->tw.cursors[mc->mc_dbi]; if (mc->mc_flags & C_SUB) { for (; m2; m2 = m2->mc_next) { m3 = &m2->mc_xcursor->mx_cursor; @@ -5866,8 +5866,8 @@ static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { MDBX_xcursor *mx; for (int i = 
src->mt_numdbs; --i >= 0;) { - dst->mt_cursors[i] = NULL; - if ((mc = src->mt_cursors[i]) != NULL) { + dst->tw.cursors[i] = NULL; + if ((mc = src->tw.cursors[i]) != NULL) { size_t size = sizeof(MDBX_cursor); if (mc->mc_xcursor) size += sizeof(MDBX_xcursor); @@ -5887,8 +5887,8 @@ static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { *(MDBX_xcursor *)(bk + 1) = *mx; mx->mx_cursor.mc_txn = dst; } - mc->mc_next = dst->mt_cursors[i]; - dst->mt_cursors[i] = mc; + mc->mc_next = dst->tw.cursors[i]; + dst->tw.cursors[i] = mc; } } } @@ -5902,7 +5902,7 @@ static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { * * Returns 0 on success, non-zero on failure. */ static void mdbx_cursors_eot(MDBX_txn *txn, unsigned merge) { - MDBX_cursor **cursors = txn->mt_cursors, *mc, *next, *bk; + MDBX_cursor **cursors = txn->tw.cursors, *mc, *next, *bk; MDBX_xcursor *mx; int i; @@ -6586,7 +6586,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, if (parent) { mdbx_tassert(txn, mdbx_dirtylist_check(parent)); - txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); + txn->tw.cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); txn->mt_dbiseqs = parent->mt_dbiseqs; txn->tw.dirtylist = mdbx_malloc(sizeof(MDBX_DP) * (MDBX_DPL_TXNFULL + 1)); txn->tw.reclaimed_pglist = @@ -7235,8 +7235,8 @@ static int mdbx_update_gc(MDBX_txn *txn) { goto bailout_notracking; couple.outer.mc_flags |= C_RECLAIMING; - couple.outer.mc_next = txn->mt_cursors[FREE_DBI]; - txn->mt_cursors[FREE_DBI] = &couple.outer; + couple.outer.mc_next = txn->tw.cursors[FREE_DBI]; + txn->tw.cursors[FREE_DBI] = &couple.outer; retry: ++loop; @@ -7888,7 +7888,7 @@ retry_noaccount: cleaned_gc_slot == MDBX_PNL_SIZE(txn->tw.lifo_reclaimed)); bailout: - txn->mt_cursors[FREE_DBI] = couple.outer.mc_next; + txn->tw.cursors[FREE_DBI] = couple.outer.mc_next; bailout_notracking: MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) = 0; @@ -10873,8 +10873,8 @@ __cold int mdbx_env_open(MDBX_env *env, 
const char *pathname, MDBX_txn *txn = mdbx_calloc(1, size); if (txn) { txn->mt_dbs = (MDBX_db *)((char *)txn + tsize); - txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); - txn->mt_dbiseqs = (unsigned *)(txn->mt_cursors + env->me_maxdbs); + txn->tw.cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); + txn->mt_dbiseqs = (unsigned *)(txn->tw.cursors + env->me_maxdbs); txn->mt_dbistate = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs); txn->mt_env = env; txn->mt_dbxs = env->me_dbxs; @@ -11265,7 +11265,7 @@ static MDBX_node *__hot mdbx_node_search(MDBX_cursor *mc, const MDBX_val *key, static void mdbx_cursor_adjust(MDBX_cursor *mc, func) { MDBX_cursor *m2; - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) { func(mc, m2); } @@ -13257,7 +13257,7 @@ new_sub:; const MDBX_dbi dbi = mc->mc_dbi; const unsigned i = mc->mc_top; MDBX_page *const mp = mc->mc_pg[i]; - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; @@ -13314,7 +13314,7 @@ new_sub:; MDBX_page *mp = mc->mc_pg[i]; const int nkeys = page_numkeys(mp); - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) @@ -13440,7 +13440,7 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { node = page_node(mp, mc->mc_ki[mc->mc_top]); mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); /* fix other sub-DB cursors pointed at fake pages on this page */ - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) @@ -14082,8 +14082,8 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { return MDBX_EINVAL; if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) return MDBX_PROBLEM; - if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { - MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; + if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->tw.cursors) { + MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi]; while (*prev && *prev != mc) prev = &(*prev)->mc_next; if (*prev == mc) @@ -14112,9 +14112,9 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (txn->mt_cursors) { - mc->mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = mc; + if (!(txn->mt_flags & MDBX_TXN_RDONLY)) { + mc->mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = mc; mc->mc_flags |= C_UNTRACK; } @@ -14222,17 +14222,19 @@ void mdbx_cursor_close(MDBX_cursor *mc) { /* Remove from txn, if tracked. * A read-only txn (!C_UNTRACK) may have been freed already, * so do not peek inside it. Only write txns track cursors. 
*/ - if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { - MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; + if (mc->mc_flags & C_UNTRACK) { + mdbx_cassert(mc, !(mc->mc_txn->mt_flags & MDBX_TXN_RDONLY)); + MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi]; while (*prev && *prev != mc) prev = &(*prev)->mc_next; - if (*prev == mc) - *prev = mc->mc_next; + mdbx_cassert(mc, *prev == mc); + *prev = mc->mc_next; } mc->mc_signature = 0; + mc->mc_next = mc; mdbx_free(mc); } else { - /* cursor closed before nested txn ends */ + /* Cursor closed before nested txn ends */ mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE); mc->mc_signature = MDBX_MC_WAIT4EOT; } @@ -14512,7 +14514,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { mdbx_cassert(csrc, csrc->mc_top == cdst->mc_top); if (fromleft) { /* If we're adding on the left, bump others up */ - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (!(m3->mc_flags & C_INITIALIZED) || m3->mc_top < csrc->mc_top) continue; @@ -14532,7 +14534,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { } } else { /* Adding on the right, bump others down */ - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (m3 == csrc) continue; @@ -14774,7 +14776,7 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { const MDBX_dbi dbi = csrc->mc_dbi; const unsigned top = csrc->mc_top; - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (csrc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; if (m3 == csrc || top >= m3->mc_snum) continue; @@ -14964,7 +14966,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { mc->mc_db->md_overflow_pages == 0 && mc->mc_db->md_leaf_pages == 1); /* Adjust cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; @@ -14998,7 +15000,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) { } /* Adjust other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; @@ -15529,7 +15531,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) { mc->mc_db->md_entries--; /* Adjust other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (m3 == mc || !(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) continue; @@ -15574,7 +15576,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) { nkeys == 0)); /* Adjust this and other cursors pointing to mp */ - for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) continue; @@ -15682,10 +15684,10 @@ static int mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * is larger than the current one, the parent page may * run out of space, triggering a split. We need this * cursor to be consistent until the end of the rebalance. 
*/ - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; rc = mdbx_cursor_del(&cx.outer, flags); - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; } return rc; } @@ -16121,7 +16123,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey, MDBX_dbi dbi = mc->mc_dbi; nkeys = page_numkeys(mp); - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { + for (m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) { m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; if (m3 == mc) continue; @@ -16197,8 +16199,8 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, rc = mdbx_cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; /* LY: support for update (explicit overwrite) */ if (flags & MDBX_CURRENT) { @@ -16219,7 +16221,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, if (likely(rc == MDBX_SUCCESS)) rc = mdbx_cursor_put(&cx.outer, key, data, flags); - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; return rc; } @@ -17504,6 +17506,8 @@ static int dbi_open(MDBX_txn *txn, const char *table_name, unsigned user_flags, txn->mt_dbistate[slot] = (uint8_t)dbiflags; txn->mt_dbxs[slot].md_name.iov_base = namedup; txn->mt_dbxs[slot].md_name.iov_len = len; + if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) + txn->tw.cursors[slot] = NULL; txn->mt_numdbs += (slot == txn->mt_numdbs); if ((dbiflags & DBI_CREAT) == 0) { env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; @@ -17738,7 +17742,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { rc = mdbx_drop0(mc, mc->mc_db->md_flags & MDBX_DUPSORT); /* Invalidate the dropped 
DB's cursors */ - for (MDBX_cursor *m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) + for (MDBX_cursor *m2 = txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) m2->mc_flags &= ~(C_INITIALIZED | C_EOF); if (unlikely(rc)) goto bailout; @@ -19068,8 +19072,8 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, rc = mdbx_cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; MDBX_val present_key = *key; if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) { @@ -19145,7 +19149,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, rc = mdbx_cursor_del(&cx.outer, flags & MDBX_ALLDUPS); bailout: - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; return rc; } @@ -19669,10 +19673,10 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, rc = mdbx_cursor_set(&cx.outer, key, &old_data, MDBX_SET, NULL); if (unlikely(rc != MDBX_SUCCESS)) { if (rc == MDBX_NOTFOUND && data) { - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; rc = mdbx_cursor_put_attr(&cx.outer, key, data, attr, 0); - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; } return rc; } @@ -19687,11 +19691,11 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, old_data.iov_len) == 0))) return MDBX_SUCCESS; - cx.outer.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &cx.outer; + cx.outer.mc_next = txn->tw.cursors[dbi]; + txn->tw.cursors[dbi] = &cx.outer; rc = mdbx_cursor_put_attr(&cx.outer, key, data ? 
data : &old_data, attr, MDBX_CURRENT); - txn->mt_cursors[dbi] = cx.outer.mc_next; + txn->tw.cursors[dbi] = cx.outer.mc_next; return rc; } #endif /* MDBX_NEXENTA_ATTRS */ diff --git a/src/internals.h b/src/internals.h index 72b62f60..85c00493 100644 --- a/src/internals.h +++ b/src/internals.h @@ -778,8 +778,6 @@ struct MDBX_txn { MDBX_db *mt_dbs; /* Array of sequence numbers for each DB handle */ unsigned *mt_dbiseqs; - /* In write txns, array of cursors for each DB */ - MDBX_cursor **mt_cursors; /* Transaction DBI Flags */ #define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */ @@ -806,6 +804,8 @@ struct MDBX_txn { MDBX_reader *reader; } to; struct { + /* In write txns, array of cursors for each DB */ + MDBX_cursor **cursors; pgno_t *reclaimed_pglist; /* Reclaimed GC pages */ txnid_t last_reclaimed; /* ID of last used record */ pgno_t loose_refund_wl /* FIXME: describe */; From e328c1f82943c3fbcd87db690f483ecb04f12fde Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 10 Nov 2020 02:18:43 +0300 Subject: [PATCH 14/28] mdbx: refactor/move `mdbx_cursor_count()`. Change-Id: I5487342013dfc3a9cf12d9081471ace0c37de23d --- src/core.c | 82 +++++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/src/core.c b/src/core.c index 187b0593..fe4ee848 100644 --- a/src/core.c +++ b/src/core.c @@ -14173,47 +14173,6 @@ again: return MDBX_SUCCESS; } -/* Return the count of duplicate data items for the current key */ -int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { - if (unlikely(mc == NULL)) - return MDBX_EINVAL; - - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL - : MDBX_EBADSIGN; - - int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(countp == NULL || !(mc->mc_flags & C_INITIALIZED))) - return MDBX_EINVAL; - - if (!mc->mc_snum) { - *countp = 0; - return MDBX_NOTFOUND; - } - - MDBX_page *mp = mc->mc_pg[mc->mc_top]; - if ((mc->mc_flags & C_EOF) && mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) { - *countp = 0; - return MDBX_NOTFOUND; - } - - *countp = 1; - if (mc->mc_xcursor != NULL) { - MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (F_ISSET(node_flags(node), F_DUPDATA)) { - mdbx_cassert(mc, mc->mc_xcursor && (mc->mc_xcursor->mx_cursor.mc_flags & - C_INITIALIZED)); - *countp = unlikely(mc->mc_xcursor->mx_db.md_entries > PTRDIFF_MAX) - ? PTRDIFF_MAX - : (size_t)mc->mc_xcursor->mx_db.md_entries; - } - } - return MDBX_SUCCESS; -} - void mdbx_cursor_close(MDBX_cursor *mc) { if (mc) { mdbx_ensure(NULL, mc->mc_signature == MDBX_MC_LIVE || @@ -14258,6 +14217,47 @@ MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { return mc->mc_dbi; } +/* Return the count of duplicate data items for the current key */ +int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) { + if (unlikely(mc == NULL)) + return MDBX_EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? 
MDBX_EINVAL + : MDBX_EBADSIGN; + + int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(countp == NULL || !(mc->mc_flags & C_INITIALIZED))) + return MDBX_EINVAL; + + if (!mc->mc_snum) { + *countp = 0; + return MDBX_NOTFOUND; + } + + MDBX_page *mp = mc->mc_pg[mc->mc_top]; + if ((mc->mc_flags & C_EOF) && mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) { + *countp = 0; + return MDBX_NOTFOUND; + } + + *countp = 1; + if (mc->mc_xcursor != NULL) { + MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); + if (F_ISSET(node_flags(node), F_DUPDATA)) { + mdbx_cassert(mc, mc->mc_xcursor && (mc->mc_xcursor->mx_cursor.mc_flags & + C_INITIALIZED)); + *countp = unlikely(mc->mc_xcursor->mx_db.md_entries > PTRDIFF_MAX) + ? PTRDIFF_MAX + : (size_t)mc->mc_xcursor->mx_db.md_entries; + } + } + return MDBX_SUCCESS; +} + /* Replace the key for a branch node with a new key. * Set MDBX_TXN_ERROR on failure. * [in] mc Cursor pointing to the node to operate on. From 4ea2bea22e31510226d07ae63dc55ce3af72b4fa Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Wed, 11 Nov 2020 18:03:26 +0300 Subject: [PATCH 15/28] mdbx: fix save/restore/commit cursors for nested txn. 
Change-Id: Ibaf356bbc631ca2e41058787bd3e4aaaa8bc085a --- src/core.c | 174 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 106 insertions(+), 68 deletions(-) diff --git a/src/core.c b/src/core.c index fe4ee848..afd60395 100644 --- a/src/core.c +++ b/src/core.c @@ -5861,34 +5861,35 @@ __cold int mdbx_env_sync_poll(MDBX_env *env) { } /* Back up parent txn's cursors, then grab the originals for tracking */ -static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { - MDBX_cursor *mc, *bk; - MDBX_xcursor *mx; - - for (int i = src->mt_numdbs; --i >= 0;) { - dst->tw.cursors[i] = NULL; - if ((mc = src->tw.cursors[i]) != NULL) { - size_t size = sizeof(MDBX_cursor); - if (mc->mc_xcursor) - size += sizeof(MDBX_xcursor); - for (; mc; mc = bk->mc_next) { +static int mdbx_cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) { + for (int i = parent->mt_numdbs; --i >= 0;) { + nested->tw.cursors[i] = NULL; + MDBX_cursor *mc = parent->tw.cursors[i]; + if (mc != NULL) { + size_t size = mc->mc_xcursor ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor) + : sizeof(MDBX_cursor); + for (MDBX_cursor *bk; mc; mc = bk->mc_next) { + bk = mc; + if (mc->mc_signature != MDBX_MC_LIVE) + continue; bk = mdbx_malloc(size); if (unlikely(!bk)) return MDBX_ENOMEM; *bk = *mc; mc->mc_backup = bk; - mc->mc_db = &dst->mt_dbs[i]; /* Kill pointers into src to reduce abuse: The * user may not use mc until dst ends. But we need a valid * txn pointer here for cursor fixups to keep working. 
*/ - mc->mc_txn = dst; - mc->mc_dbistate = &dst->mt_dbistate[i]; - if ((mx = mc->mc_xcursor) != NULL) { + mc->mc_txn = nested; + mc->mc_db = &nested->mt_dbs[i]; + mc->mc_dbistate = &nested->mt_dbistate[i]; + MDBX_xcursor *mx = mc->mc_xcursor; + if (mx != NULL) { *(MDBX_xcursor *)(bk + 1) = *mx; - mx->mx_cursor.mc_txn = dst; + mx->mx_cursor.mc_txn = nested; } - mc->mc_next = dst->tw.cursors[i]; - dst->tw.cursors[i] = mc; + mc->mc_next = nested->tw.cursors[i]; + nested->tw.cursors[i] = mc; } } } @@ -5901,47 +5902,56 @@ static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { * [in] merge true to keep changes to parent cursors, false to revert. * * Returns 0 on success, non-zero on failure. */ -static void mdbx_cursors_eot(MDBX_txn *txn, unsigned merge) { - MDBX_cursor **cursors = txn->tw.cursors, *mc, *next, *bk; - MDBX_xcursor *mx; - int i; - - for (i = txn->mt_numdbs; --i >= 0;) { - for (mc = cursors[i]; mc; mc = next) { - unsigned stage = mc->mc_signature; - mdbx_ensure(txn->mt_env, - stage == MDBX_MC_LIVE || stage == MDBX_MC_WAIT4EOT); +static void mdbx_cursors_eot(MDBX_txn *txn, const bool merge) { + for (int i = txn->mt_numdbs; --i >= 0;) { + MDBX_cursor *next, *mc = txn->tw.cursors[i]; + if (!mc) + continue; + txn->tw.cursors[i] = NULL; + do { + const unsigned stage = mc->mc_signature; + MDBX_cursor *bk = mc->mc_backup; next = mc->mc_next; - mdbx_tassert(txn, !next || next->mc_signature == MDBX_MC_LIVE || - next->mc_signature == MDBX_MC_WAIT4EOT); - if ((bk = mc->mc_backup) != NULL) { - if (merge) { - /* Commit changes to parent txn */ + mdbx_ensure(txn->mt_env, + stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk)); + mdbx_cassert(mc, mc->mc_dbi == (unsigned)i); + if (bk) { + MDBX_xcursor *mx = mc->mc_xcursor; + mdbx_cassert(mc, mx == bk->mc_xcursor); + mdbx_tassert(txn, txn->mt_parent != NULL); + mdbx_ensure(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE); + if (stage == MDBX_MC_WAIT4EOT /* Cursor was closed by user */) + mc->mc_signature = stage 
/* Promote closed state to parent txn */; + else if (merge) { + /* Preserve changes from nested to parent txn */ mc->mc_next = bk->mc_next; mc->mc_backup = bk->mc_backup; mc->mc_txn = bk->mc_txn; + *bk->mc_db = *mc->mc_db; mc->mc_db = bk->mc_db; + *bk->mc_dbistate = *mc->mc_dbistate; mc->mc_dbistate = bk->mc_dbistate; - if ((mx = mc->mc_xcursor) != NULL) + if (mx) { + if (mx != bk->mc_xcursor) { + *bk->mc_xcursor = *mx; + mx = bk->mc_xcursor; + } mx->mx_cursor.mc_txn = bk->mc_txn; + } } else { - /* Abort nested txn */ + /* Restore from backup, i.e. rollback/abort nested txn */ *mc = *bk; - if ((mx = mc->mc_xcursor) != NULL) + if (mx) *mx = *(MDBX_xcursor *)(bk + 1); } bk->mc_signature = 0; mdbx_free(bk); - } - if (stage == MDBX_MC_WAIT4EOT) { - mc->mc_signature = 0; - mdbx_free(mc); } else { - mc->mc_signature = MDBX_MC_READY4CLOSE; + mdbx_ensure(txn->mt_env, stage == MDBX_MC_LIVE); + mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */; mc->mc_flags = 0 /* reset C_UNTRACK */; } - } - cursors[i] = NULL; + } while ((mc = next) != NULL); } } @@ -6912,7 +6922,7 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { /* Export or close DBI handles created in this txn */ mdbx_dbis_update(txn, mode & MDBX_END_UPDATE); if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */ - mdbx_cursors_eot(txn, 0); + mdbx_cursors_eot(txn, false); if (!(env->me_flags & MDBX_WRITEMAP)) mdbx_dlist_free(txn); @@ -8153,7 +8163,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { parent->mt_flags |= txn->mt_flags & MDBX_TXN_DIRTY; /* Merge our cursors into parent's and close them */ - mdbx_cursors_eot(txn, 1); + mdbx_cursors_eot(txn, true); + end_mode |= MDBX_END_EOTDONE; /* Update parent's DB table. 
*/ memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); @@ -8379,7 +8390,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == MDBX_DPL_TXNFULL); - mdbx_cursors_eot(txn, 0); + mdbx_cursors_eot(txn, false); end_mode |= MDBX_END_EOTDONE; if (txn->tw.dirtylist->length == 0 && @@ -14075,28 +14086,9 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) return MDBX_EINVAL; - if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE)) { - if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) - return MDBX_EBADSIGN; - if (unlikely(mc->mc_backup)) - return MDBX_EINVAL; - if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) - return MDBX_PROBLEM; - if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->tw.cursors) { - MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi]; - while (*prev && *prev != mc) - prev = &(*prev)->mc_next; - if (*prev == mc) - *prev = mc->mc_next; - } - mc->mc_signature = MDBX_MC_READY4CLOSE; - mc->mc_flags = 0; - mc->mc_dbi = UINT_MAX; - } - - assert(!mc->mc_backup && !mc->mc_flags); - if (unlikely(mc->mc_backup || mc->mc_flags)) - return MDBX_PROBLEM; + if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE && + mc->mc_signature != MDBX_MC_LIVE)) + return MDBX_EBADSIGN; int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -14108,6 +14100,45 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(dbi == FREE_DBI && !F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY))) return MDBX_EACCESS; + if (unlikely(mc->mc_backup)) /* Cursor from parent transaction */ { + mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE); + if (unlikely(mc->mc_dbi != dbi || + /* paranoia */ mc->mc_signature != MDBX_MC_LIVE || + mc->mc_txn != txn)) + return MDBX_EINVAL; + + assert(mc->mc_db == &txn->mt_dbs[dbi]); + assert(mc->mc_dbx == &txn->mt_dbxs[dbi]); + assert(mc->mc_dbi == dbi); + 
assert(mc->mc_dbistate == &txn->mt_dbistate[dbi]); + return likely(mc->mc_dbi == dbi && + /* paranoia */ mc->mc_signature == MDBX_MC_LIVE && + mc->mc_txn == txn) + ? MDBX_SUCCESS + : MDBX_EINVAL /* Disallow change DBI in nested transactions */; + } + + if (mc->mc_signature == MDBX_MC_LIVE) { + if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) + return MDBX_PROBLEM; + if (mc->mc_flags & C_UNTRACK) { + mdbx_cassert(mc, !(mc->mc_txn->mt_flags & MDBX_TXN_RDONLY)); + MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi]; + while (*prev && *prev != mc) + prev = &(*prev)->mc_next; + mdbx_cassert(mc, *prev == mc); + *prev = mc->mc_next; + } + mc->mc_signature = MDBX_MC_READY4CLOSE; + mc->mc_flags = 0; + mc->mc_dbi = UINT_MAX; + mc->mc_next = NULL; + mc->mc_db = NULL; + mc->mc_dbx = NULL; + mc->mc_dbistate = NULL; + } + mdbx_cassert(mc, !(mc->mc_flags & C_UNTRACK)); + rc = mdbx_cursor_init(mc, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -14155,7 +14186,12 @@ int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { if (unlikely(rc != MDBX_SUCCESS)) return rc; + assert(dest->mc_db == src->mc_db); + assert(dest->mc_dbi == src->mc_dbi); + assert(dest->mc_dbx == src->mc_dbx); + assert(dest->mc_dbistate == src->mc_dbistate); again: + assert(dest->mc_txn == src->mc_txn); dest->mc_flags ^= (dest->mc_flags ^ src->mc_flags) & ~C_UNTRACK; dest->mc_top = src->mc_top; dest->mc_snum = src->mc_snum; @@ -14165,6 +14201,8 @@ again: } if (src->mc_xcursor) { + dest->mc_xcursor->mx_db = src->mc_xcursor->mx_db; + dest->mc_xcursor->mx_dbx = src->mc_xcursor->mx_dbx; src = &src->mc_xcursor->mx_cursor; dest = &dest->mc_xcursor->mx_cursor; goto again; From 21bbba82fb6936691adb23f051869393638264a4 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Thu, 12 Nov 2020 05:36:49 +0300 Subject: [PATCH 16/28] mdbx: minor fix cursor_on_first/last for empty sub-db. 
Change-Id: I68c2e1bd28e62c9512bf8ef711fe21573e3450b9 --- src/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index afd60395..87ddda80 100644 --- a/src/core.c +++ b/src/core.c @@ -18615,7 +18615,7 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) { : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) - return MDBX_RESULT_FALSE; + return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; for (unsigned i = 0; i < mc->mc_snum; ++i) { if (mc->mc_ki[i]) @@ -18634,7 +18634,7 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) { : MDBX_EBADSIGN; if (!(mc->mc_flags & C_INITIALIZED)) - return MDBX_RESULT_FALSE; + return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; for (unsigned i = 0; i < mc->mc_snum; ++i) { unsigned nkeys = page_numkeys(mc->mc_pg[i]); From 0c3deac9db2cc02b9d2820a86ab1374b7faa833f Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Fri, 13 Nov 2020 13:12:46 +0300 Subject: [PATCH 17/28] mdbx: fix cursors-EOF after search. Change-Id: Ie578611b64cca8dbcc00f958510143e8d1dc262a --- src/core.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 87ddda80..c021b26f 100644 --- a/src/core.c +++ b/src/core.c @@ -12088,8 +12088,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); mp = mc->mc_pg[mc->mc_top]; - if (!page_numkeys(mp)) { + if (unlikely(!page_numkeys(mp))) { mc->mc_ki[mc->mc_top] = 0; + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } if (IS_LEAF2(mp)) { @@ -12105,6 +12106,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, * was the one we wanted. 
*/ mc->mc_ki[mc->mc_top] = 0; *exactp = 1; + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); goto set1; } if (rc > 0) { @@ -12123,6 +12127,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mdbx_cassert(mc, nkeys >= 1 && nkeys <= UINT16_MAX + 1); mc->mc_ki[mc->mc_top] = (indx_t)(nkeys - 1); *exactp = 1; + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); goto set1; } if (rc < 0) { @@ -12139,6 +12146,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (rc == 0) { /* current node was the one we wanted */ *exactp = 1; + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); goto set1; } } @@ -12156,6 +12166,7 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, /* There are no other pages */ mdbx_cassert(mc, nkeys <= UINT16_MAX); mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } } @@ -12165,8 +12176,12 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (op == MDBX_SET_RANGE) { rc = 0; goto set1; - } else + } else { + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); return MDBX_NOTFOUND; + } } } else { mc->mc_pg[0] = 0; @@ -12183,6 +12198,8 @@ set2: node = mdbx_node_search(mc, &aligned_key, exactp); if (!*exactp && op != MDBX_SET_RANGE) { /* MDBX_SET specified and not an exact match. 
*/ + if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top]))) + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } @@ -12197,6 +12214,9 @@ set2: mdbx_cassert(mc, IS_LEAF(mp)); node = page_node(mp, 0); } + mdbx_cassert(mc, + mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); set1: mc->mc_flags |= C_INITIALIZED; @@ -12258,6 +12278,9 @@ set1: return rc; rc = mc->mc_dbx->md_dcmp(&aligned_data, &olddata); if (rc) { + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); if (op != MDBX_GET_BOTH_RANGE || rc > 0) return MDBX_NOTFOUND; *exactp = 0; @@ -12403,6 +12426,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (mc->mc_ki[mc->mc_top] >= nkeys) { mdbx_cassert(mc, nkeys <= UINT16_MAX); mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } mdbx_cassert(mc, nkeys > 0); @@ -12452,6 +12476,12 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, if (unlikely(key == NULL)) return MDBX_EINVAL; rc = mdbx_cursor_set(mc, key, data, op, &exact); + if (mc->mc_flags & C_INITIALIZED) { + mdbx_cassert(mc, mc->mc_snum > 0 && mc->mc_top < mc->mc_snum); + mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < + page_numkeys(mc->mc_pg[mc->mc_top]) || + (mc->mc_flags & C_EOF)); + } break; case MDBX_GET_MULTIPLE: if (unlikely(data == NULL || !(mc->mc_flags & C_INITIALIZED))) @@ -12524,6 +12554,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return MDBX_INCOMPATIBLE; if (mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])) { mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mc->mc_pg[mc->mc_top]); + mc->mc_flags |= C_EOF; return MDBX_NOTFOUND; } { From 1c925a0f2eb7cc7d1aff68ad4c8982a33991dd3b Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Sat, 14 Nov 2020 11:44:12 +0300 Subject: [PATCH 18/28] mdbx: minor fix/avoid assertion inside `mdbx_cursor_set()`. 
Change-Id: I2cc38da698765b3eaa2bb575e16505d5d9438431 --- src/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index c021b26f..13c78e0e 100644 --- a/src/core.c +++ b/src/core.c @@ -12212,7 +12212,8 @@ set2: } mp = mc->mc_pg[mc->mc_top]; mdbx_cassert(mc, IS_LEAF(mp)); - node = page_node(mp, 0); + if (!IS_LEAF2(mp)) + node = page_node(mp, 0); } mdbx_cassert(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || From 149b3d09e6d59556e0f02f29597938b3987bc185 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Sun, 15 Nov 2020 22:46:37 +0300 Subject: [PATCH 19/28] mdbx: don't export DBIs from nested txn. Change-Id: I8c354ac7f889debe2fcf830263cb060253887652 --- src/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 13c78e0e..4ce74b05 100644 --- a/src/core.c +++ b/src/core.c @@ -5903,6 +5903,7 @@ static int mdbx_cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) { * * Returns 0 on success, non-zero on failure. */ static void mdbx_cursors_eot(MDBX_txn *txn, const bool merge) { + mdbx_tassert(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); for (int i = txn->mt_numdbs; --i >= 0;) { MDBX_cursor *next, *mc = txn->tw.cursors[i]; if (!mc) @@ -6818,6 +6819,7 @@ int mdbx_txn_flags(const MDBX_txn *txn) { /* Export or close DBI handles opened in this txn. 
*/ static void mdbx_dbis_update(MDBX_txn *txn, int keep) { + mdbx_tassert(txn, !txn->mt_parent && txn == txn->mt_env->me_txn0); MDBX_dbi n = txn->mt_numdbs; if (n) { bool locked = false; @@ -6919,8 +6921,6 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { if (txn == env->me_txn0) mdbx_txn_valgrind(env, nullptr); #endif - /* Export or close DBI handles created in this txn */ - mdbx_dbis_update(txn, mode & MDBX_END_UPDATE); if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */ mdbx_cursors_eot(txn, false); if (!(env->me_flags & MDBX_WRITEMAP)) @@ -6931,6 +6931,8 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { env->me_txn = txn->mt_parent; if (txn == env->me_txn0) { mdbx_assert(env, txn->mt_parent == NULL); + /* Export or close DBI handles created in this txn */ + mdbx_dbis_update(txn, mode & MDBX_END_UPDATE); mdbx_pnl_shrink(&txn->tw.retired_pages); mdbx_pnl_shrink(&txn->tw.reclaimed_pglist); /* The writer mutex was locked in mdbx_txn_begin. */ From 7cdbe1badb2b898429efa2952d74f02bed51128a Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Mon, 16 Nov 2020 06:52:48 +0300 Subject: [PATCH 20/28] mdbx-test: extending speculum mode for cursors tracking verification. 
Change-Id: I44786efcee6feb1c7d414c925717d08ed9d94e20 --- .github/actions/spelling/expect.txt | 1 + test/test.cc | 375 ++++++++++++++++++++++++++-- test/test.h | 47 +++- test/utils.h | 3 + 4 files changed, 392 insertions(+), 34 deletions(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 28a0b2b4..e9784952 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -238,6 +238,7 @@ datalen DATANAME DATASIGN datasync +dataview datetime DBC dbenv diff --git a/test/test.cc b/test/test.cc index c05f38d5..fba23264 100644 --- a/test/test.cc +++ b/test/test.cc @@ -645,29 +645,295 @@ bool test_execute(const actor_config &config_const) { //----------------------------------------------------------------------------- +enum speculum_cursors : int { + lowerbound = 0, + prev = 1, + prev_prev = 2, + next = 3, + next_next = 4 +}; + +bool testcase::is_same(const Item &a, const Item &b) const { + if (!is_samedata(dataview2iov(a.first), dataview2iov(b.first))) + return false; + if ((config.params.table_flags & MDBX_DUPSORT) && + !is_samedata(dataview2iov(a.second), dataview2iov(b.second))) + return false; + return true; +} + +bool testcase::is_same(const testcase::SET::const_iterator &it, + const MDBX_val &k, const MDBX_val &v) const { + + return is_samedata(dataview2iov(it->first), k) && + is_samedata(dataview2iov(it->second), v); +} + +void testcase::verbose(const char *where, const char *stage, + const testcase::SET::const_iterator &it) const { + if (it == speculum.end()) + log_verbose("speculum-%s: %s expect END", where, stage); + else { + char dump_key[32], dump_value[32]; + MDBX_val it_key = dataview2iov(it->first); + MDBX_val it_data = dataview2iov(it->second); + log_verbose("speculum-%s: %s expect {%s, %s}", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); + } +} + +void testcase::verbose(const char *where, const char 
*stage, + const MDBX_val &key, const MDBX_val &data, + int err) const { + char dump_key[32], dump_value[32]; + if (err != MDBX_SUCCESS) + log_verbose("speculum-%s: %s cursor {%d, %s}", where, stage, err, + mdbx_strerror(err)); + else + log_verbose("speculum-%s: %s cursor {%s, %s}", where, stage, + mdbx_dump_val(&key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&data, dump_value, sizeof(dump_value))); +} + +void testcase::speculum_check_iterator(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + const MDBX_val &key, + const MDBX_val &data) const { + char dump_key[32], dump_value[32]; + MDBX_val it_key = dataview2iov(it->first); + MDBX_val it_data = dataview2iov(it->second); + // log_verbose("speculum-%s: %s expect {%s, %s}", where, stage, + // mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + // mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); + if (!is_samedata(it_key, key)) + failure("speculum-%s: %s key mismatch %s (must) != %s", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&key, dump_value, sizeof(dump_value))); + if (!is_samedata(it_data, data)) + failure("speculum-%s: %s data mismatch %s (must) != %s", where, stage, + mdbx_dump_val(&it_data, dump_key, sizeof(dump_key)), + mdbx_dump_val(&data, dump_value, sizeof(dump_value))); +} + +void testcase::speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + int cursor_err, const MDBX_val &cursor_key, + const MDBX_val &cursor_data) const { + // verbose(where, stage, cursor_key, cursor_data, cursor_err); + // verbose(where, stage, it); + if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND) + failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", cursor_err, + mdbx_strerror(cursor_err)); + + char dump_key[32], dump_value[32]; + if (it == speculum.end() && cursor_err != MDBX_NOTFOUND) + failure("speculum-%s: %s extra pair {%s, %s}", where, stage, + 
mdbx_dump_val(&cursor_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&cursor_data, dump_value, sizeof(dump_value))); + else if (it != speculum.end() && cursor_err == MDBX_NOTFOUND) { + MDBX_val it_key = dataview2iov(it->first); + MDBX_val it_data = dataview2iov(it->second); + failure("speculum-%s: %s lack pair {%s, %s}", where, stage, + mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), + mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); + } else if (cursor_err == MDBX_SUCCESS) + speculum_check_iterator(where, stage, it, cursor_key, cursor_data); +} + +void testcase::speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + MDBX_cursor *cursor, + const MDBX_cursor_op op) const { + MDBX_val cursor_key = {}; + MDBX_val cursor_data = {}; + int err; + if (std::next(it) == speculum.end() && op == MDBX_PREV && + (config.params.table_flags & MDBX_DUPSORT)) { + /* Workaround for MDBX/LMDB flaw */ + err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, MDBX_LAST); + if (err == MDBX_SUCCESS) + err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, MDBX_LAST_DUP); + } else + err = mdbx_cursor_get(cursor, &cursor_key, &cursor_data, op); + return speculum_check_cursor(where, stage, it, err, cursor_key, cursor_data); +} + +void testcase::speculum_prepare_cursors(const Item &item) { + int err; + assert(config.params.speculum); + if (speculum_cursors[lowerbound]) + for (auto &guard : speculum_cursors) { + if (txn_guard.get() != mdbx_cursor_txn(guard.get()) || + dbi != mdbx_cursor_dbi(guard.get())) { + err = mdbx_cursor_bind(txn_guard.get(), guard.get(), dbi); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_cursor_bind()", err); + } + } + else + for (auto &guard : speculum_cursors) { + MDBX_cursor *cursor = nullptr; + err = mdbx_cursor_open(txn_guard.get(), dbi, &cursor); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_cursor_open()", err); + guard.reset(cursor); + } + + const auto 
cursor_lowerbound = speculum_cursors[lowerbound].get(); + const MDBX_val item_key = dataview2iov(item.first), + item_data = dataview2iov(item.second); + MDBX_val lowerbound_key = item_key; + MDBX_val lowerbound_data = item_data; + // verbose("prepare-cursors", "item", item_key, item_data); + err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, &lowerbound_data, + MDBX_SET_RANGE); + if (err == MDBX_SUCCESS && (config.params.table_flags & MDBX_DUPSORT) && + mdbx_cmp(txn_guard.get(), dbi, &lowerbound_key, &item_key) == 0) { + lowerbound_data = item_data; + err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, &lowerbound_data, + MDBX_GET_BOTH_RANGE); + if (err == MDBX_NOTFOUND) + err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, + &lowerbound_data, MDBX_NEXT_NODUP); + } + + // verbose("prepare-cursors", "lowerbound", lowerbound_key, lowerbound_data, + // err); + auto it_lowerbound = speculum.lower_bound(item); + // verbose("prepare-cursors", "lowerbound", it_lowerbound); + speculum_check_cursor("prepare-cursors", "lowerbound", it_lowerbound, err, + lowerbound_key, lowerbound_data); + + const auto cursor_prev = speculum_cursors[prev].get(); + err = mdbx_cursor_copy(cursor_lowerbound, cursor_prev); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev", + "cursor-copy", err, mdbx_strerror(err)); + auto it_prev = it_lowerbound; + if (it_prev != speculum.begin()) { + speculum_check_cursor("prepare-cursors", "prev", --it_prev, cursor_prev, + MDBX_PREV); + } else if ((err = mdbx_cursor_on_first(cursor_prev)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-first %d %s", "prepare-cursors", "prev", err, + mdbx_strerror(err)); + + const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); + err = mdbx_cursor_copy(cursor_prev, cursor_prev_prev); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "prev-prev", + "cursor-copy", err, mdbx_strerror(err)); + auto 
it_prev_prev = it_prev; + if (it_prev_prev != speculum.begin()) { + speculum_check_cursor("prepare-cursors", "prev-prev", --it_prev_prev, + cursor_prev_prev, MDBX_PREV); + } else if ((err = mdbx_cursor_on_first(cursor_prev_prev)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-first %d %s", "prepare-cursors", "prev-prev", + err, mdbx_strerror(err)); + + const auto cursor_next = speculum_cursors[next].get(); + err = mdbx_cursor_copy(cursor_lowerbound, cursor_next); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next", + "cursor-copy", err, mdbx_strerror(err)); + auto it_next = it_lowerbound; + if (it_next != speculum.end()) { + speculum_check_cursor("prepare-cursors", "next", ++it_next, cursor_next, + MDBX_NEXT); + } else if ((err = mdbx_cursor_on_last(cursor_next)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next", err, + mdbx_strerror(err)); + + const auto cursor_next_next = speculum_cursors[next_next].get(); + err = mdbx_cursor_copy(cursor_next, cursor_next_next); + if (unlikely(err != MDBX_SUCCESS)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "next-next", + "cursor-copy", err, mdbx_strerror(err)); + auto it_next_next = it_next; + if (it_next_next != speculum.end()) { + speculum_check_cursor("prepare-cursors", "next-next", ++it_next_next, + cursor_next_next, MDBX_NEXT); + } else if ((err = mdbx_cursor_on_last(cursor_next_next)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "prepare-cursors", "next-next", + err, mdbx_strerror(err)); +} + int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags) { - int err = mdbx_put(txn_guard.get(), dbi, &akey->value, &adata->value, flags); - if (err == MDBX_SUCCESS && config.params.speculum) { - const auto S_key = S(akey); - const auto S_data = S(adata); - if (unlikely(!speculum.emplace(S_key, S_data).second)) { - char dump_key[128], dump_value[128]; - 
log_error("speculum-insert: pair not inserted {%s, %s}", + int err; + bool rc = true; + Item item; + if (config.params.speculum) { + item.first = iov2dataview(akey); + item.second = iov2dataview(adata); + speculum_prepare_cursors(item); + } + + err = mdbx_put(txn_guard.get(), dbi, &akey->value, &adata->value, flags); + if (err != MDBX_SUCCESS && err != MDBX_KEYEXIST) + return err; + + if (config.params.speculum) { + char dump_key[32], dump_value[32]; + const auto insertion_result = speculum.insert(item); + if (err == MDBX_KEYEXIST && insertion_result.second) { + log_error("speculum.insert: unexpected %s {%s, %s}", "MDBX_KEYEXIST", mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + if (err == MDBX_SUCCESS && !insertion_result.second) { + log_error("speculum.insert: unexpected %s {%s, %s}", "MDBX_SUCCESS", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + + if (insertion_result.first != speculum.begin()) { + const auto cursor_prev = speculum_cursors[prev].get(); + auto it_prev = insertion_result.first; + speculum_check_cursor("after-insert", "prev", --it_prev, cursor_prev, + MDBX_GET_CURRENT); + if (it_prev != speculum.begin()) { + const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); + auto it_prev_prev = it_prev; + speculum_check_cursor("after-insert", "prev-prev", --it_prev_prev, + cursor_prev_prev, MDBX_GET_CURRENT); + } + } + + auto it_lowerbound = insertion_result.first; + if (++it_lowerbound != speculum.end()) { + const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); + speculum_check_cursor("after-insert", "lowerbound", it_lowerbound, + cursor_lowerbound, MDBX_GET_CURRENT); + + auto it_next = it_lowerbound; + if (++it_next != speculum.end()) { + const auto cursor_next = speculum_cursors[next].get(); + speculum_check_cursor("after-insert", "next", it_next, 
cursor_next, + MDBX_GET_CURRENT); + + auto it_next_next = it_next; + if (++it_next_next != speculum.end()) { + const auto cursor_next_next = speculum_cursors[next_next].get(); + speculum_check_cursor("after-insert", "next-next", it_next_next, + cursor_next_next, MDBX_GET_CURRENT); + } + } } } - return err; + + return rc ? MDBX_SUCCESS : MDBX_RESULT_TRUE; } int testcase::replace(const keygen::buffer &akey, const keygen::buffer &new_data, const keygen::buffer &old_data, MDBX_put_flags_t flags) { if (config.params.speculum) { - const auto S_key = S(akey); - const auto S_old = S(old_data); - const auto S_new = S(new_data); + const auto S_key = iov2dataview(akey); + const auto S_old = iov2dataview(old_data); + const auto S_new = iov2dataview(new_data); const auto removed = speculum.erase(SET::key_type(S_key, S_old)); if (unlikely(removed != 1)) { char dump_key[128], dump_value[128]; @@ -690,19 +956,82 @@ int testcase::replace(const keygen::buffer &akey, } int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { + int err; + bool rc = true; + Item item; if (config.params.speculum) { - const auto S_key = S(akey); - const auto S_data = S(adata); - const auto removed = speculum.erase(SET::key_type(S_key, S_data)); - if (unlikely(removed != 1)) { - char dump_key[128], dump_value[128]; - log_error("speculum-%s: %s old value {%s, %s}", "remove", - (removed > 1) ? 
"multi" : "no", - mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), - mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + item.first = iov2dataview(akey); + item.second = iov2dataview(adata); + speculum_prepare_cursors(item); + } + + err = mdbx_del(txn_guard.get(), dbi, &akey->value, &adata->value); + if (err != MDBX_NOTFOUND && err != MDBX_SUCCESS) + return err; + + if (config.params.speculum) { + char dump_key[32], dump_value[32]; + const auto it_found = speculum.find(item); + if (it_found == speculum.end()) { + if (err != MDBX_NOTFOUND) { + log_error("speculum.remove: unexpected %s {%s, %s}", "MDBX_SUCCESS", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + } else { + if (err != MDBX_SUCCESS) { + log_error("speculum.remove: unexpected %s {%s, %s}", "MDBX_NOTFOUND", + mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), + mdbx_dump_val(&adata->value, dump_value, sizeof(dump_value))); + rc = false; + } + + if (it_found != speculum.begin()) { + const auto cursor_prev = speculum_cursors[prev].get(); + auto it_prev = it_found; + speculum_check_cursor("after-remove", "prev", --it_prev, cursor_prev, + MDBX_GET_CURRENT); + if (it_prev != speculum.begin()) { + const auto cursor_prev_prev = speculum_cursors[prev_prev].get(); + auto it_prev_prev = it_prev; + speculum_check_cursor("after-remove", "prev-prev", --it_prev_prev, + cursor_prev_prev, MDBX_GET_CURRENT); + } + } + + auto it_next = it_found; + const auto cursor_next = speculum_cursors[next].get(); + const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); + if (++it_next != speculum.end()) { + speculum_check_cursor("after-remove", "next", it_next, cursor_next, + MDBX_GET_CURRENT); + speculum_check_cursor("after-remove", "lowerbound", it_next, + cursor_lowerbound, MDBX_NEXT); + + auto it_next_next = it_next; + const auto cursor_next_next = speculum_cursors[next_next].get(); + if 
(++it_next_next != speculum.end()) { + speculum_check_cursor("after-remove", "next-next", it_next_next, + cursor_next_next, MDBX_GET_CURRENT); + } else if ((err = mdbx_cursor_on_last(cursor_next_next)) != + MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "after-remove", "next-next", + err, mdbx_strerror(err)); + } else { + if ((err = mdbx_cursor_on_last(cursor_next)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "after-remove", "next", err, + mdbx_strerror(err)); + if ((err = mdbx_cursor_on_last(cursor_lowerbound)) != MDBX_RESULT_TRUE) + failure("speculum-%s: %s on-last %d %s", "after-remove", "lowerbound", + err, mdbx_strerror(err)); + } + + speculum.erase(it_found); } } - return mdbx_del(txn_guard.get(), dbi, &akey->value, &adata->value); + + return rc ? MDBX_SUCCESS : MDBX_RESULT_TRUE; } bool testcase::speculum_verify() { @@ -733,8 +1062,8 @@ bool testcase::speculum_verify() { akey.iov_len = avalue.iov_len = 0; akey.iov_base = avalue.iov_base = nullptr; } - const auto S_key = S(akey); - const auto S_data = S(avalue); + const auto S_key = iov2dataview(akey); + const auto S_data = iov2dataview(avalue); if (it != speculum.cend()) { mkey.iov_base = (void *)it->first.c_str(); mkey.iov_len = it->first.size(); diff --git a/test/test.h b/test/test.h index 1a765143..bcc33209 100644 --- a/test/test.h +++ b/test/test.h @@ -103,31 +103,33 @@ protected: #else using data_view = std::string; #endif - static inline data_view S(const MDBX_val &v) { + static inline data_view iov2dataview(const MDBX_val &v) { return (v.iov_base && v.iov_len) ? 
data_view(static_cast(v.iov_base), v.iov_len) : data_view(); } - static inline data_view S(const keygen::buffer &b) { return S(b->value); } + static inline data_view iov2dataview(const keygen::buffer &b) { + return iov2dataview(b->value); + } using Item = std::pair; + static MDBX_val dataview2iov(const data_view &v) { + MDBX_val r; + r.iov_base = (void *)v.data(); + r.iov_len = v.size(); + return r; + } struct ItemCompare { const testcase *context; ItemCompare(const testcase *owner) : context(owner) {} bool operator()(const Item &a, const Item &b) const { - MDBX_val va, vb; - va.iov_base = (void *)a.first.data(); - va.iov_len = a.first.size(); - vb.iov_base = (void *)b.first.data(); - vb.iov_len = b.first.size(); + MDBX_val va = dataview2iov(a.first), vb = dataview2iov(b.first); int cmp = mdbx_cmp(context->txn_guard.get(), context->dbi, &va, &vb); if (cmp == 0 && (context->config.params.table_flags & MDBX_DUPSORT) != 0) { - va.iov_base = (void *)a.second.data(); - va.iov_len = a.second.size(); - vb.iov_base = (void *)b.second.data(); - vb.iov_len = b.second.size(); + va = dataview2iov(a.second); + vb = dataview2iov(b.second); cmp = mdbx_dcmp(context->txn_guard.get(), context->dbi, &va, &vb); } return cmp < 0; @@ -159,6 +161,29 @@ protected: } last; SET speculum{ItemCompare(this)}, speculum_committed{ItemCompare(this)}; + scoped_cursor_guard speculum_cursors[5]; + void speculum_prepare_cursors(const Item &item); + void speculum_check_iterator(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + const MDBX_val &key, const MDBX_val &data) const; + void speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + int cursor_err, const MDBX_val &cursor_key, + const MDBX_val &cursor_data) const; + void speculum_check_cursor(const char *where, const char *stage, + const testcase::SET::const_iterator &it, + MDBX_cursor *cursor, + const MDBX_cursor_op op) const; + + void verbose(const char *where, 
const char *stage, + const testcase::SET::const_iterator &it) const; + void verbose(const char *where, const char *stage, const MDBX_val &key, + const MDBX_val &data, int err = MDBX_SUCCESS) const; + + bool is_same(const Item &a, const Item &b) const; + bool is_same(const SET::const_iterator &it, const MDBX_val &k, + const MDBX_val &v) const; + bool speculum_verify(); int insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags); diff --git a/test/utils.h b/test/utils.h index f00f34d1..d982df35 100644 --- a/test/utils.h +++ b/test/utils.h @@ -287,6 +287,9 @@ std::string data2hex(const void *ptr, size_t bytes, simple_checksum &checksum); bool hex2data(const char *hex_begin, const char *hex_end, void *ptr, size_t bytes, simple_checksum &checksum); bool is_samedata(const MDBX_val *a, const MDBX_val *b); +inline bool is_samedata(const MDBX_val &a, const MDBX_val &b) { + return is_samedata(&a, &b); +} std::string format(const char *fmt, ...); uint64_t entropy_ticks(void); From 9ea6922a2f6661b7488e9b8820c1ad3c0a8e953f Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 07:44:02 +0300 Subject: [PATCH 21/28] mdbx: minor simplify `mdbx_cursor_set()`. 
Change-Id: I034f396368024af21e8ee741c13a28c9bc277121 --- src/core.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/core.c b/src/core.c index 4ce74b05..4148ae73 100644 --- a/src/core.c +++ b/src/core.c @@ -12290,15 +12290,11 @@ set1: rc = 0; } *data = olddata; - } else { - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - if (unlikely((rc = mdbx_node_read( - mc, node, data, - pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) != - MDBX_SUCCESS)) - return rc; - } + } else if (unlikely((rc = mdbx_node_read(mc, node, data, + pp_txnid4chk(mc->mc_pg[mc->mc_top], + mc->mc_txn))) != + MDBX_SUCCESS)) + return rc; } /* The key already matches in all other cases */ From 5e02e7fb562afd42d112253e2c88d326874d93df Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 08:01:08 +0300 Subject: [PATCH 22/28] mdbx: add `MDBX_SET_LOWERBOUND` for `mdbx_cursor_get()`. Change-Id: I3638fdd10be8dfe128c43b465e9ca71f89175b3e --- mdbx.h | 15 ++++++++++++++- src/core.c | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index 79a6ba7c..1d4ad575 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1505,7 +1505,20 @@ enum MDBX_cursor_op { /** \ref MDBX_DUPFIXED -only: Position at previous page and return up to * a page of duplicate data items. */ - MDBX_PREV_MULTIPLE + MDBX_PREV_MULTIPLE, + + /** Position at first key-value pair greater than or equal to specified, + * return both key and data, and the return code depends on a exact match. + * + * For non DUPSORT-ed collections this work the same to \ref MDBX_SET_RANGE, + * but returns \ref MDBX_SUCCESS if key found exactly and + * \ref MDBX_RESULT_TRUE if greater key was found. + * + * For DUPSORT-ed a data value is taken into account for duplicates, + * i.e. for a pairs/tuples of a key and an each data value of duplicates. 
+ * Returns \ref MDBX_SUCCESS if key-value pair found exactly and + * \ref MDBX_RESULT_TRUE if the next pair was returned. */ + MDBX_SET_LOWERBOUND }; #ifndef __cplusplus /** \ingroup c_cursors */ diff --git a/src/core.c b/src/core.c index 4148ae73..8404c537 100644 --- a/src/core.c +++ b/src/core.c @@ -12575,6 +12575,33 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, case MDBX_LAST_DUP: mfunc = mdbx_cursor_last; goto mmove; + case MDBX_SET_LOWERBOUND: { + if (unlikely(key == NULL || data == NULL)) + return MDBX_EINVAL; + MDBX_val save_data = *data; + rc = mdbx_cursor_set(mc, key, data, MDBX_SET_RANGE, &exact); + if (rc == MDBX_SUCCESS && exact && mc->mc_xcursor) { + mc->mc_flags &= ~C_DEL; + if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + *data = save_data; + exact = 0; + rc = mdbx_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, + MDBX_SET_RANGE, &exact); + if (rc == MDBX_NOTFOUND) { + mdbx_cassert(mc, !exact); + rc = mdbx_cursor_next(mc, key, data, MDBX_NEXT_NODUP); + } + } else { + int cmp = mc->mc_dbx->md_dcmp(&save_data, data); + exact = (cmp == 0); + if (cmp > 0) + rc = mdbx_cursor_next(mc, key, data, MDBX_NEXT_NODUP); + } + } + if (rc == MDBX_SUCCESS && !exact) + rc = MDBX_RESULT_TRUE; + break; + } default: mdbx_debug("unhandled/unimplemented cursor operation %u", op); return MDBX_EINVAL; From 96c2a56aa17c79ad0fd34f4e02040f69836cc4b6 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 08:03:00 +0300 Subject: [PATCH 23/28] mdbx: use `MDBX_SET_LOWERBOUND` in `mdbx_get_equal_or_great()`. 
Change-Id: I5dd72fe82bd15938afc60cbc0f92e23c00f0d344 --- src/core.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/core.c b/src/core.c index 8404c537..d9ddad50 100644 --- a/src/core.c +++ b/src/core.c @@ -11733,20 +11733,7 @@ int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, if (unlikely(rc != MDBX_SUCCESS)) return rc; - MDBX_val save_data = *data; - int exact = 0; - rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_SET_RANGE, &exact); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (exact && (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) != 0) { - *data = save_data; - exact = 0; - rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_GET_BOTH_RANGE, &exact); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } - return exact ? MDBX_SUCCESS : MDBX_RESULT_TRUE; + return mdbx_cursor_get(&cx.outer, key, data, MDBX_SET_LOWERBOUND); } int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, From b1008b12560c92027790ab11028829ee88dc738e Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 08:03:54 +0300 Subject: [PATCH 24/28] mdbx-test: use `MDBX_SET_LOWERBOUND`. 
Change-Id: I4f6efab69996d4678a78024337a6698a65c2386b --- test/test.cc | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/test/test.cc b/test/test.cc index fba23264..d2211a92 100644 --- a/test/test.cc +++ b/test/test.cc @@ -687,7 +687,7 @@ void testcase::verbose(const char *where, const char *stage, const MDBX_val &key, const MDBX_val &data, int err) const { char dump_key[32], dump_value[32]; - if (err != MDBX_SUCCESS) + if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE) log_verbose("speculum-%s: %s cursor {%d, %s}", where, stage, err, mdbx_strerror(err)); else @@ -722,7 +722,8 @@ void testcase::speculum_check_cursor(const char *where, const char *stage, const MDBX_val &cursor_data) const { // verbose(where, stage, cursor_key, cursor_data, cursor_err); // verbose(where, stage, it); - if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND) + if (cursor_err != MDBX_SUCCESS && cursor_err != MDBX_NOTFOUND && + cursor_err != MDBX_RESULT_TRUE) failure("speculum-%s: %s %s %d %s", where, stage, "cursor-get", cursor_err, mdbx_strerror(cursor_err)); @@ -737,7 +738,7 @@ void testcase::speculum_check_cursor(const char *where, const char *stage, failure("speculum-%s: %s lack pair {%s, %s}", where, stage, mdbx_dump_val(&it_key, dump_key, sizeof(dump_key)), mdbx_dump_val(&it_data, dump_value, sizeof(dump_value))); - } else if (cursor_err == MDBX_SUCCESS) + } else if (cursor_err == MDBX_SUCCESS || cursor_err == MDBX_RESULT_TRUE) speculum_check_iterator(where, stage, it, cursor_key, cursor_data); } @@ -787,19 +788,14 @@ void testcase::speculum_prepare_cursors(const Item &item) { MDBX_val lowerbound_data = item_data; // verbose("prepare-cursors", "item", item_key, item_data); err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, &lowerbound_data, - MDBX_SET_RANGE); - if (err == MDBX_SUCCESS && (config.params.table_flags & MDBX_DUPSORT) && - mdbx_cmp(txn_guard.get(), dbi, &lowerbound_key, &item_key) == 0) { - lowerbound_data = 
item_data; - err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, &lowerbound_data, - MDBX_GET_BOTH_RANGE); - if (err == MDBX_NOTFOUND) - err = mdbx_cursor_get(cursor_lowerbound, &lowerbound_key, - &lowerbound_data, MDBX_NEXT_NODUP); - } - + MDBX_SET_LOWERBOUND); // verbose("prepare-cursors", "lowerbound", lowerbound_key, lowerbound_data, // err); + if (unlikely(err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE && + err != MDBX_NOTFOUND)) + failure("speculum-%s: %s %s %d %s", "prepare-cursors", "lowerbound", + "cursor-get", err, mdbx_strerror(err)); + auto it_lowerbound = speculum.lower_bound(item); // verbose("prepare-cursors", "lowerbound", it_lowerbound); speculum_check_cursor("prepare-cursors", "lowerbound", it_lowerbound, err, From 015ed5bc98c209582e6a2039afb16fd5044b9e37 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 13:51:10 +0300 Subject: [PATCH 25/28] mdbx-cmake: minor fix `MDBX_BUILD_FLAGS` preparation. Change-Id: I91eebdffd06b4239ccc2dccd0931b3f82f0b5f52 --- CMakeLists.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 20a50a45..bc0461d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -756,14 +756,21 @@ if(NOT CMAKE_CONFIGURATION_TYPES) endif() endif() +# choice target to fetch definitions and options +if(MDBX_BUILD_SHARED_LIBRARY) + set(target4fetch mdbx) +else() + set(target4fetch mdbx-static) +endif() + # get definitions -get_target_property(defs_list mdbx-static COMPILE_DEFINITIONS) +get_target_property(defs_list ${target4fetch} COMPILE_DEFINITIONS) if(defs_list) list(APPEND MDBX_BUILD_FLAGS ${defs_list}) endif() # get target compile options -get_target_property(options_list mdbx-static COMPILE_OPTIONS) +get_target_property(options_list ${target4fetch} COMPILE_OPTIONS) if(options_list) list(APPEND MDBX_BUILD_FLAGS ${options_list}) endif() From 11fde67edc45134e40de14ba28fa9c40937c8ab8 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 
13:59:29 +0300 Subject: [PATCH 26/28] mdbx-cmake: crutch for MSVC 19.28 (Visual Studio 16.8). Change-Id: I8fdb665d103031dc5e0c4b7963bc5eb65853a82f --- CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bc0461d7..d41686bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -340,10 +340,12 @@ if(NOT DEFINED MDBX_CXX_STANDARD) set(MDBX_CXX_STANDARD 98) endif() endif() -if(NOT HAS_C11 LESS 0) - set(MDBX_C_STANDARD 11) -else() +# MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad! +# It unable process Windows SDK headers in the C11 mode! +if(HAS_C11 LESS 0 OR (MSVC AND MSVC_VERSION GREATER 1927)) set(MDBX_C_STANDARD 99) +else() + set(MDBX_C_STANDARD 11) endif() if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def") From 94fae97f8888dc0539f82bf2f3c86652b7f5256d Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 15:00:51 +0300 Subject: [PATCH 27/28] mdbx: avoid paranoid CLANG's enum ops UB. 
--- mdbx.h | 13 +++++++++++-- test/keygen.cc | 42 +++++++++++++++++++++++++----------------- test/nested.cc | 4 ++-- test/test.cc | 2 +- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/mdbx.h b/mdbx.h index 1d4ad575..39f304f2 100644 --- a/mdbx.h +++ b/mdbx.h @@ -483,9 +483,18 @@ typedef mode_t mdbx_mode_t; MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, ENUM b) { \ return ENUM(std::size_t(a) & std::size_t(b)); \ } \ + MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, size_t b) { \ + return ENUM(std::size_t(a) & b); \ + } \ + MDBX_CXX01_CONSTEXPR ENUM operator&(size_t a, ENUM b) { \ + return ENUM(a & std::size_t(b)); \ + } \ MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, ENUM b) { return a = a & b; } \ - MDBX_CXX01_CONSTEXPR ENUM operator~(ENUM a) { \ - return ENUM(~std::size_t(a)); \ + MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, size_t b) { \ + return a = a & b; \ + } \ + MDBX_CXX01_CONSTEXPR std::size_t operator~(ENUM a) { \ + return ~std::size_t(a); \ } \ MDBX_CXX01_CONSTEXPR ENUM operator^(ENUM a, ENUM b) { \ return ENUM(std::size_t(a) ^ std::size_t(b)); \ diff --git a/test/keygen.cc b/test/keygen.cc index 411bf623..a941e3ae 100644 --- a/test/keygen.cc +++ b/test/keygen.cc @@ -115,7 +115,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, serial_t key_serial = serial; serial_t value_serial = value_age << mapping.split; if (mapping.split) { - if (key_essentials.flags & MDBX_DUPSORT) { + if (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) { key_serial >>= mapping.split; value_serial += serial & mask(mapping.split); } else { @@ -203,7 +203,7 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, #endif key_essentials.flags = actor.table_flags & - uint16_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT); + MDBX_db_flags_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT); assert(actor.keylen_min <= UINT16_MAX); key_essentials.minlen = (uint16_t)actor.keylen_min; assert(actor.keylen_max <= 
UINT32_MAX); @@ -213,7 +213,7 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, actor.pagesize, MDBX_db_flags_t(key_essentials.flags)))); value_essentials.flags = - actor.table_flags & uint16_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP); + actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP); assert(actor.datalen_min <= UINT16_MAX); value_essentials.minlen = (uint16_t)actor.datalen_min; assert(actor.datalen_max <= UINT32_MAX); @@ -236,35 +236,41 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, } void maker::make_linear() { - mapping.mesh = (key_essentials.flags & MDBX_DUPSORT) ? 0 : mapping.split; + mapping.mesh = (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) + ? 0 + : mapping.split; mapping.rotate = 0; mapping.offset = 0; const auto max_serial = mask(mapping.width) + base; const auto max_key_serial = - (mapping.split && (key_essentials.flags & MDBX_DUPSORT)) + (mapping.split && (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT)) ? max_serial >> mapping.split : max_serial; const auto max_value_serial = - (mapping.split && (key_essentials.flags & MDBX_DUPSORT)) + (mapping.split && (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT)) ? mask(mapping.split) : 0; while (key_essentials.minlen < 8 && (key_essentials.minlen == 0 || mask(key_essentials.minlen * 8) < max_key_serial)) { - key_essentials.minlen += - (key_essentials.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) ? 4 : 1; + key_essentials.minlen += (MDBX_db_flags_t(key_essentials.flags) & + (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) + ? 
4 + : 1; if (key_essentials.maxlen < key_essentials.minlen) key_essentials.maxlen = key_essentials.minlen; } - if ((key_essentials.flags | value_essentials.flags) & MDBX_DUPSORT) + if (MDBX_db_flags_t(key_essentials.flags | value_essentials.flags) & + MDBX_DUPSORT) while (value_essentials.minlen < 8 && (value_essentials.minlen == 0 || mask(value_essentials.minlen * 8) < max_value_serial)) { - value_essentials.minlen += - (value_essentials.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) ? 4 - : 1; + value_essentials.minlen += (MDBX_db_flags_t(value_essentials.flags) & + (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) + ? 4 + : 1; if (value_essentials.maxlen < value_essentials.minlen) value_essentials.maxlen = value_essentials.minlen; } @@ -272,8 +278,9 @@ void maker::make_linear() { bool maker::is_unordered() const { return mapping.rotate || - mapping.mesh > - ((key_essentials.flags & MDBX_DUPSORT) ? 0 : mapping.split); + mapping.mesh > ((MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) + ? 0 + : mapping.split); } bool maker::increment(serial_t &serial, int delta) const { @@ -374,9 +381,9 @@ void __hot maker::mk_continue(const serial_t serial, const essentials &params, #endif assert(length(serial) <= out.value.iov_len); out.value.iov_base = out.bytes; - if (params.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) { + if (MDBX_db_flags_t(params.flags) & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) { assert(params.maxlen == params.minlen); - if (params.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) + if (MDBX_db_flags_t(params.flags) & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) assert(params.minlen == 4 || params.minlen == 8); out.u64 = serial; if (!is_byteorder_le() && out.value.iov_len != 8) @@ -393,7 +400,8 @@ void __hot maker::mk_continue(const serial_t serial, const essentials &params, } else memset(out.bytes + 8, '\0', out.value.iov_len - prefix); } - if (unlikely(params.flags & (MDBX_REVERSEKEY | MDBX_REVERSEDUP))) + if (unlikely(MDBX_db_flags_t(params.flags) & + (MDBX_REVERSEKEY |
MDBX_REVERSEDUP))) std::reverse((char *)out.value.iov_base, (char *)out.value.iov_base + out.value.iov_len); } diff --git a/test/nested.cc b/test/nested.cc index d26382bf..1952780d 100644 --- a/test/nested.cc +++ b/test/nested.cc @@ -80,8 +80,8 @@ bool testcase_nested::teardown() { void testcase_nested::push_txn() { MDBX_txn *txn; - MDBX_txn_flags_t flags = - MDBX_txn_flags_t(prng32() & (MDBX_TXN_NOSYNC | MDBX_TXN_NOMETASYNC)); + MDBX_txn_flags_t flags = MDBX_txn_flags_t( + prng32() & uint32_t(MDBX_TXN_NOSYNC | MDBX_TXN_NOMETASYNC)); int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &txn); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_begin(nested)", err); diff --git a/test/test.cc b/test/test.cc index d2211a92..f23858f8 100644 --- a/test/test.cc +++ b/test/test.cc @@ -310,7 +310,7 @@ void testcase::txn_inject_writefault(void) { void testcase::txn_inject_writefault(MDBX_txn *txn) { if (config.params.inject_writefaultn && txn) { if (config.params.inject_writefaultn <= nops_completed && - (mdbx_txn_flags(txn) & MDBX_RDONLY) == 0) { + (MDBX_txn_flags_t(mdbx_txn_flags(txn)) & MDBX_TXN_RDONLY) == 0) { log_verbose( "== txn_inject_writefault(): got %u nops or more, inject FAULT", config.params.inject_writefaultn); From 5619fefe0a540ae9c17c660d0826db36ffe75b8c Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 17 Nov 2020 15:28:52 +0300 Subject: [PATCH 28/28] mdbx-test: fix `actor_poll()` against the `EBADF` error from pipe. 
--- test/osal-unix.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/osal-unix.cc b/test/osal-unix.cc index 57643667..7c0bedbc 100644 --- a/test/osal-unix.cc +++ b/test/osal-unix.cc @@ -414,6 +414,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { while (sigalarm_tail == sigalarm_head) { int status; pid = waitpid(0, &status, options); + const int err = errno; if (pid > 0) { if (WIFEXITED(status)) @@ -437,20 +438,19 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { if (sigusr1_tail != sigusr1_head) { sigusr1_tail = sigusr1_head; logging::progress_canary(true); - if (pid < 0 && errno == EINTR) + if (pid < 0 && err == EINTR) continue; } if (sigusr2_tail != sigusr2_head) { sigusr2_tail = sigusr2_head; logging::progress_canary(false); - if (pid < 0 && errno == EINTR) + if (pid < 0 && err == EINTR) continue; } if (pid == 0) break; - int err = errno; if (err != EINTR) return err; }