mdbx: рефакторинг с удалением cursor_spill() и MDBX_NOSPILL.

This commit is contained in:
Леонид Юрьев (Leonid Yuriev) 2022-12-16 23:02:59 +03:00
parent fe55f25665
commit 686145ec2e
3 changed files with 73 additions and 97 deletions

View File

@ -15,7 +15,7 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD
12|0000 1000| | | | | | | | |
13|0000 2000|VALIDATION | | | | | |P_SPILLED | |
14|0000 4000|NOSUBDIR | | | | | |P_LOOSE | |
15|0000 8000| | |DB_VALID |NOSPILL | | |P_FROZEN | |
15|0000 8000| | |DB_VALID | | | |P_FROZEN | |
16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE | | |
17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND | | <= |
18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP | | | | |

View File

@ -3210,8 +3210,8 @@ static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard);
static pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
static int page_touch(MDBX_cursor *mc);
static int cursor_touch(MDBX_cursor *mc);
static int touch_dbi(MDBX_cursor *mc);
static int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key,
const MDBX_val *data);
#define MDBX_END_NAMES \
{ \
@ -5141,7 +5141,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
for (size_t i = 1; i <= dl->length; ++i) {
MDBX_page *dp = dl->items[i].ptr;
VERBOSE(
"dirtylist[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u",
"unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u",
i, dp->mp_pgno, dpl_npages(dl, i), dp->mp_flags, dpl_age(txn, i),
spill_prio(txn, i, reciprocal));
}
@ -5168,39 +5168,6 @@ done:
: MDBX_TXN_FULL;
}
/* Estimate how many pages the pending cursor operation may dirty and hand
 * that estimate to txn_spill().
 *
 * [in] mc    cursor the operation is performed with; its transaction must
 *            not be read-only (asserted).
 * [in] key   key of the item to be stored, passed to node_size().
 * [in] data  value of the item to be stored, passed to node_size().
 *
 * Returns whatever txn_spill() returns. */
static int cursor_spill(MDBX_cursor *mc, const MDBX_val *key,
                        const MDBX_val *data) {
  MDBX_txn *txn = mc->mc_txn;
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);

  /* Base term: the maximal b-tree height (a dupsort sub-tree included)
   * plus a small reserve. */
  size_t estimate = CURSOR_STACK + 3;

  if (mc->mc_dbi > FREE_DBI) {
    /* Any payload outside of GC/FreeDB also costs a walk through GC. */
    estimate += txn->mt_dbs[FREE_DBI].md_depth + 3;
    if (mc->mc_dbi > MAIN_DBI) {
      /* A named DB additionally dirties the main DB. */
      estimate += txn->mt_dbs[MAIN_DBI].md_depth + 3;
    }
  }

#if xMDBX_DEBUG_SPILLING != 2
  /* Production mode: twice the page-chain estimate to cover extensive
   * splitting, rebalancing and merging ... */
  estimate *= 2;
  /* ... and on top of that the pages taken by the key+data itself. */
  estimate += bytes2pgno(txn->mt_env, node_size(key, data)) + 1;
#else
  /* Debug mode: key/data are not accounted; the estimate is bumped by one
   * and recorded in the environment, and the actual counter is reset. */
  (void)key;
  (void)data;
  estimate += 1;
  mc->mc_txn->mt_env->debug_dirtied_est = estimate;
  mc->mc_txn->mt_env->debug_dirtied_act = 0;
#endif /* xMDBX_DEBUG_SPILLING == 2 */

  return txn_spill(txn, mc, estimate);
}
/*----------------------------------------------------------------------------*/
static bool meta_bootid_match(const MDBX_meta *meta) {
@ -7833,7 +7800,7 @@ __hot static int page_touch(MDBX_cursor *mc) {
txn->tw.dirtylist->items[n].ptr == mp);
txn->tw.dirtylist->items[n].mlru =
(txn->tw.dirtylist->items[n].mlru & MDBX_dp_multi_mask) +
txn_lru_turn(txn);
(txn->tw.dirtylru & MDBX_dp_lru_mask);
return MDBX_SUCCESS;
}
if (IS_SUBP(mp)) {
@ -9991,8 +9958,12 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
}
/* Runs cursor_touch() on the cursor embedded in the GC-update context with
 * the C_GCU flag raised in mc_flags for the duration of the call.
 *
 * [in] ctx  GC-update context; ctx->cursor is the cursor being touched.
 *
 * Returns the result of cursor_touch(). */
static int gcu_touch(gcu_context_t *ctx) {
/* Size-only descriptors: base pointers are null, only the lengths are set
 * (a txnid-sized key, and a value sized as the transaction's retired-pages
 * list per MDBX_PNL_SIZEOF). */
MDBX_val key, val;
key.iov_base = val.iov_base = nullptr;
key.iov_len = sizeof(txnid_t);
val.iov_len = MDBX_PNL_SIZEOF(ctx->cursor.mc_txn->tw.retired_pages);
ctx->cursor.mc_flags |= C_GCU;
/* NOTE(review): `err` is declared twice below — a one-argument and a
 * three-argument call to cursor_touch() appear back to back. Only one of
 * them can be present in a compilable file; confirm which one is live. */
int err = cursor_touch(&ctx->cursor);
int err = cursor_touch(&ctx->cursor, &key, &val);
/* Drops C_GCU by subtraction, which relies on the bit having been set by
 * the `|=` above and still being set at this point. */
ctx->cursor.mc_flags -= C_GCU;
return err;
}
@ -10036,18 +10007,7 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) {
for_all_before_touch, for_relist, for_split, for_cow,
for_tree_before_touch);
int err;
if (unlikely(for_relist > 2)) {
MDBX_val key, val;
key.iov_base = val.iov_base = nullptr;
key.iov_len = sizeof(txnid_t);
val.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
err = cursor_spill(&ctx->cursor, &key, &val);
if (unlikely(err != MDBX_SUCCESS))
return err;
}
err = gcu_touch(ctx);
int err = gcu_touch(ctx);
TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) &&
@ -15517,7 +15477,8 @@ __hot __noinline static int page_search_root(MDBX_cursor *mc,
ready:
if (flags & MDBX_PS_MODIFY) {
if (unlikely((rc = page_touch(mc)) != 0))
rc = page_touch(mc);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
mp = mc->mc_pg[mc->mc_top];
}
@ -15731,8 +15692,6 @@ __hot static int page_search(MDBX_cursor *mc, const MDBX_val *key, int flags) {
mc->mc_pg[0]->mp_flags);
if (flags & MDBX_PS_MODIFY) {
if (!(*mc->mc_dbistate & DBI_DIRTY) && unlikely(rc = touch_dbi(mc)))
return rc;
if (unlikely(rc = page_touch(mc)))
return rc;
}
@ -16878,21 +16837,61 @@ static int touch_dbi(MDBX_cursor *mc) {
return MDBX_SUCCESS;
}
/* Touch all the pages in the cursor stack. Set mc_top.
* Makes sure all the pages are writable, before attempting a write operation.
* [in] mc The cursor to operate on. */
static int cursor_touch(MDBX_cursor *mc) {
int rc = MDBX_SUCCESS;
static int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key,
const MDBX_val *data) {
cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) == 0);
cASSERT(mc, (mc->mc_flags & C_INITIALIZED) || mc->mc_snum == 0);
cASSERT(mc, cursor_is_tracked(mc));
txn_lru_turn(mc->mc_txn);
if (unlikely((*mc->mc_dbistate & DBI_DIRTY) == 0)) {
rc = touch_dbi(mc);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
int err = touch_dbi(mc);
if (unlikely(err != MDBX_SUCCESS))
return err;
}
if ((mc->mc_flags & C_SUB) == 0) {
MDBX_txn *const txn = mc->mc_txn;
/* Estimate how much space this operation will take: */
/* 1) Max b-tree height, reasonable enough with including dups' sub-tree */
size_t need = CURSOR_STACK + 3;
/* 2) GC/FreeDB for any payload */
if (mc->mc_dbi > FREE_DBI) {
need += txn->mt_dbs[FREE_DBI].md_depth + 3;
/* 3) Named DBs also dirty the main DB */
if (mc->mc_dbi > MAIN_DBI)
need += txn->mt_dbs[MAIN_DBI].md_depth + 3;
}
#if xMDBX_DEBUG_SPILLING != 2
/* production mode */
/* 4) Double the page chain estimation
* for extensively splitting, rebalance and merging */
need += need;
/* 5) Factor the key+data which to be put in */
need += bytes2pgno(txn->mt_env, node_size(key, data)) + 1;
#else
/* debug mode */
(void)key;
(void)data;
txn->mt_env->debug_dirtied_est = ++need;
txn->mt_env->debug_dirtied_act = 0;
#endif /* xMDBX_DEBUG_SPILLING == 2 */
int err = txn_spill(txn, mc, need);
if (unlikely(err != MDBX_SUCCESS))
return err;
}
int rc = MDBX_SUCCESS;
if (likely(mc->mc_snum)) {
mc->mc_top = 0;
do {
rc = page_touch(mc);
} while (!rc && ++(mc->mc_top) < mc->mc_snum);
if (unlikely(rc != MDBX_SUCCESS))
break;
mc->mc_top += 1;
} while (mc->mc_top < mc->mc_snum);
mc->mc_top = mc->mc_snum - 1;
}
return rc;
@ -16952,9 +16951,6 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
data->iov_base = nullptr;
}
const unsigned nospill = flags & MDBX_NOSPILL;
flags -= nospill;
if (unlikely(mc->mc_txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED)))
return (mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS
: MDBX_BAD_TXN;
@ -17159,26 +17155,19 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
}
mc->mc_flags &= ~C_DEL;
/* Cursor is positioned, check for room in the dirty list */
if (!nospill) {
rdata = data;
if (unlikely(flags & MDBX_MULTIPLE)) {
rdata = &xdata;
xdata.iov_len = data->iov_len * dcount;
}
if (unlikely(err = cursor_spill(mc, key, rdata)))
return err;
rdata = data;
if (unlikely(flags & MDBX_MULTIPLE)) {
rdata = &xdata;
xdata.iov_len = data->iov_len * dcount;
}
err = cursor_touch(mc, key, rdata);
if (unlikely(err))
return err;
if (unlikely(rc == MDBX_NO_ROOT)) {
/* new database, write a root leaf page */
DEBUG("%s", "allocating new root leaf page");
if (unlikely((*mc->mc_dbistate & DBI_DIRTY) == 0)) {
err = touch_dbi(mc);
if (unlikely(err != MDBX_SUCCESS))
return err;
}
pgr_t npr = page_new(mc, P_LEAF);
if (unlikely(npr.err != MDBX_SUCCESS))
return npr.err;
@ -17205,11 +17194,6 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED)
npr.page->mp_flags |= P_LEAF2;
mc->mc_flags |= C_INITIALIZED;
} else {
/* make sure all cursor pages are writable */
err = cursor_touch(mc);
if (unlikely(err))
return err;
}
bool insert_key, insert_data, do_sub = false;
@ -17602,9 +17586,8 @@ new_sub:;
STATIC_ASSERT(
(MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) ==
MDBX_NOOVERWRITE);
xflags = MDBX_CURRENT | MDBX_NOSPILL |
((flags & MDBX_NODUPDATA) >>
SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE);
xflags = MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >>
SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE);
if ((flags & MDBX_CURRENT) == 0) {
xflags -= MDBX_CURRENT;
err = cursor_xinit1(mc, node, mc->mc_pg[mc->mc_top]);
@ -17718,11 +17701,7 @@ __hot int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])))
return MDBX_NOTFOUND;
if (likely((flags & MDBX_NOSPILL) == 0) &&
unlikely(rc = cursor_spill(mc, NULL, NULL)))
return rc;
rc = cursor_touch(mc);
rc = cursor_touch(mc, nullptr, nullptr);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@ -17744,7 +17723,7 @@ __hot int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
} else {
if (!(node_flags(node) & F_SUBDATA))
mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
rc = mdbx_cursor_del(&mc->mc_xcursor->mx_cursor, MDBX_NOSPILL);
rc = mdbx_cursor_del(&mc->mc_xcursor->mx_cursor, 0);
if (unlikely(rc))
return rc;
/* If sub-DB still has entries, we're done */

View File

@ -1681,9 +1681,6 @@ typedef struct MDBX_node {
#define CMP2INT(a, b) (((a) > (b)) - ((b) > (a)))
#endif
/* Do not spill pages to disk if txn is getting full, may fail instead */
#define MDBX_NOSPILL 0x8000
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
int64pgno(int64_t i64) {
if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1))