mdbx: refine page-rebalance.

Change-Id: Ia94379fa51eb5da1e1ec1a52cc1dd8b67ceb150a
This commit is contained in:
Leonid Yuriev 2021-03-03 16:05:52 +03:00
parent 46dcd6e7ca
commit 0cebc50291

View File

@ -16175,25 +16175,29 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
/* The threshold of minimum page fill factor, as a number of free bytes on a
* page. Pages emptier than this are candidates for merging. */
const unsigned room_threshold =
unsigned room_threshold =
page_space(mc->mc_txn->mt_env) -
(page_space(mc->mc_txn->mt_env) >> threshold_fill_exp2);
const MDBX_page *const tp = mc->mc_pg[mc->mc_top];
mdbx_debug("rebalancing %s page %" PRIaPGNO " (has %u keys, %.1f%% full)",
(pagetype & P_LEAF) ? "leaf" : "branch", tp->mp_pgno,
page_numkeys(tp), page_fill(mc->mc_txn->mt_env, tp));
const unsigned numkeys = page_numkeys(tp);
const unsigned room = page_room(tp);
mdbx_debug("rebalancing %s page %" PRIaPGNO
" (has %u keys, full %.1f%%, used %u, room %u bytes )",
(pagetype & P_LEAF) ? "leaf" : "branch", tp->mp_pgno, numkeys,
page_fill(mc->mc_txn->mt_env, tp),
page_used(mc->mc_txn->mt_env, tp), room);
if (unlikely(page_numkeys(tp) < minkeys)) {
if (unlikely(numkeys < minkeys)) {
mdbx_debug("page %" PRIaPGNO " must be merged due keys < %u threshold",
tp->mp_pgno, minkeys);
} else if (unlikely(page_room(tp) > room_threshold)) {
} else if (unlikely(room > room_threshold)) {
mdbx_debug("page %" PRIaPGNO " should be merged due room %u > %u threshold",
tp->mp_pgno, page_room(tp), room_threshold);
tp->mp_pgno, room, room_threshold);
} else {
mdbx_debug("no need to rebalance page %" PRIaPGNO
", room %u < %u threshold",
tp->mp_pgno, page_room(tp), room_threshold);
tp->mp_pgno, room, room_threshold);
mdbx_cassert(mc, mc->mc_db->md_entries > 0);
return MDBX_SUCCESS;
}
@ -16316,110 +16320,86 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
mdbx_cassert(mc, PAGETYPE(right) == PAGETYPE(mc->mc_pg[mc->mc_top]));
}
const indx_t ki_top = mc->mc_ki[mc->mc_top];
const indx_t ki_pre_top = mn.mc_ki[pre_top];
const indx_t nkeys = (indx_t)page_numkeys(mn.mc_pg[mn.mc_top]);
if (left && page_room(left) > room_threshold &&
(!right || page_room(right) < page_room(left))) {
const unsigned ki_top = mc->mc_ki[mc->mc_top];
const unsigned ki_pre_top = mn.mc_ki[pre_top];
const unsigned nkeys = page_numkeys(mn.mc_pg[mn.mc_top]);
const unsigned left_room = left ? page_room(left) : 0;
const unsigned right_room = right ? page_room(right) : 0;
retry:
if (left_room > room_threshold && left_room >= right_room) {
/* try merge with left */
mdbx_cassert(mc, page_numkeys(left) >= minkeys);
mn.mc_pg[mn.mc_top] = left;
mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1;
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top - 1);
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
mc->mc_ki[mc->mc_top] = 0;
const indx_t new_ki = (indx_t)(ki_top + page_numkeys(left));
const unsigned new_ki = ki_top + page_numkeys(left);
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
/* We want mdbx_rebalance to find mn when doing fixups */
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
if (likely(rc != MDBX_RESULT_TRUE)) {
cursor_copy_internal(&mn, mc);
mc->mc_ki[mc->mc_top] = new_ki;
mc->mc_ki[mc->mc_top] = (indx_t)new_ki;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
}
}
if (right && page_room(right) > room_threshold) {
if (right_room > room_threshold) {
/* try merge with right */
mdbx_cassert(mc, page_numkeys(right) >= minkeys);
mn.mc_pg[mn.mc_top] = right;
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top + 1);
mn.mc_ki[mn.mc_top] = 0;
mc->mc_ki[mc->mc_top] = nkeys;
mc->mc_ki[mc->mc_top] = (indx_t)nkeys;
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top;
mc->mc_ki[mc->mc_top] = (indx_t)ki_top;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
}
}
if (left && page_numkeys(left) > minkeys &&
(!right || page_numkeys(right) <= minkeys ||
page_room(right) > page_room(left))) {
const unsigned left_nkeys = left ? page_numkeys(left) : 0;
const unsigned right_nkeys = right ? page_numkeys(right) : 0;
if (left_nkeys > minkeys &&
(right_nkeys <= left_nkeys || right_room >= left_room)) {
/* try move from left */
mn.mc_pg[mn.mc_top] = left;
mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1;
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top - 1);
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
mc->mc_ki[mc->mc_top] = 0;
WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, true));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top + 1;
mc->mc_ki[mc->mc_top] = (indx_t)(ki_top + 1);
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
}
}
if (right && page_numkeys(right) > minkeys) {
if (right_nkeys > minkeys) {
/* try move from right */
mn.mc_pg[mn.mc_top] = right;
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top + 1);
mn.mc_ki[mn.mc_top] = 0;
mc->mc_ki[mc->mc_top] = nkeys;
mc->mc_ki[mc->mc_top] = (indx_t)nkeys;
WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, false));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top;
mc->mc_ki[mc->mc_top] = (indx_t)ki_top;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
}
}
if (nkeys >= minkeys) {
#if MDBX_DEBUG > 0
if (mdbx_audit_enabled())
return mdbx_cursor_check(mc, C_UPDATING);
#endif
return MDBX_SUCCESS;
mc->mc_ki[mc->mc_top] = (indx_t)ki_top;
if (!mdbx_audit_enabled())
return MDBX_SUCCESS;
return mdbx_cursor_check(mc, C_UPDATING);
}
if (left && (!right || page_room(left) > page_room(right))) {
/* try merge with left */
mdbx_cassert(mc, page_numkeys(left) >= minkeys);
mn.mc_pg[mn.mc_top] = left;
mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1;
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
mc->mc_ki[mc->mc_top] = 0;
const indx_t new_ki = (indx_t)(ki_top + page_numkeys(left));
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
/* We want mdbx_rebalance to find mn when doing fixups */
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
if (likely(rc != MDBX_RESULT_TRUE)) {
cursor_copy_internal(&mn, mc);
mc->mc_ki[mc->mc_top] = new_ki;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
}
}
if (likely(right)) {
/* try merge with right */
mdbx_cassert(mc, page_numkeys(right) >= minkeys);
mn.mc_pg[mn.mc_top] = right;
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
mn.mc_ki[mn.mc_top] = 0;
mc->mc_ki[mc->mc_top] = nkeys;
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
}
if (likely(room_threshold > 0)) {
room_threshold = 0;
goto retry;
}
return MDBX_PROBLEM;
}