mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 17:34:14 +08:00
mdbx: refine page-rebalance.
Change-Id: Ia94379fa51eb5da1e1ec1a52cc1dd8b67ceb150a
This commit is contained in:
parent
46dcd6e7ca
commit
0cebc50291
110
src/core.c
110
src/core.c
@ -16175,25 +16175,29 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
|
|
||||||
/* The threshold of minimum page fill factor, as a number of free bytes on a
|
/* The threshold of minimum page fill factor, as a number of free bytes on a
|
||||||
* page. Pages emptier than this are candidates for merging. */
|
* page. Pages emptier than this are candidates for merging. */
|
||||||
const unsigned room_threshold =
|
unsigned room_threshold =
|
||||||
page_space(mc->mc_txn->mt_env) -
|
page_space(mc->mc_txn->mt_env) -
|
||||||
(page_space(mc->mc_txn->mt_env) >> threshold_fill_exp2);
|
(page_space(mc->mc_txn->mt_env) >> threshold_fill_exp2);
|
||||||
|
|
||||||
const MDBX_page *const tp = mc->mc_pg[mc->mc_top];
|
const MDBX_page *const tp = mc->mc_pg[mc->mc_top];
|
||||||
mdbx_debug("rebalancing %s page %" PRIaPGNO " (has %u keys, %.1f%% full)",
|
const unsigned numkeys = page_numkeys(tp);
|
||||||
(pagetype & P_LEAF) ? "leaf" : "branch", tp->mp_pgno,
|
const unsigned room = page_room(tp);
|
||||||
page_numkeys(tp), page_fill(mc->mc_txn->mt_env, tp));
|
mdbx_debug("rebalancing %s page %" PRIaPGNO
|
||||||
|
" (has %u keys, full %.1f%%, used %u, room %u bytes )",
|
||||||
|
(pagetype & P_LEAF) ? "leaf" : "branch", tp->mp_pgno, numkeys,
|
||||||
|
page_fill(mc->mc_txn->mt_env, tp),
|
||||||
|
page_used(mc->mc_txn->mt_env, tp), room);
|
||||||
|
|
||||||
if (unlikely(page_numkeys(tp) < minkeys)) {
|
if (unlikely(numkeys < minkeys)) {
|
||||||
mdbx_debug("page %" PRIaPGNO " must be merged due keys < %u threshold",
|
mdbx_debug("page %" PRIaPGNO " must be merged due keys < %u threshold",
|
||||||
tp->mp_pgno, minkeys);
|
tp->mp_pgno, minkeys);
|
||||||
} else if (unlikely(page_room(tp) > room_threshold)) {
|
} else if (unlikely(room > room_threshold)) {
|
||||||
mdbx_debug("page %" PRIaPGNO " should be merged due room %u > %u threshold",
|
mdbx_debug("page %" PRIaPGNO " should be merged due room %u > %u threshold",
|
||||||
tp->mp_pgno, page_room(tp), room_threshold);
|
tp->mp_pgno, room, room_threshold);
|
||||||
} else {
|
} else {
|
||||||
mdbx_debug("no need to rebalance page %" PRIaPGNO
|
mdbx_debug("no need to rebalance page %" PRIaPGNO
|
||||||
", room %u < %u threshold",
|
", room %u < %u threshold",
|
||||||
tp->mp_pgno, page_room(tp), room_threshold);
|
tp->mp_pgno, room, room_threshold);
|
||||||
mdbx_cassert(mc, mc->mc_db->md_entries > 0);
|
mdbx_cassert(mc, mc->mc_db->md_entries > 0);
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -16316,110 +16320,86 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
mdbx_cassert(mc, PAGETYPE(right) == PAGETYPE(mc->mc_pg[mc->mc_top]));
|
mdbx_cassert(mc, PAGETYPE(right) == PAGETYPE(mc->mc_pg[mc->mc_top]));
|
||||||
}
|
}
|
||||||
|
|
||||||
const indx_t ki_top = mc->mc_ki[mc->mc_top];
|
const unsigned ki_top = mc->mc_ki[mc->mc_top];
|
||||||
const indx_t ki_pre_top = mn.mc_ki[pre_top];
|
const unsigned ki_pre_top = mn.mc_ki[pre_top];
|
||||||
const indx_t nkeys = (indx_t)page_numkeys(mn.mc_pg[mn.mc_top]);
|
const unsigned nkeys = page_numkeys(mn.mc_pg[mn.mc_top]);
|
||||||
if (left && page_room(left) > room_threshold &&
|
|
||||||
(!right || page_room(right) < page_room(left))) {
|
const unsigned left_room = left ? page_room(left) : 0;
|
||||||
|
const unsigned right_room = right ? page_room(right) : 0;
|
||||||
|
retry:
|
||||||
|
if (left_room > room_threshold && left_room >= right_room) {
|
||||||
/* try merge with left */
|
/* try merge with left */
|
||||||
mdbx_cassert(mc, page_numkeys(left) >= minkeys);
|
mdbx_cassert(mc, page_numkeys(left) >= minkeys);
|
||||||
mn.mc_pg[mn.mc_top] = left;
|
mn.mc_pg[mn.mc_top] = left;
|
||||||
mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1;
|
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top - 1);
|
||||||
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
|
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
|
||||||
mc->mc_ki[mc->mc_top] = 0;
|
mc->mc_ki[mc->mc_top] = 0;
|
||||||
const indx_t new_ki = (indx_t)(ki_top + page_numkeys(left));
|
const unsigned new_ki = ki_top + page_numkeys(left);
|
||||||
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
|
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
|
||||||
/* We want mdbx_rebalance to find mn when doing fixups */
|
/* We want mdbx_rebalance to find mn when doing fixups */
|
||||||
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
|
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
|
||||||
if (likely(rc != MDBX_RESULT_TRUE)) {
|
if (likely(rc != MDBX_RESULT_TRUE)) {
|
||||||
cursor_copy_internal(&mn, mc);
|
cursor_copy_internal(&mn, mc);
|
||||||
mc->mc_ki[mc->mc_top] = new_ki;
|
mc->mc_ki[mc->mc_top] = (indx_t)new_ki;
|
||||||
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (right && page_room(right) > room_threshold) {
|
if (right_room > room_threshold) {
|
||||||
/* try merge with right */
|
/* try merge with right */
|
||||||
mdbx_cassert(mc, page_numkeys(right) >= minkeys);
|
mdbx_cassert(mc, page_numkeys(right) >= minkeys);
|
||||||
mn.mc_pg[mn.mc_top] = right;
|
mn.mc_pg[mn.mc_top] = right;
|
||||||
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
|
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top + 1);
|
||||||
mn.mc_ki[mn.mc_top] = 0;
|
mn.mc_ki[mn.mc_top] = 0;
|
||||||
mc->mc_ki[mc->mc_top] = nkeys;
|
mc->mc_ki[mc->mc_top] = (indx_t)nkeys;
|
||||||
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc));
|
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc));
|
||||||
if (likely(rc != MDBX_RESULT_TRUE)) {
|
if (likely(rc != MDBX_RESULT_TRUE)) {
|
||||||
mc->mc_ki[mc->mc_top] = ki_top;
|
mc->mc_ki[mc->mc_top] = (indx_t)ki_top;
|
||||||
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (left && page_numkeys(left) > minkeys &&
|
|
||||||
(!right || page_numkeys(right) <= minkeys ||
|
const unsigned left_nkeys = left ? page_numkeys(left) : 0;
|
||||||
page_room(right) > page_room(left))) {
|
const unsigned right_nkeys = right ? page_numkeys(right) : 0;
|
||||||
|
if (left_nkeys > minkeys &&
|
||||||
|
(right_nkeys <= left_nkeys || right_room >= left_room)) {
|
||||||
/* try move from left */
|
/* try move from left */
|
||||||
mn.mc_pg[mn.mc_top] = left;
|
mn.mc_pg[mn.mc_top] = left;
|
||||||
mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1;
|
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top - 1);
|
||||||
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
|
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
|
||||||
mc->mc_ki[mc->mc_top] = 0;
|
mc->mc_ki[mc->mc_top] = 0;
|
||||||
WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, true));
|
WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, true));
|
||||||
if (likely(rc != MDBX_RESULT_TRUE)) {
|
if (likely(rc != MDBX_RESULT_TRUE)) {
|
||||||
mc->mc_ki[mc->mc_top] = ki_top + 1;
|
mc->mc_ki[mc->mc_top] = (indx_t)(ki_top + 1);
|
||||||
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (right && page_numkeys(right) > minkeys) {
|
if (right_nkeys > minkeys) {
|
||||||
/* try move from right */
|
/* try move from right */
|
||||||
mn.mc_pg[mn.mc_top] = right;
|
mn.mc_pg[mn.mc_top] = right;
|
||||||
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
|
mn.mc_ki[mn.mc_top - 1] = (indx_t)(ki_pre_top + 1);
|
||||||
mn.mc_ki[mn.mc_top] = 0;
|
mn.mc_ki[mn.mc_top] = 0;
|
||||||
mc->mc_ki[mc->mc_top] = nkeys;
|
mc->mc_ki[mc->mc_top] = (indx_t)nkeys;
|
||||||
WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, false));
|
WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, false));
|
||||||
if (likely(rc != MDBX_RESULT_TRUE)) {
|
if (likely(rc != MDBX_RESULT_TRUE)) {
|
||||||
mc->mc_ki[mc->mc_top] = ki_top;
|
mc->mc_ki[mc->mc_top] = (indx_t)ki_top;
|
||||||
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nkeys >= minkeys) {
|
if (nkeys >= minkeys) {
|
||||||
#if MDBX_DEBUG > 0
|
mc->mc_ki[mc->mc_top] = (indx_t)ki_top;
|
||||||
if (mdbx_audit_enabled())
|
if (!mdbx_audit_enabled())
|
||||||
return mdbx_cursor_check(mc, C_UPDATING);
|
return MDBX_SUCCESS;
|
||||||
#endif
|
return mdbx_cursor_check(mc, C_UPDATING);
|
||||||
return MDBX_SUCCESS;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (left && (!right || page_room(left) > page_room(right))) {
|
if (likely(room_threshold > 0)) {
|
||||||
/* try merge with left */
|
room_threshold = 0;
|
||||||
mdbx_cassert(mc, page_numkeys(left) >= minkeys);
|
goto retry;
|
||||||
mn.mc_pg[mn.mc_top] = left;
|
|
||||||
mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1;
|
|
||||||
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
|
|
||||||
mc->mc_ki[mc->mc_top] = 0;
|
|
||||||
const indx_t new_ki = (indx_t)(ki_top + page_numkeys(left));
|
|
||||||
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
|
|
||||||
/* We want mdbx_rebalance to find mn when doing fixups */
|
|
||||||
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
|
|
||||||
if (likely(rc != MDBX_RESULT_TRUE)) {
|
|
||||||
cursor_copy_internal(&mn, mc);
|
|
||||||
mc->mc_ki[mc->mc_top] = new_ki;
|
|
||||||
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (likely(right)) {
|
|
||||||
/* try merge with right */
|
|
||||||
mdbx_cassert(mc, page_numkeys(right) >= minkeys);
|
|
||||||
mn.mc_pg[mn.mc_top] = right;
|
|
||||||
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
|
|
||||||
mn.mc_ki[mn.mc_top] = 0;
|
|
||||||
mc->mc_ki[mc->mc_top] = nkeys;
|
|
||||||
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc));
|
|
||||||
if (likely(rc != MDBX_RESULT_TRUE)) {
|
|
||||||
mc->mc_ki[mc->mc_top] = ki_top;
|
|
||||||
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return MDBX_PROBLEM;
|
return MDBX_PROBLEM;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user