mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-23 01:28:20 +08:00
mdbx: alter mdbx_rebalance().
This commit is contained in:
parent
ff738f1512
commit
76f7c118c6
97
src/mdbx.c
97
src/mdbx.c
@ -10011,12 +10011,14 @@ static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) {
|
|||||||
* Returns 0 on success, non-zero on failure. */
|
* Returns 0 on success, non-zero on failure. */
|
||||||
static int mdbx_rebalance(MDBX_cursor *mc) {
|
static int mdbx_rebalance(MDBX_cursor *mc) {
|
||||||
MDBX_node *node;
|
MDBX_node *node;
|
||||||
int rc, fromleft;
|
int rc;
|
||||||
unsigned ptop, minkeys, thresh;
|
unsigned minkeys, thresh;
|
||||||
MDBX_cursor mn;
|
|
||||||
indx_t oldki;
|
|
||||||
|
|
||||||
if (IS_BRANCH(mc->mc_pg[mc->mc_top])) {
|
mdbx_cassert(mc, mc->mc_snum > 0);
|
||||||
|
mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth ||
|
||||||
|
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1]));
|
||||||
|
const int pagetype = PAGETYPE(mc->mc_pg[mc->mc_top]);
|
||||||
|
if (pagetype == P_BRANCH) {
|
||||||
minkeys = 2;
|
minkeys = 2;
|
||||||
thresh = 1;
|
thresh = 1;
|
||||||
} else {
|
} else {
|
||||||
@ -10024,9 +10026,9 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
thresh = FILL_THRESHOLD;
|
thresh = FILL_THRESHOLD;
|
||||||
}
|
}
|
||||||
mdbx_debug("rebalancing %s page %" PRIaPGNO " (has %u keys, %.1f%% full)",
|
mdbx_debug("rebalancing %s page %" PRIaPGNO " (has %u keys, %.1f%% full)",
|
||||||
IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
|
(pagetype & P_LEAF) ? "leaf" : "branch",
|
||||||
mc->mc_pg[mc->mc_top]->mp_pgno, NUMKEYS(mc->mc_pg[mc->mc_top]),
|
mc->mc_pg[mc->mc_top]->mp_pgno, NUMKEYS(mc->mc_pg[mc->mc_top]),
|
||||||
(float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10);
|
PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10.24);
|
||||||
|
|
||||||
if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= thresh &&
|
if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= thresh &&
|
||||||
NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
|
NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
|
||||||
@ -10042,6 +10044,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
mdbx_cassert(mc, (mc->mc_db->md_entries == 0) == (nkeys == 0));
|
mdbx_cassert(mc, (mc->mc_db->md_entries == 0) == (nkeys == 0));
|
||||||
if (IS_SUBP(mp)) {
|
if (IS_SUBP(mp)) {
|
||||||
mdbx_debug("Can't rebalance a subpage, ignoring");
|
mdbx_debug("Can't rebalance a subpage, ignoring");
|
||||||
|
mdbx_cassert(mc, pagetype & P_LEAF);
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
if (nkeys == 0) {
|
if (nkeys == 0) {
|
||||||
@ -10056,7 +10059,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
if (mc->mc_flags & C_SUB)
|
if (mc->mc_flags & C_SUB)
|
||||||
mdbx_outer_db(mc)->md_leaf_pages -= 1;
|
mdbx_outer_db(mc)->md_leaf_pages -= 1;
|
||||||
rc = mdbx_pnl_append(&mc->mc_txn->mt_befree_pages, mp->mp_pgno);
|
rc = mdbx_pnl_append(&mc->mc_txn->mt_befree_pages, mp->mp_pgno);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
/* Adjust cursors pointing to mp */
|
/* Adjust cursors pointing to mp */
|
||||||
const MDBX_dbi dbi = mc->mc_dbi;
|
const MDBX_dbi dbi = mc->mc_dbi;
|
||||||
@ -10064,7 +10067,8 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
m2 = m2->mc_next) {
|
m2 = m2->mc_next) {
|
||||||
MDBX_cursor *m3 =
|
MDBX_cursor *m3 =
|
||||||
(mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
|
(mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
|
||||||
if (!(m3->mc_flags & C_INITIALIZED) || (m3->mc_snum < mc->mc_snum))
|
if (m3 == mc || !(m3->mc_flags & C_INITIALIZED) ||
|
||||||
|
(m3->mc_snum < mc->mc_snum))
|
||||||
continue;
|
continue;
|
||||||
if (m3->mc_pg[0] == mp) {
|
if (m3->mc_pg[0] == mp) {
|
||||||
m3->mc_snum = 0;
|
m3->mc_snum = 0;
|
||||||
@ -10082,7 +10086,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
return rc;
|
return rc;
|
||||||
mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0));
|
mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0));
|
||||||
rc = mdbx_page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], NULL);
|
rc = mdbx_page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], NULL);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
mc->mc_db->md_depth--;
|
mc->mc_db->md_depth--;
|
||||||
mc->mc_db->md_branch_pages--;
|
mc->mc_db->md_branch_pages--;
|
||||||
@ -10093,19 +10097,14 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
mc->mc_pg[i] = mc->mc_pg[i + 1];
|
mc->mc_pg[i] = mc->mc_pg[i + 1];
|
||||||
mc->mc_ki[i] = mc->mc_ki[i + 1];
|
mc->mc_ki[i] = mc->mc_ki[i + 1];
|
||||||
}
|
}
|
||||||
{
|
|
||||||
/* Adjust other cursors pointing to mp */
|
/* Adjust other cursors pointing to mp */
|
||||||
MDBX_cursor *m2, *m3;
|
MDBX_cursor *m2, *m3;
|
||||||
MDBX_dbi dbi = mc->mc_dbi;
|
MDBX_dbi dbi = mc->mc_dbi;
|
||||||
|
|
||||||
for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
|
for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
|
||||||
if (mc->mc_flags & C_SUB)
|
m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
|
||||||
m3 = &m2->mc_xcursor->mx_cursor;
|
if (m3 == mc || !(m3->mc_flags & C_INITIALIZED))
|
||||||
else
|
|
||||||
m3 = m2;
|
|
||||||
if (m3 == mc)
|
|
||||||
continue;
|
|
||||||
if (!(m3->mc_flags & C_INITIALIZED))
|
|
||||||
continue;
|
continue;
|
||||||
if (m3->mc_pg[0] == mp) {
|
if (m3->mc_pg[0] == mp) {
|
||||||
for (int i = 0; i < mc->mc_db->md_depth; i++) {
|
for (int i = 0; i < mc->mc_db->md_depth; i++) {
|
||||||
@ -10116,7 +10115,11 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
m3->mc_top--;
|
m3->mc_top--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]) ||
|
||||||
|
PAGETYPE(mc->mc_pg[mc->mc_top]) == pagetype);
|
||||||
|
mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth ||
|
||||||
|
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1]));
|
||||||
} else {
|
} else {
|
||||||
mdbx_debug("root page %" PRIaPGNO
|
mdbx_debug("root page %" PRIaPGNO
|
||||||
" doesn't need rebalancing (flags 0x%x)",
|
" doesn't need rebalancing (flags 0x%x)",
|
||||||
@ -10127,50 +10130,53 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
|
|
||||||
/* The parent (branch page) must have at least 2 pointers,
|
/* The parent (branch page) must have at least 2 pointers,
|
||||||
* otherwise the tree is invalid. */
|
* otherwise the tree is invalid. */
|
||||||
ptop = mc->mc_top - 1;
|
const unsigned pre_top = mc->mc_top - 1;
|
||||||
mdbx_cassert(mc, IS_BRANCH(mc->mc_pg[ptop]));
|
mdbx_cassert(mc, IS_BRANCH(mc->mc_pg[pre_top]));
|
||||||
mdbx_cassert(mc, NUMKEYS(mc->mc_pg[ptop]) > 1);
|
mdbx_cassert(mc, !IS_SUBP(mc->mc_pg[0]));
|
||||||
|
mdbx_cassert(mc, NUMKEYS(mc->mc_pg[pre_top]) > 1);
|
||||||
|
|
||||||
/* Leaf page fill factor is below the threshold.
|
/* Leaf page fill factor is below the threshold.
|
||||||
* Try to move keys from left or right neighbor, or
|
* Try to move keys from left or right neighbor, or
|
||||||
* merge with a neighbor page. */
|
* merge with a neighbor page. */
|
||||||
|
|
||||||
/* Find neighbors. */
|
/* Find neighbors. */
|
||||||
|
MDBX_cursor mn;
|
||||||
mdbx_cursor_copy(mc, &mn);
|
mdbx_cursor_copy(mc, &mn);
|
||||||
mn.mc_xcursor = NULL;
|
mn.mc_xcursor = NULL;
|
||||||
|
|
||||||
oldki = mc->mc_ki[mc->mc_top];
|
indx_t oldki = mc->mc_ki[mc->mc_top];
|
||||||
if (mc->mc_ki[ptop] == 0) {
|
bool fromleft;
|
||||||
|
if (mc->mc_ki[pre_top] == 0) {
|
||||||
/* We're the leftmost leaf in our parent. */
|
/* We're the leftmost leaf in our parent. */
|
||||||
mdbx_debug("reading right neighbor");
|
mdbx_debug("reading right neighbor");
|
||||||
mn.mc_ki[ptop]++;
|
mn.mc_ki[pre_top]++;
|
||||||
node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]);
|
node = NODEPTR(mc->mc_pg[pre_top], mn.mc_ki[pre_top]);
|
||||||
rc = mdbx_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL);
|
rc = mdbx_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
mdbx_cassert(mc, PAGETYPE(mn.mc_pg[mn.mc_top]) ==
|
mdbx_cassert(mc, PAGETYPE(mn.mc_pg[mn.mc_top]) ==
|
||||||
PAGETYPE(mc->mc_pg[mc->mc_top]));
|
PAGETYPE(mc->mc_pg[mc->mc_top]));
|
||||||
mn.mc_ki[mn.mc_top] = 0;
|
mn.mc_ki[mn.mc_top] = 0;
|
||||||
mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]);
|
mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]);
|
||||||
fromleft = 0;
|
fromleft = false;
|
||||||
} else {
|
} else {
|
||||||
/* There is at least one neighbor to the left. */
|
/* There is at least one neighbor to the left. */
|
||||||
mdbx_debug("reading left neighbor");
|
mdbx_debug("reading left neighbor");
|
||||||
mn.mc_ki[ptop]--;
|
mn.mc_ki[pre_top]--;
|
||||||
node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]);
|
node = NODEPTR(mc->mc_pg[pre_top], mn.mc_ki[pre_top]);
|
||||||
rc = mdbx_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL);
|
rc = mdbx_page_get(mc, NODEPGNO(node), &mn.mc_pg[mn.mc_top], NULL);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
mdbx_cassert(mc, PAGETYPE(mn.mc_pg[mn.mc_top]) ==
|
mdbx_cassert(mc, PAGETYPE(mn.mc_pg[mn.mc_top]) ==
|
||||||
PAGETYPE(mc->mc_pg[mc->mc_top]));
|
PAGETYPE(mc->mc_pg[mc->mc_top]));
|
||||||
mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1;
|
mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1;
|
||||||
mc->mc_ki[mc->mc_top] = 0;
|
mc->mc_ki[mc->mc_top] = 0;
|
||||||
fromleft = 1;
|
fromleft = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
mdbx_debug("found neighbor page %" PRIaPGNO " (%u keys, %.1f%% full)",
|
mdbx_debug("found neighbor page %" PRIaPGNO " (%u keys, %.1f%% full)",
|
||||||
mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]),
|
mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]),
|
||||||
(float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10);
|
PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10.24);
|
||||||
|
|
||||||
/* If the neighbor page is above threshold and has enough keys,
|
/* If the neighbor page is above threshold and has enough keys,
|
||||||
* move one key from it. Otherwise we should try to merge them.
|
* move one key from it. Otherwise we should try to merge them.
|
||||||
@ -10178,24 +10184,39 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh &&
|
if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh &&
|
||||||
NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
|
NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
|
||||||
rc = mdbx_node_move(&mn, mc, fromleft);
|
rc = mdbx_node_move(&mn, mc, fromleft);
|
||||||
if (fromleft) {
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
/* if we inserted on left, bump position up */
|
return rc;
|
||||||
oldki++;
|
oldki += fromleft /* if we inserted on left, bump position up */;
|
||||||
}
|
mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]) ||
|
||||||
|
PAGETYPE(mc->mc_pg[mc->mc_top]) == pagetype);
|
||||||
|
mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth ||
|
||||||
|
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1]));
|
||||||
} else {
|
} else {
|
||||||
if (!fromleft) {
|
if (!fromleft) {
|
||||||
rc = mdbx_page_merge(&mn, mc);
|
rc = mdbx_page_merge(&mn, mc);
|
||||||
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
|
return rc;
|
||||||
|
mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]) ||
|
||||||
|
PAGETYPE(mc->mc_pg[mc->mc_top]) == pagetype);
|
||||||
|
mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth ||
|
||||||
|
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1]));
|
||||||
} else {
|
} else {
|
||||||
oldki += NUMKEYS(mn.mc_pg[mn.mc_top]);
|
oldki += NUMKEYS(mn.mc_pg[mn.mc_top]);
|
||||||
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
|
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
|
||||||
/* We want mdbx_rebalance to find mn when doing fixups */
|
/* We want mdbx_rebalance to find mn when doing fixups */
|
||||||
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
|
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
|
||||||
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
|
return rc;
|
||||||
mdbx_cursor_copy(&mn, mc);
|
mdbx_cursor_copy(&mn, mc);
|
||||||
|
mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]) ||
|
||||||
|
PAGETYPE(mc->mc_pg[mc->mc_top]) == pagetype);
|
||||||
|
mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth ||
|
||||||
|
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1]));
|
||||||
}
|
}
|
||||||
mc->mc_flags &= ~C_EOF;
|
mc->mc_flags &= ~C_EOF;
|
||||||
}
|
}
|
||||||
mc->mc_ki[mc->mc_top] = oldki;
|
mc->mc_ki[mc->mc_top] = oldki;
|
||||||
return rc;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Complete a delete operation started by mdbx_cursor_del(). */
|
/* Complete a delete operation started by mdbx_cursor_del(). */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user