mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 17:14:12 +08:00
mdbx: refine mdbx_page_spill()
.
More for https://github.com/erthink/libmdbx/issues/132. Change-Id: I40788c13b54453c17d5e5dae6c3d2f93226f8e00
This commit is contained in:
parent
12ee5e6cac
commit
338de2e1fb
73
src/core.c
73
src/core.c
@ -4317,51 +4317,58 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
|
||||
const MDBX_val *data) {
|
||||
if (mc->mc_flags & C_SUB)
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
MDBX_txn *txn = mc->mc_txn;
|
||||
MDBX_dpl *const dl = txn->tw.dirtylist;
|
||||
|
||||
/* Estimate how much space this op will take */
|
||||
pgno_t i = mc->mc_db->md_depth;
|
||||
/* Named DBs also dirty the main DB */
|
||||
if (mc->mc_dbi >= CORE_DBS)
|
||||
i += txn->mt_dbs[MAIN_DBI].md_depth;
|
||||
/* For puts, roughly factor in the key+data size */
|
||||
if (key)
|
||||
i += bytes2pgno(txn->mt_env, node_size(key, data) + txn->mt_env->me_psize);
|
||||
i += i; /* double it for good measure */
|
||||
pgno_t need = i;
|
||||
|
||||
if (txn->tw.dirtyroom > i)
|
||||
if (txn->mt_flags & MDBX_WRITEMAP)
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
/* Less aggressive spill - we originally spilled the entire dirty list,
|
||||
* with a few exceptions for cursor pages and DB root pages. But this
|
||||
* turns out to be a lot of wasted effort because in a large txn many
|
||||
* of those pages will need to be used again. So now we spill only 1/8th
|
||||
* of the dirty pages. Testing revealed this to be a good tradeoff,
|
||||
* better than 1/2, 1/4, or 1/10. */
|
||||
if (need < txn->mt_env->me_options.dp_limit / 8)
|
||||
need = txn->mt_env->me_options.dp_limit / 8;
|
||||
/* Estimate how much space this op will take: */
|
||||
/* 1) Max b-tree height, reasonable enough with including dups' sub-tree */
|
||||
size_t need = CURSOR_STACK + 3;
|
||||
/* 2) GC/FreeDB for any payload */
|
||||
if (mc->mc_dbi > FREE_DBI) {
|
||||
need += txn->mt_dbs[FREE_DBI].md_depth + 3;
|
||||
/* 3) Named DBs also dirty the main DB */
|
||||
if (mc->mc_dbi > MAIN_DBI)
|
||||
need += txn->mt_dbs[MAIN_DBI].md_depth + 3;
|
||||
}
|
||||
/* 4) Roughly factor in the key+data size */
|
||||
need += bytes2pgno(txn->mt_env, node_size(key, data)) + 1;
|
||||
/* 5) Double it for safety enough reserve */
|
||||
need += need;
|
||||
if (likely(txn->tw.dirtyroom > need))
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
const size_t spill_min = (txn->tw.dirtylist->length / /* TODO: options */ 8);
|
||||
const size_t spill_max = (txn->tw.dirtylist->length / /* TODO: options */ 2);
|
||||
size_t spill = need - txn->tw.dirtyroom;
|
||||
spill = (spill < spill_max) ? spill : spill_max;
|
||||
spill = (spill > spill_min) ? spill : spill_min;
|
||||
|
||||
int rc;
|
||||
if (!txn->tw.spill_pages) {
|
||||
txn->tw.spill_pages = mdbx_pnl_alloc(need);
|
||||
if (unlikely(!txn->tw.spill_pages))
|
||||
return MDBX_ENOMEM;
|
||||
txn->tw.spill_pages = mdbx_pnl_alloc(spill);
|
||||
if (unlikely(!txn->tw.spill_pages)) {
|
||||
rc = MDBX_ENOMEM;
|
||||
goto bailout;
|
||||
}
|
||||
} else {
|
||||
/* purge deleted slots */
|
||||
mdbx_pnl_purge_odd(txn->tw.spill_pages, 1);
|
||||
mdbx_pnl_reserve(&txn->tw.spill_pages, spill);
|
||||
}
|
||||
mdbx_notice("spilling %zu pages (have %u dirty-room, need %zu)", spill,
|
||||
txn->tw.dirtyroom, need);
|
||||
|
||||
/* Preserve pages which may soon be dirtied again */
|
||||
mdbx_pages_xkeep(mc, P_DIRTY, true);
|
||||
|
||||
MDBX_dpl *const dl = mdbx_dpl_sort(txn->tw.dirtylist);
|
||||
/* Save the page IDs of all the pages we're flushing */
|
||||
/* flush from the tail forward, this saves a lot of shifting later on. */
|
||||
int rc;
|
||||
for (i = dl->length; i && need; i--) {
|
||||
pgno_t pn = dl->items[i].pgno << 1;
|
||||
MDBX_page *dp = dl->items[i].ptr;
|
||||
size_t keep = dl->length;
|
||||
for (; keep && spill; keep--) {
|
||||
pgno_t pn = dl->items[keep].pgno << 1;
|
||||
MDBX_page *dp = dl->items[keep].ptr;
|
||||
if (dp->mp_flags & (P_LOOSE | P_KEEP))
|
||||
continue;
|
||||
/* Can't spill twice,
|
||||
@ -4381,17 +4388,17 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
|
||||
rc = mdbx_pnl_append(&txn->tw.spill_pages, pn);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
need--;
|
||||
spill--;
|
||||
}
|
||||
mdbx_pnl_sort(txn->tw.spill_pages);
|
||||
|
||||
/* Flush the spilled part of dirty list */
|
||||
rc = mdbx_page_flush(txn, i);
|
||||
rc = mdbx_page_flush(txn, keep);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
/* Reset any dirty pages we kept that page_flush didn't see */
|
||||
mdbx_pages_xkeep(mc, P_DIRTY | P_KEEP, i != 0);
|
||||
mdbx_pages_xkeep(mc, P_DIRTY | P_KEEP, keep > 0);
|
||||
|
||||
bailout:
|
||||
txn->mt_flags |= rc ? MDBX_TXN_ERROR : MDBX_TXN_SPILLS;
|
||||
|
@ -831,13 +831,10 @@ struct MDBX_txn {
|
||||
};
|
||||
};
|
||||
|
||||
/* Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty.
|
||||
* At 4 keys per node, enough for 2^64 nodes, so there's probably no need to
|
||||
* raise this on a 64 bit machine. */
|
||||
#if MDBX_WORDBITS >= 64
|
||||
#define CURSOR_STACK 28
|
||||
#define CURSOR_STACK 32
|
||||
#else
|
||||
#define CURSOR_STACK 20
|
||||
#define CURSOR_STACK 24
|
||||
#endif
|
||||
|
||||
struct MDBX_xcursor;
|
||||
|
Loading…
x
Reference in New Issue
Block a user