mirror of
https://github.com/isar/libmdbx.git
synced 2024-10-30 23:39:19 +08:00
mdbx: fix/rewrite mdbx_update_gc().
Change-Id: I580a1ff0cbeeb529e2bcbd50d97bfba7bcf5a546
This commit is contained in:
parent
3964c58b80
commit
434f0d5b57
@ -758,8 +758,8 @@ struct MDBX_env {
|
|||||||
MDBX_PNL me_free_pgs;
|
MDBX_PNL me_free_pgs;
|
||||||
/* ID2L of pages written during a write txn. Length MDBX_PNL_UM_SIZE. */
|
/* ID2L of pages written during a write txn. Length MDBX_PNL_UM_SIZE. */
|
||||||
MDBX_ID2L me_dirtylist;
|
MDBX_ID2L me_dirtylist;
|
||||||
/* Max number of freelist items that can fit in a single overflow page */
|
/* Number of freelist items that can fit in a single overflow page */
|
||||||
unsigned me_maxfree_1pg;
|
unsigned me_maxgc_ov1page;
|
||||||
/* Max size of a node on a page */
|
/* Max size of a node on a page */
|
||||||
unsigned me_nodemax;
|
unsigned me_nodemax;
|
||||||
unsigned me_maxkey_limit; /* max size of a key */
|
unsigned me_maxkey_limit; /* max size of a key */
|
||||||
|
366
src/mdbx.c
366
src/mdbx.c
@ -2170,7 +2170,7 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
|
|||||||
if (likely(flags & MDBX_ALLOC_GC)) {
|
if (likely(flags & MDBX_ALLOC_GC)) {
|
||||||
flags |= env->me_flags & (MDBX_COALESCE | MDBX_LIFORECLAIM);
|
flags |= env->me_flags & (MDBX_COALESCE | MDBX_LIFORECLAIM);
|
||||||
if (unlikely(mc->mc_flags & C_RECLAIMING)) {
|
if (unlikely(mc->mc_flags & C_RECLAIMING)) {
|
||||||
/* If mc is updating the freeDB, then the freelist cannot play
|
/* If mc is updating the freeDB, then the befree-list cannot play
|
||||||
* catch-up with itself by growing while trying to save it. */
|
* catch-up with itself by growing while trying to save it. */
|
||||||
flags &=
|
flags &=
|
||||||
~(MDBX_ALLOC_GC | MDBX_ALLOC_KICK | MDBX_COALESCE | MDBX_LIFORECLAIM);
|
~(MDBX_ALLOC_GC | MDBX_ALLOC_KICK | MDBX_COALESCE | MDBX_LIFORECLAIM);
|
||||||
@ -2409,8 +2409,8 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
|
|||||||
if (repg_len > MDBX_PNL_UM_SIZE / 2)
|
if (repg_len > MDBX_PNL_UM_SIZE / 2)
|
||||||
break;
|
break;
|
||||||
if (flags & MDBX_COALESCE) {
|
if (flags & MDBX_COALESCE) {
|
||||||
if (repg_len /* current size */ >= env->me_maxfree_1pg / 2 ||
|
if (repg_len /* current size */ >= env->me_maxgc_ov1page ||
|
||||||
repg_pos /* prev size */ >= env->me_maxfree_1pg / 4)
|
repg_pos /* prev size */ >= env->me_maxgc_ov1page / 2)
|
||||||
flags &= ~MDBX_COALESCE;
|
flags &= ~MDBX_COALESCE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3521,80 +3521,95 @@ static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *mc) {
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Save the freelist as of this transaction to the freeDB.
|
/* Cleanup reclaimed GC records, then save the befree-list as of this
|
||||||
* This changes the freelist. Keep trying until it stabilizes. */
|
* transaction to GC (aka freeDB). This recursively changes the reclaimed-list,
|
||||||
static int mdbx_freelist_save(MDBX_txn *txn) {
|
* loose-list and befree-list. Keep trying until it stabilizes. */
|
||||||
|
static int mdbx_update_gc(MDBX_txn *txn) {
|
||||||
/* env->me_reclaimed_pglist[] can grow and shrink during this call.
|
/* env->me_reclaimed_pglist[] can grow and shrink during this call.
|
||||||
* env->me_last_reclaimed and txn->mt_befree_pages[] can only grow.
|
* env->me_last_reclaimed and txn->mt_befree_pages[] can only grow.
|
||||||
* Page numbers cannot disappear from txn->mt_befree_pages[]. */
|
* Page numbers cannot disappear from txn->mt_befree_pages[]. */
|
||||||
MDBX_cursor mc;
|
|
||||||
MDBX_env *env = txn->mt_env;
|
MDBX_env *env = txn->mt_env;
|
||||||
int rc, more = 1;
|
|
||||||
txnid_t cleanup_reclaimed_id = 0, head_id = 0;
|
|
||||||
size_t befree_stored = 0;
|
|
||||||
intptr_t head_room = 0, total_room = 0;
|
|
||||||
unsigned cleanup_reclaimed_pos = 0, refill_reclaimed_pos = 0;
|
|
||||||
const bool lifo = (env->me_flags & MDBX_LIFORECLAIM) != 0;
|
const bool lifo = (env->me_flags & MDBX_LIFORECLAIM) != 0;
|
||||||
|
|
||||||
rc = mdbx_cursor_init(&mc, txn, FREE_DBI, NULL);
|
MDBX_cursor mc;
|
||||||
|
int rc = mdbx_cursor_init(&mc, txn, FREE_DBI, NULL);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
/* MDBX_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP) */
|
const char *dbg_prefix_mode = lifo ? " lifo" : " fifo";
|
||||||
const intptr_t clean_limit =
|
mdbx_trace("\n>>> @%" PRIaTXN, txn->mt_txnid);
|
||||||
(env->me_flags & (MDBX_NOMEMINIT | MDBX_WRITEMAP)) ? SSIZE_MAX
|
(void)dbg_prefix_mode;
|
||||||
: env->me_maxfree_1pg;
|
unsigned befree_stored = 0, loop = 0;
|
||||||
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
|
|
||||||
|
retry:
|
||||||
|
mdbx_trace(" >> restart");
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
again_on_freelist_change:
|
if (unlikely(/* paranoia */ ++loop > 42)) {
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_error("too more loops %u, bailout", loop);
|
||||||
|
rc = MDBX_PROBLEM;
|
||||||
|
goto bailout;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned placed = 0, cleaned_gc_slot = 0, reused_gc_slot = 0,
|
||||||
|
filled_gc_slot = ~0u;
|
||||||
|
txnid_t cleaned_gc_id = 0,
|
||||||
|
head_gc_id = lifo ? *env->me_oldest : env->me_last_reclaimed;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
/* Come back here after each Put() in case freelist changed */
|
/* Come back here after each Put() in case befree-list changed */
|
||||||
MDBX_val key, data;
|
MDBX_val key, data;
|
||||||
|
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
if (!lifo) {
|
if (!lifo) {
|
||||||
/* If using records from freeDB which we have not yet deleted,
|
/* If using records from freeDB which we have not yet deleted,
|
||||||
* now delete them and any we reserved for me_reclaimed_pglist. */
|
* now delete them and any we reserved for me_reclaimed_pglist. */
|
||||||
while (cleanup_reclaimed_id < env->me_last_reclaimed) {
|
while (cleaned_gc_id < env->me_last_reclaimed) {
|
||||||
rc = mdbx_cursor_first(&mc, &key, NULL);
|
rc = mdbx_cursor_first(&mc, &key, NULL);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
rc = mdbx_prep_backlog(txn, &mc);
|
rc = mdbx_prep_backlog(txn, &mc);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
cleanup_reclaimed_id = head_id = *(txnid_t *)key.iov_base;
|
cleaned_gc_id = head_gc_id = *(txnid_t *)key.iov_base;
|
||||||
total_room = head_room = 0;
|
mdbx_tassert(txn, cleaned_gc_id < *env->me_oldest);
|
||||||
more = 1;
|
placed = 0;
|
||||||
mdbx_tassert(txn, cleanup_reclaimed_id <= env->me_last_reclaimed);
|
mdbx_tassert(txn, cleaned_gc_id <= env->me_last_reclaimed);
|
||||||
mc.mc_flags |= C_RECLAIMING;
|
mc.mc_flags |= C_RECLAIMING;
|
||||||
|
mdbx_trace("%s.cleanup-reclaimed-id %" PRIaTXN, dbg_prefix_mode,
|
||||||
|
cleaned_gc_id);
|
||||||
rc = mdbx_cursor_del(&mc, 0);
|
rc = mdbx_cursor_del(&mc, 0);
|
||||||
mc.mc_flags ^= C_RECLAIMING;
|
mc.mc_flags ^= C_RECLAIMING;
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
} else if (txn->mt_lifo_reclaimed) {
|
} else if (txn->mt_lifo_reclaimed) {
|
||||||
/* LY: cleanup reclaimed records. */
|
/* LY: cleanup reclaimed records. */
|
||||||
while (cleanup_reclaimed_pos < txn->mt_lifo_reclaimed[0]) {
|
while (cleaned_gc_slot < txn->mt_lifo_reclaimed[0]) {
|
||||||
cleanup_reclaimed_id = txn->mt_lifo_reclaimed[++cleanup_reclaimed_pos];
|
cleaned_gc_id = txn->mt_lifo_reclaimed[++cleaned_gc_slot];
|
||||||
key.iov_base = &cleanup_reclaimed_id;
|
head_gc_id = (head_gc_id > cleaned_gc_id) ? cleaned_gc_id : head_gc_id;
|
||||||
key.iov_len = sizeof(cleanup_reclaimed_id);
|
key.iov_base = &cleaned_gc_id;
|
||||||
|
key.iov_len = sizeof(cleaned_gc_id);
|
||||||
rc = mdbx_cursor_get(&mc, &key, NULL, MDBX_SET);
|
rc = mdbx_cursor_get(&mc, &key, NULL, MDBX_SET);
|
||||||
if (likely(rc != MDBX_NOTFOUND)) {
|
if (likely(rc != MDBX_NOTFOUND)) {
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
rc = mdbx_prep_backlog(txn, &mc);
|
rc = mdbx_prep_backlog(txn, &mc);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
mdbx_tassert(txn, cleaned_gc_id < *env->me_oldest);
|
||||||
mc.mc_flags |= C_RECLAIMING;
|
mc.mc_flags |= C_RECLAIMING;
|
||||||
|
mdbx_trace("%s.cleanup-reclaimed-id [%u]%" PRIaTXN, dbg_prefix_mode,
|
||||||
|
cleaned_gc_slot, cleaned_gc_id);
|
||||||
rc = mdbx_cursor_del(&mc, 0);
|
rc = mdbx_cursor_del(&mc, 0);
|
||||||
mc.mc_flags ^= C_RECLAIMING;
|
mc.mc_flags ^= C_RECLAIMING;
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handle loose pages - put ones into the reclaimed- or befree-list
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
if (txn->mt_loose_pages) {
|
if (txn->mt_loose_pages) {
|
||||||
/* Return loose page numbers to me_reclaimed_pglist,
|
/* Return loose page numbers to me_reclaimed_pglist,
|
||||||
@ -3611,8 +3626,9 @@ again_on_freelist_change:
|
|||||||
mdbx_pnl_xappend(txn->mt_befree_pages, mp->mp_pgno);
|
mdbx_pnl_xappend(txn->mt_befree_pages, mp->mp_pgno);
|
||||||
} else {
|
} else {
|
||||||
/* Room for loose pages + temp PNL with same */
|
/* Room for loose pages + temp PNL with same */
|
||||||
if ((rc = mdbx_pnl_need(&env->me_reclaimed_pglist,
|
rc = mdbx_pnl_need(&env->me_reclaimed_pglist,
|
||||||
2 * txn->mt_loose_count + 1)) != 0)
|
2 * txn->mt_loose_count + 1);
|
||||||
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
MDBX_PNL loose = env->me_reclaimed_pglist +
|
MDBX_PNL loose = env->me_reclaimed_pglist +
|
||||||
MDBX_PNL_ALLOCLEN(env->me_reclaimed_pglist) -
|
MDBX_PNL_ALLOCLEN(env->me_reclaimed_pglist) -
|
||||||
@ -3648,9 +3664,9 @@ again_on_freelist_change:
|
|||||||
txn->mt_loose_count = 0;
|
txn->mt_loose_count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handle reclaimed pages - return suitable ones to the unallocated space
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
if (env->me_reclaimed_pglist) {
|
if (env->me_reclaimed_pglist) {
|
||||||
/* Refund suitable pages into "unallocated" space */
|
|
||||||
pgno_t tail = txn->mt_next_pgno;
|
pgno_t tail = txn->mt_next_pgno;
|
||||||
pgno_t *const begin = env->me_reclaimed_pglist + 1;
|
pgno_t *const begin = env->me_reclaimed_pglist + 1;
|
||||||
pgno_t *const end = begin + env->me_reclaimed_pglist[0];
|
pgno_t *const end = begin + env->me_reclaimed_pglist[0];
|
||||||
@ -3680,12 +3696,12 @@ again_on_freelist_change:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Save the PNL of pages freed by this txn, to a single record */
|
// handle befree-list - store it into a single gc-record
|
||||||
if (befree_stored < txn->mt_befree_pages[0]) {
|
if (befree_stored < txn->mt_befree_pages[0]) {
|
||||||
if (unlikely(!befree_stored)) {
|
if (unlikely(!befree_stored)) {
|
||||||
/* Make sure last page of freeDB is touched and on freelist */
|
/* Make sure last page of freeDB is touched and on befree-list */
|
||||||
rc = mdbx_page_search(&mc, NULL, MDBX_PS_LAST | MDBX_PS_MODIFY);
|
rc = mdbx_page_search(&mc, NULL, MDBX_PS_LAST | MDBX_PS_MODIFY);
|
||||||
if (unlikely(rc && rc != MDBX_NOTFOUND))
|
if (unlikely(rc != MDBX_SUCCESS && rc != MDBX_NOTFOUND))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
/* Write to last page of freeDB */
|
/* Write to last page of freeDB */
|
||||||
@ -3694,15 +3710,18 @@ again_on_freelist_change:
|
|||||||
do {
|
do {
|
||||||
data.iov_len = MDBX_PNL_SIZEOF(txn->mt_befree_pages);
|
data.iov_len = MDBX_PNL_SIZEOF(txn->mt_befree_pages);
|
||||||
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE);
|
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
/* Retry if mt_free_pages[] grew during the Put() */
|
/* Retry if mt_befree_pages[] grew during the Put() */
|
||||||
} while (data.iov_len < MDBX_PNL_SIZEOF(txn->mt_befree_pages));
|
} while (data.iov_len < MDBX_PNL_SIZEOF(txn->mt_befree_pages));
|
||||||
|
|
||||||
befree_stored = txn->mt_befree_pages[0];
|
befree_stored = (unsigned)txn->mt_befree_pages[0];
|
||||||
mdbx_pnl_sort(txn->mt_befree_pages);
|
mdbx_pnl_sort(txn->mt_befree_pages);
|
||||||
memcpy(data.iov_base, txn->mt_befree_pages, data.iov_len);
|
memcpy(data.iov_base, txn->mt_befree_pages, data.iov_len);
|
||||||
|
|
||||||
|
mdbx_trace("%s.put-befree #%u @ %" PRIaTXN, dbg_prefix_mode,
|
||||||
|
befree_stored, txn->mt_txnid);
|
||||||
|
|
||||||
if (mdbx_debug_enabled(MDBX_DBG_EXTRA)) {
|
if (mdbx_debug_enabled(MDBX_DBG_EXTRA)) {
|
||||||
unsigned i = befree_stored;
|
unsigned i = befree_stored;
|
||||||
mdbx_debug_extra("PNL write txn %" PRIaTXN " root %" PRIaPGNO
|
mdbx_debug_extra("PNL write txn %" PRIaTXN " root %" PRIaPGNO
|
||||||
@ -3715,45 +3734,33 @@ again_on_freelist_change:
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handle reclaimed and loose pages - merge and store both into gc
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
const intptr_t rpl_len =
|
mdbx_tassert(txn, txn->mt_loose_count == 0);
|
||||||
(env->me_reclaimed_pglist ? env->me_reclaimed_pglist[0] : 0) +
|
const unsigned amount =
|
||||||
txn->mt_loose_count;
|
env->me_reclaimed_pglist ? env->me_reclaimed_pglist[0] : 0;
|
||||||
if (rpl_len && refill_reclaimed_pos == 0)
|
const unsigned left = amount - placed;
|
||||||
refill_reclaimed_pos = 1;
|
|
||||||
|
|
||||||
/* Reserve records for me_reclaimed_pglist[]. Split it if multi-page,
|
mdbx_trace("%s: amount %u, placed %d, left %d", dbg_prefix_mode, amount,
|
||||||
* to avoid searching freeDB for a page range. Use keys in
|
placed, (int)left);
|
||||||
* range [1,me_last_reclaimed]: Smaller than txnid of oldest reader. */
|
if (0 >= (int)left)
|
||||||
if (total_room >= rpl_len) {
|
|
||||||
if (total_room == rpl_len || --more < 0)
|
|
||||||
break;
|
break;
|
||||||
} else if (head_room >= (intptr_t)env->me_maxfree_1pg && head_id > 1) {
|
|
||||||
/* Keep current record (overflow page), add a new one */
|
|
||||||
head_id--;
|
|
||||||
refill_reclaimed_pos++;
|
|
||||||
head_room = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
mdbx_trace(" >> reserving");
|
||||||
|
txnid_t reservation_gc_id;
|
||||||
|
const unsigned lifo_gc_slots =
|
||||||
|
txn->mt_lifo_reclaimed ? (unsigned)txn->mt_lifo_reclaimed[0] : 0;
|
||||||
if (lifo) {
|
if (lifo) {
|
||||||
if (refill_reclaimed_pos >
|
if (reused_gc_slot >= lifo_gc_slots) {
|
||||||
(txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0)) {
|
|
||||||
/* LY: need just a txn-id for save page list. */
|
/* LY: need just a txn-id for save page list. */
|
||||||
rc = mdbx_page_alloc(&mc, 0, NULL, MDBX_ALLOC_GC | MDBX_ALLOC_KICK);
|
rc = mdbx_page_alloc(&mc, 0, NULL, MDBX_ALLOC_GC | MDBX_ALLOC_KICK);
|
||||||
if (likely(rc == 0))
|
if (likely(rc == MDBX_SUCCESS))
|
||||||
/* LY: ok, reclaimed from freedb. */
|
/* LY: ok, reclaimed from freedb. */
|
||||||
continue;
|
continue;
|
||||||
if (unlikely(rc != MDBX_NOTFOUND))
|
if (unlikely(rc != MDBX_NOTFOUND))
|
||||||
/* LY: other troubles... */
|
/* LY: other troubles... */
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
|
||||||
/* LY: freedb is empty, will look any free txn-id in high2low order. */
|
|
||||||
if (unlikely(env->me_last_reclaimed < 1)) {
|
|
||||||
/* LY: not any txn in the past of freedb. */
|
|
||||||
rc = MDBX_MAP_FULL;
|
|
||||||
goto bailout;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unlikely(!txn->mt_lifo_reclaimed)) {
|
if (unlikely(!txn->mt_lifo_reclaimed)) {
|
||||||
txn->mt_lifo_reclaimed = mdbx_txl_alloc();
|
txn->mt_lifo_reclaimed = mdbx_txl_alloc();
|
||||||
if (unlikely(!txn->mt_lifo_reclaimed)) {
|
if (unlikely(!txn->mt_lifo_reclaimed)) {
|
||||||
@ -3761,57 +3768,80 @@ again_on_freelist_change:
|
|||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* LY: append the list. */
|
/* LY: freedb is empty, will look for any free txn-id in high2low order. */
|
||||||
rc = mdbx_txl_append(&txn->mt_lifo_reclaimed,
|
rc = mdbx_txl_append(&txn->mt_lifo_reclaimed, --head_gc_id);
|
||||||
env->me_last_reclaimed - 1);
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
if (unlikely(rc))
|
|
||||||
goto bailout;
|
goto bailout;
|
||||||
--env->me_last_reclaimed;
|
cleaned_gc_slot += 1 /* mark GC cleanup is not needed. */;
|
||||||
/* LY: note that freeDB cleanup is not needed. */
|
|
||||||
++cleanup_reclaimed_pos;
|
mdbx_trace("%s: append @%" PRIaTXN
|
||||||
|
" to lifo-reclaimed, cleaned-gc-slot = %u",
|
||||||
|
dbg_prefix_mode, head_gc_id, cleaned_gc_slot);
|
||||||
}
|
}
|
||||||
mdbx_tassert(txn, txn->mt_lifo_reclaimed != NULL);
|
mdbx_tassert(txn, txn->mt_lifo_reclaimed != NULL);
|
||||||
head_id = txn->mt_lifo_reclaimed[refill_reclaimed_pos];
|
reservation_gc_id = txn->mt_lifo_reclaimed[++reused_gc_slot];
|
||||||
|
mdbx_trace("%s: take @%" PRIaTXN " from lifo-reclaimed[%u]",
|
||||||
|
dbg_prefix_mode, reservation_gc_id, reused_gc_slot);
|
||||||
|
head_gc_id =
|
||||||
|
(head_gc_id > reservation_gc_id) ? reservation_gc_id : head_gc_id;
|
||||||
} else {
|
} else {
|
||||||
mdbx_tassert(txn, txn->mt_lifo_reclaimed == NULL);
|
mdbx_tassert(txn, txn->mt_lifo_reclaimed == NULL);
|
||||||
|
reused_gc_slot++ /* just count reserved records */;
|
||||||
|
reservation_gc_id = head_gc_id--;
|
||||||
|
mdbx_trace("%s: take @%" PRIaTXN " from head-gc-id", dbg_prefix_mode,
|
||||||
|
reservation_gc_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* (Re)write {key = head_id, PNL length = head_room} */
|
mdbx_trace("%s: head_gc_id %" PRIaTXN
|
||||||
total_room -= head_room;
|
", reused_gc_slot %u, lifo_gc_slots %u, reservation-id "
|
||||||
head_room = rpl_len - total_room;
|
"%" PRIaTXN,
|
||||||
if (head_room > (intptr_t)env->me_maxfree_1pg && head_id > 1) {
|
dbg_prefix_mode, head_gc_id, reused_gc_slot, lifo_gc_slots,
|
||||||
/* Overflow multi-page for part of me_reclaimed_pglist */
|
reservation_gc_id);
|
||||||
head_room /= (head_id < INT16_MAX) ? (pgno_t)head_id
|
|
||||||
: INT16_MAX; /* amortize page sizes */
|
const bool no_slots_more =
|
||||||
head_room += env->me_maxfree_1pg - head_room % (env->me_maxfree_1pg + 1);
|
head_gc_id < 2 && (!lifo || reused_gc_slot >= lifo_gc_slots);
|
||||||
} else if (head_room < 0) {
|
const unsigned chunk =
|
||||||
/* Rare case, not bothering to delete this record */
|
(left < env->me_maxgc_ov1page || no_slots_more)
|
||||||
head_room = 0;
|
? left
|
||||||
continue;
|
: (left < env->me_maxgc_ov1page * 2)
|
||||||
|
? /* the half to each of the last two chunks */ left / 2
|
||||||
|
: env->me_maxgc_ov1page;
|
||||||
|
|
||||||
|
mdbx_trace("%s: chunk %u, no_slots_more %s, gc-per-ovpage %u",
|
||||||
|
dbg_prefix_mode, chunk, no_slots_more ? "yes" : "no",
|
||||||
|
env->me_maxgc_ov1page);
|
||||||
|
|
||||||
|
mdbx_tassert(txn, reservation_gc_id < *env->me_oldest);
|
||||||
|
if (unlikely(reservation_gc_id < 1)) {
|
||||||
|
/* LY: not any txn in the past of freedb. */
|
||||||
|
rc = MDBX_PROBLEM;
|
||||||
|
goto bailout;
|
||||||
}
|
}
|
||||||
key.iov_len = sizeof(head_id);
|
|
||||||
key.iov_base = &head_id;
|
key.iov_len = sizeof(reservation_gc_id);
|
||||||
data.iov_len = (head_room + 1) * sizeof(pgno_t);
|
key.iov_base = &reservation_gc_id;
|
||||||
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE);
|
data.iov_len = (chunk + 1) * sizeof(pgno_t);
|
||||||
|
mdbx_trace("%s.reserve: %u [%u...%u] @%" PRIaTXN, dbg_prefix_mode, chunk,
|
||||||
|
placed + 1, placed + chunk + 1, reservation_gc_id);
|
||||||
|
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE);
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
|
||||||
/* PNL is initially empty, zero out at least the length */
|
/* PNL is initially empty, zero out at least the length */
|
||||||
pgno_t *pgs = (pgno_t *)data.iov_base;
|
memset(data.iov_base, 0, sizeof(pgno_t));
|
||||||
intptr_t i = head_room > clean_limit ? head_room : 0;
|
placed += chunk;
|
||||||
do {
|
mdbx_trace("%s.placed %u (+%u), continue", dbg_prefix_mode, placed, chunk);
|
||||||
pgs[i] = 0;
|
|
||||||
} while (--i >= 0);
|
|
||||||
total_room += head_room;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
mdbx_tassert(txn,
|
mdbx_tassert(txn,
|
||||||
cleanup_reclaimed_pos ==
|
cleaned_gc_slot ==
|
||||||
(txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
|
(txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
|
||||||
|
|
||||||
/* Fill in the reserved me_reclaimed_pglist records */
|
mdbx_trace(" >> filling");
|
||||||
|
/* Fill in the reserved records */
|
||||||
|
filled_gc_slot = reused_gc_slot;
|
||||||
rc = MDBX_SUCCESS;
|
rc = MDBX_SUCCESS;
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
mdbx_tassert(txn, mdbx_pnl_check(env->me_reclaimed_pglist, true));
|
||||||
if (env->me_reclaimed_pglist && env->me_reclaimed_pglist[0]) {
|
if (env->me_reclaimed_pglist && env->me_reclaimed_pglist[0]) {
|
||||||
@ -3819,89 +3849,109 @@ again_on_freelist_change:
|
|||||||
key.iov_len = data.iov_len = 0; /* avoid MSVC warning */
|
key.iov_len = data.iov_len = 0; /* avoid MSVC warning */
|
||||||
key.iov_base = data.iov_base = NULL;
|
key.iov_base = data.iov_base = NULL;
|
||||||
|
|
||||||
size_t rpl_left = env->me_reclaimed_pglist[0];
|
unsigned left = env->me_reclaimed_pglist[0];
|
||||||
pgno_t *rpl_end = env->me_reclaimed_pglist + rpl_left;
|
pgno_t *end = env->me_reclaimed_pglist + left;
|
||||||
if (txn->mt_lifo_reclaimed == 0) {
|
if (txn->mt_lifo_reclaimed == nullptr) {
|
||||||
mdbx_tassert(txn, lifo == 0);
|
mdbx_tassert(txn, lifo == 0);
|
||||||
rc = mdbx_cursor_first(&mc, &key, &data);
|
rc = mdbx_cursor_first(&mc, &key, &data);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
} else {
|
} else {
|
||||||
mdbx_tassert(txn, lifo != 0);
|
mdbx_tassert(txn, lifo != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
txnid_t id;
|
txnid_t fill_gc_id;
|
||||||
if (txn->mt_lifo_reclaimed == 0) {
|
mdbx_trace("%s: left %u of %u", dbg_prefix_mode, left,
|
||||||
|
(unsigned)env->me_reclaimed_pglist[0]);
|
||||||
|
if (txn->mt_lifo_reclaimed == nullptr) {
|
||||||
mdbx_tassert(txn, lifo == 0);
|
mdbx_tassert(txn, lifo == 0);
|
||||||
id = *(txnid_t *)key.iov_base;
|
fill_gc_id = *(txnid_t *)key.iov_base;
|
||||||
mdbx_tassert(txn, id <= env->me_last_reclaimed);
|
if (filled_gc_slot-- /* just countdown reserved records */ == 0 ||
|
||||||
|
fill_gc_id > env->me_last_reclaimed) {
|
||||||
|
mdbx_notice(
|
||||||
|
"** restart: reserve depleted (filled_slot %u, fill_id %" PRIaTXN
|
||||||
|
" > last_reclaimed %" PRIaTXN,
|
||||||
|
filled_gc_slot, fill_gc_id, env->me_last_reclaimed);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
mdbx_tassert(txn, lifo != 0);
|
mdbx_tassert(txn, lifo != 0);
|
||||||
mdbx_tassert(txn,
|
if (filled_gc_slot == 0) {
|
||||||
refill_reclaimed_pos > 0 &&
|
mdbx_notice("** restart: reserve depleted (filled_slot == 0)");
|
||||||
refill_reclaimed_pos <= txn->mt_lifo_reclaimed[0]);
|
goto retry;
|
||||||
id = txn->mt_lifo_reclaimed[refill_reclaimed_pos--];
|
}
|
||||||
key.iov_base = &id;
|
mdbx_tassert(txn, filled_gc_slot > 0 &&
|
||||||
key.iov_len = sizeof(id);
|
filled_gc_slot <= txn->mt_lifo_reclaimed[0]);
|
||||||
|
fill_gc_id = txn->mt_lifo_reclaimed[filled_gc_slot--];
|
||||||
|
mdbx_trace("%s.seek-reservaton @%" PRIaTXN " at lifo_reclaimed[%u]",
|
||||||
|
dbg_prefix_mode, fill_gc_id, (unsigned)filled_gc_slot);
|
||||||
|
key.iov_base = &fill_gc_id;
|
||||||
|
key.iov_len = sizeof(fill_gc_id);
|
||||||
rc = mdbx_cursor_get(&mc, &key, &data, MDBX_SET);
|
rc = mdbx_cursor_get(&mc, &key, &data, MDBX_SET);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
mdbx_tassert(
|
mdbx_tassert(
|
||||||
txn, cleanup_reclaimed_pos ==
|
txn, cleaned_gc_slot ==
|
||||||
(txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
|
(txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
|
||||||
|
|
||||||
mdbx_tassert(txn, data.iov_len >= sizeof(pgno_t) * 2);
|
mdbx_tassert(txn, data.iov_len >= sizeof(pgno_t) * 2);
|
||||||
size_t chunk_len = (data.iov_len / sizeof(pgno_t)) - 1;
|
const size_t space = (data.iov_len / sizeof(pgno_t)) - 1;
|
||||||
if (chunk_len > rpl_left)
|
const unsigned chunk = (space > left) ? left : (unsigned)space;
|
||||||
chunk_len = rpl_left;
|
data.iov_len = (chunk + 1) * sizeof(pgno_t);
|
||||||
data.iov_len = (chunk_len + 1) * sizeof(pgno_t);
|
mdbx_tassert(txn, fill_gc_id > 0 && fill_gc_id < *env->me_oldest);
|
||||||
key.iov_base = &id;
|
key.iov_base = &fill_gc_id;
|
||||||
key.iov_len = sizeof(id);
|
key.iov_len = sizeof(fill_gc_id);
|
||||||
|
|
||||||
rpl_end -= chunk_len;
|
end -= chunk;
|
||||||
data.iov_base = rpl_end;
|
data.iov_base = end;
|
||||||
pgno_t save = rpl_end[0];
|
pgno_t save = end[0];
|
||||||
rpl_end[0] = (pgno_t)chunk_len;
|
end[0] = (pgno_t)chunk;
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(rpl_end, false));
|
mdbx_tassert(txn, mdbx_pnl_check(end, false));
|
||||||
mc.mc_flags |= C_RECLAIMING;
|
mc.mc_flags |= C_RECLAIMING;
|
||||||
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_CURRENT);
|
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_CURRENT);
|
||||||
mc.mc_flags ^= C_RECLAIMING;
|
mc.mc_flags ^= C_RECLAIMING;
|
||||||
mdbx_tassert(txn, mdbx_pnl_check(rpl_end, false));
|
mdbx_tassert(txn, mdbx_pnl_check(end, false));
|
||||||
mdbx_tassert(
|
mdbx_tassert(
|
||||||
txn, cleanup_reclaimed_pos ==
|
txn, cleaned_gc_slot ==
|
||||||
(txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
|
(txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
|
||||||
rpl_end[0] = save;
|
pgno_t *from = end + 1, *to = end + end[0];
|
||||||
if (unlikely(rc))
|
mdbx_trace("%s.fill: %u [ %u:%" PRIaPGNO "...%u:%" PRIaPGNO
|
||||||
|
"] @%" PRIaTXN,
|
||||||
|
dbg_prefix_mode, (unsigned)end[0],
|
||||||
|
(unsigned)(from - env->me_reclaimed_pglist), *from,
|
||||||
|
(unsigned)(to - env->me_reclaimed_pglist), *to, fill_gc_id);
|
||||||
|
end[0] = save;
|
||||||
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
|
||||||
rpl_left -= chunk_len;
|
left -= chunk;
|
||||||
if (rpl_left == 0)
|
if (left == 0) {
|
||||||
|
rc = MDBX_SUCCESS;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!lifo) {
|
if (!lifo) {
|
||||||
rc = mdbx_cursor_next(&mc, &key, &data, MDBX_NEXT);
|
rc = mdbx_cursor_next(&mc, &key, &data, MDBX_NEXT);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mdbx_tassert(txn, rc == MDBX_SUCCESS);
|
||||||
|
if (txn->mt_lifo_reclaimed) {
|
||||||
|
mdbx_tassert(txn, cleaned_gc_slot == txn->mt_lifo_reclaimed[0]);
|
||||||
|
if (unlikely(filled_gc_slot != 0)) {
|
||||||
|
mdbx_notice("** restart: reserve excess (filled-slot %u > 0)",
|
||||||
|
filled_gc_slot);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bailout:
|
bailout:
|
||||||
if (txn->mt_lifo_reclaimed) {
|
if (txn->mt_lifo_reclaimed) {
|
||||||
mdbx_tassert(txn, rc || cleanup_reclaimed_pos == txn->mt_lifo_reclaimed[0]);
|
|
||||||
if (rc == MDBX_SUCCESS &&
|
|
||||||
cleanup_reclaimed_pos != txn->mt_lifo_reclaimed[0]) {
|
|
||||||
mdbx_tassert(txn, cleanup_reclaimed_pos < txn->mt_lifo_reclaimed[0]);
|
|
||||||
/* LY: zeroed cleanup_idx to force cleanup
|
|
||||||
* and refill created freeDB records. */
|
|
||||||
cleanup_reclaimed_pos = 0;
|
|
||||||
/* LY: restart filling */
|
|
||||||
total_room = head_room = refill_reclaimed_pos = 0;
|
|
||||||
more = 1;
|
|
||||||
goto again_on_freelist_change;
|
|
||||||
}
|
|
||||||
txn->mt_lifo_reclaimed[0] = 0;
|
txn->mt_lifo_reclaimed[0] = 0;
|
||||||
if (txn != env->me_txn0) {
|
if (txn != env->me_txn0) {
|
||||||
mdbx_txl_free(txn->mt_lifo_reclaimed);
|
mdbx_txl_free(txn->mt_lifo_reclaimed);
|
||||||
@ -3909,6 +3959,7 @@ bailout:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mdbx_trace("<<< rc = %d", rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4031,7 +4082,7 @@ static __cold bool mdbx_txn_import_dbi(MDBX_txn *txn, MDBX_dbi dbi) {
|
|||||||
(env->me_dbflags[i] & MDBX_VALID)) {
|
(env->me_dbflags[i] & MDBX_VALID)) {
|
||||||
txn->mt_dbs[i].md_flags = env->me_dbflags[i] & PERSISTENT_FLAGS;
|
txn->mt_dbs[i].md_flags = env->me_dbflags[i] & PERSISTENT_FLAGS;
|
||||||
txn->mt_dbflags[i] = DB_VALID | DB_USRVALID | DB_STALE;
|
txn->mt_dbflags[i] = DB_VALID | DB_USRVALID | DB_STALE;
|
||||||
assert(txn->mt_dbxs[i].md_cmp != NULL);
|
mdbx_tassert(txn, txn->mt_dbxs[i].md_cmp != NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
txn->mt_numdbs = snap_numdbs;
|
txn->mt_numdbs = snap_numdbs;
|
||||||
@ -4275,7 +4326,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = mdbx_freelist_save(txn);
|
rc = mdbx_update_gc(txn);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
@ -4881,7 +4932,8 @@ int __cold mdbx_env_get_maxkeysize(MDBX_env *env) {
|
|||||||
|
|
||||||
#define mdbx_maxkey(nodemax) ((nodemax) - (NODESIZE + sizeof(MDBX_db)))
|
#define mdbx_maxkey(nodemax) ((nodemax) - (NODESIZE + sizeof(MDBX_db)))
|
||||||
|
|
||||||
#define mdbx_maxfree1pg(pagesize) (((pagesize)-PAGEHDRSZ) / sizeof(pgno_t) - 1)
|
#define mdbx_maxgc_ov1page(pagesize) \
|
||||||
|
(((pagesize)-PAGEHDRSZ) / sizeof(pgno_t) - 1)
|
||||||
|
|
||||||
int mdbx_get_maxkeysize(size_t pagesize) {
|
int mdbx_get_maxkeysize(size_t pagesize) {
|
||||||
if (pagesize == 0)
|
if (pagesize == 0)
|
||||||
@ -4903,11 +4955,11 @@ static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) {
|
|||||||
mdbx_ensure(env, pagesize <= MAX_PAGESIZE);
|
mdbx_ensure(env, pagesize <= MAX_PAGESIZE);
|
||||||
env->me_psize = (unsigned)pagesize;
|
env->me_psize = (unsigned)pagesize;
|
||||||
|
|
||||||
STATIC_ASSERT(mdbx_maxfree1pg(MIN_PAGESIZE) > 42);
|
STATIC_ASSERT(mdbx_maxgc_ov1page(MIN_PAGESIZE) > 42);
|
||||||
STATIC_ASSERT(mdbx_maxfree1pg(MAX_PAGESIZE) < MDBX_PNL_DB_MAX);
|
STATIC_ASSERT(mdbx_maxgc_ov1page(MAX_PAGESIZE) < MDBX_PNL_DB_MAX);
|
||||||
const intptr_t maxfree_1pg = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
|
const intptr_t maxgc_ov1page = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
|
||||||
mdbx_ensure(env, maxfree_1pg > 42 && maxfree_1pg < MDBX_PNL_DB_MAX);
|
mdbx_ensure(env, maxgc_ov1page > 42 && maxgc_ov1page < MDBX_PNL_DB_MAX);
|
||||||
env->me_maxfree_1pg = (unsigned)maxfree_1pg;
|
env->me_maxgc_ov1page = (unsigned)maxgc_ov1page;
|
||||||
|
|
||||||
STATIC_ASSERT(mdbx_nodemax(MIN_PAGESIZE) > 42);
|
STATIC_ASSERT(mdbx_nodemax(MIN_PAGESIZE) > 42);
|
||||||
STATIC_ASSERT(mdbx_nodemax(MAX_PAGESIZE) < UINT16_MAX);
|
STATIC_ASSERT(mdbx_nodemax(MAX_PAGESIZE) < UINT16_MAX);
|
||||||
|
Loading…
Reference in New Issue
Block a user