2024-05-19 22:07:58 +03:00
|
|
|
|
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024
|
|
|
|
|
|
|
|
|
|
#include "internals.h"
|
|
|
|
|
|
2024-05-23 12:42:59 +03:00
|
|
|
|
MDBX_NOTHROW_PURE_FUNCTION static bool is_lifo(const MDBX_txn *txn) {
|
|
|
|
|
return (txn->env->flags & MDBX_LIFORECLAIM) != 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MDBX_MAYBE_UNUSED static inline const char *dbg_prefix(const gcu_t *ctx) {
|
|
|
|
|
return is_lifo(ctx->cursor.txn) ? " lifo" : " fifo";
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Size of the page backlog of `txn`: the number of entries in tw.relist
 * plus the number of loose pages. */
static inline size_t backlog_size(MDBX_txn *txn) {
  size_t total = MDBX_PNL_GETSIZE(txn->tw.relist);
  total += txn->tw.loose_count;
  return total;
}
|
|
|
|
|
|
|
|
|
|
/* Deletes the GC record(s) into which the retired-list was already stored.
 *
 * Does nothing (returns MDBX_SUCCESS) when ctx->retired_stored is zero.
 * Otherwise the cursor located right after the MDBX_txn structure is
 * temporarily linked into txn->cursors[FREE_DBI] and used to seek and delete
 * the record keyed by txn->txnid (or, with MDBX_ENABLE_BIGFOOT, each key from
 * ctx->bigfoot down to txn->txnid). On an exact hit ctx->retired_stored is
 * reset to zero before the deletion.
 *
 * Returns MDBX_SUCCESS, or the first error reported by cursor_seek() /
 * cursor_del(); MDBX_NOTFOUND from the seek is treated as success. */
static int clean_stored_retired(MDBX_txn *txn, gcu_t *ctx) {
  if (!ctx->retired_stored) {
    return MDBX_SUCCESS;
  }

  /* NOTE(review): presumably the basal txn reserves room for a spare cursor
   * directly behind the MDBX_txn structure -- confirm against allocation. */
  MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn));
  tASSERT(txn, txn == txn->env->basal_txn && gc->next == gc);

  /* (Re)initialize the cursor and push it onto the FREE_DBI cursor list. */
  gc->txn = txn;
  gc->dbi_state = txn->dbi_state;
  gc->top_and_flags = z_fresh_mark;
  gc->next = txn->cursors[FREE_DBI];
  txn->cursors[FREE_DBI] = gc;

  int err;
  for (;;) {
    MDBX_val key, val;
    key.iov_len = sizeof(txnid_t);
#if MDBX_ENABLE_BIGFOOT
    key.iov_base = &ctx->bigfoot;
#else
    key.iov_base = &txn->txnid;
#endif /* MDBX_ENABLE_BIGFOOT */

    const csr_t csr = cursor_seek(gc, &key, &val, MDBX_SET);
    if (csr.err == MDBX_SUCCESS && csr.exact) {
      ctx->retired_stored = 0;
      err = cursor_del(gc, 0);
      TRACE("== clear-4linear, backlog %zu, err %d", backlog_size(txn), err);
    } else if (csr.err == MDBX_NOTFOUND) {
      err = MDBX_SUCCESS;
    } else {
      err = csr.err;
    }

#if MDBX_ENABLE_BIGFOOT
    /* Step down to the previous slice key; stop on error (without touching
     * ctx->bigfoot) or once the key went below txn->txnid. */
    if (err || --ctx->bigfoot < txn->txnid) {
      break;
    }
#else
    break;
#endif /* MDBX_ENABLE_BIGFOOT */
  }

  /* Pop the cursor from the list and mark it as unlinked again. */
  txn->cursors[FREE_DBI] = gc->next;
  gc->next = gc;
  return err;
}
|
|
|
|
|
|
|
|
|
|
/* Calls cursor_touch() on the GC-update cursor with a txnid-sized key and a
 * value sized like the current retired-pages list (both without data
 * pointers). The z_gcu_preparation flag is raised on the cursor only for the
 * duration of that call. Returns the result of cursor_touch(). */
static int touch_gc(gcu_t *ctx) {
  MDBX_cursor *const mc = &ctx->cursor;
  MDBX_txn *const txn = mc->txn;
  tASSERT(txn, is_pointed(mc) || txn->dbs[FREE_DBI].leaf_pages == 0);

  MDBX_val key = {.iov_base = nullptr, .iov_len = sizeof(txnid_t)};
  MDBX_val val = {.iov_base = nullptr,
                  .iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages)};

  mc->flags |= z_gcu_preparation;
  const int err = cursor_touch(mc, &key, &val);
  mc->flags -= z_gcu_preparation;
  return err;
}
|
|
|
|
|
|
|
|
|
|
/* Prepare a backlog of pages to modify GC itself, while reclaiming is
|
|
|
|
|
* prohibited. It should be enough to prevent search in gc_alloc_ex()
|
|
|
|
|
* during a deleting, when GC tree is unbalanced. */
|
|
|
|
|
static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) {
|
|
|
|
|
const size_t for_cow = txn->dbs[FREE_DBI].height;
|
|
|
|
|
const size_t for_rebalance =
|
|
|
|
|
for_cow + 1 +
|
|
|
|
|
(txn->dbs[FREE_DBI].height + 1ul >= txn->dbs[FREE_DBI].branch_pages);
|
|
|
|
|
size_t for_split = ctx->retired_stored == 0;
|
|
|
|
|
tASSERT(txn, is_pointed(&ctx->cursor) || txn->dbs[FREE_DBI].leaf_pages == 0);
|
|
|
|
|
|
|
|
|
|
const intptr_t retired_left =
|
|
|
|
|
MDBX_PNL_SIZEOF(txn->tw.retired_pages) - ctx->retired_stored;
|
|
|
|
|
size_t for_relist = 0;
|
|
|
|
|
if (MDBX_ENABLE_BIGFOOT && retired_left > 0) {
|
|
|
|
|
for_relist = (retired_left + txn->env->maxgc_large1page - 1) /
|
|
|
|
|
txn->env->maxgc_large1page;
|
|
|
|
|
const size_t per_branch_page = txn->env->maxgc_per_branch;
|
|
|
|
|
for (size_t entries = for_relist; entries > 1; for_split += entries)
|
|
|
|
|
entries = (entries + per_branch_page - 1) / per_branch_page;
|
|
|
|
|
} else if (!MDBX_ENABLE_BIGFOOT && retired_left != 0) {
|
|
|
|
|
for_relist =
|
|
|
|
|
largechunk_npages(txn->env, MDBX_PNL_SIZEOF(txn->tw.retired_pages));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const size_t for_tree_before_touch = for_cow + for_rebalance + for_split;
|
|
|
|
|
const size_t for_tree_after_touch = for_rebalance + for_split;
|
|
|
|
|
const size_t for_all_before_touch = for_relist + for_tree_before_touch;
|
|
|
|
|
const size_t for_all_after_touch = for_relist + for_tree_after_touch;
|
|
|
|
|
|
|
|
|
|
if (likely(for_relist < 2 && backlog_size(txn) > for_all_before_touch) &&
|
|
|
|
|
(ctx->cursor.top < 0 ||
|
|
|
|
|
is_modifable(txn, ctx->cursor.pg[ctx->cursor.top])))
|
|
|
|
|
return MDBX_SUCCESS;
|
|
|
|
|
|
|
|
|
|
TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, "
|
|
|
|
|
"4split %zu, "
|
|
|
|
|
"4cow %zu, 4tree %zu)",
|
|
|
|
|
ctx->retired_stored, retired_left, backlog_size(txn),
|
|
|
|
|
for_all_before_touch, for_relist, for_split, for_cow,
|
|
|
|
|
for_tree_before_touch);
|
|
|
|
|
|
|
|
|
|
int err = touch_gc(ctx);
|
|
|
|
|
TRACE("== after-touch, backlog %zu, err %d", backlog_size(txn), err);
|
|
|
|
|
|
|
|
|
|
if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) &&
|
|
|
|
|
MDBX_PNL_GETSIZE(txn->tw.retired_pages) != ctx->retired_stored &&
|
|
|
|
|
err == MDBX_SUCCESS) {
|
|
|
|
|
if (unlikely(ctx->retired_stored)) {
|
|
|
|
|
err = clean_stored_retired(txn, ctx);
|
|
|
|
|
if (unlikely(err != MDBX_SUCCESS))
|
|
|
|
|
return err;
|
|
|
|
|
if (!ctx->retired_stored)
|
|
|
|
|
return /* restart by tail-recursion */ prepare_backlog(txn, ctx);
|
|
|
|
|
}
|
|
|
|
|
err = gc_alloc_ex(&ctx->cursor, for_relist, ALLOC_RESERVE).err;
|
|
|
|
|
TRACE("== after-4linear, backlog %zu, err %d", backlog_size(txn), err);
|
|
|
|
|
cASSERT(&ctx->cursor,
|
|
|
|
|
backlog_size(txn) >= for_relist || err != MDBX_SUCCESS);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (backlog_size(txn) < for_all_after_touch && err == MDBX_SUCCESS)
|
|
|
|
|
err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE | ALLOC_UNIMPORTANT).err;
|
|
|
|
|
|
|
|
|
|
TRACE("<< backlog %zu, err %d, gc: height %u, branch %zu, leaf %zu, large "
|
|
|
|
|
"%zu, entries %zu",
|
|
|
|
|
backlog_size(txn), err, txn->dbs[FREE_DBI].height,
|
|
|
|
|
(size_t)txn->dbs[FREE_DBI].branch_pages,
|
|
|
|
|
(size_t)txn->dbs[FREE_DBI].leaf_pages,
|
|
|
|
|
(size_t)txn->dbs[FREE_DBI].large_pages,
|
|
|
|
|
(size_t)txn->dbs[FREE_DBI].items);
|
|
|
|
|
tASSERT(txn, err != MDBX_NOTFOUND || (txn->flags & txn_gc_drained) != 0);
|
|
|
|
|
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Prepares a freshly reserved GC value `pnl` before it is filled in.
 *
 * The first sizeof(pgno_t) bytes (the list length) are always cleared, so the
 * reserved PNL reads as empty. Unless the environment has MDBX_WRITEMAP or
 * MDBX_NOMEMINIT set, the whole value is cleared as well. */
static inline void zeroize_reserved(const MDBX_env *env, MDBX_val pnl) {
#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__))
  /* Pre-fill the buffer to prevent a Valgrind warning from mdbx_dump_val(),
   * invoked via the DVAL_DEBUG() macro on exit from
   * cursor_seek(MDBX_SET_KEY), which is called below inside gc_update() in
   * the cleanup loop and in the loop filling the reserved items. */
  memset(pnl.iov_base, 0xBB, pnl.iov_len);
#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */

  /* PNL is initially empty, zero out at least the length */
  memset(pnl.iov_base, 0, sizeof(pgno_t));

  const bool wipe_whole =
      (env->flags & (MDBX_WRITEMAP | MDBX_NOMEMINIT)) == 0;
  if (wipe_whole) {
    /* zero out to avoid leaking values from uninitialized malloc'ed memory
     * to the file in non-writemap mode if length of the saving page-list
     * was changed during space reservation. */
    memset(pnl.iov_base, 0, pnl.iov_len);
  }
}
|
|
|
|
|
|
|
|
|
|
/* Moves the transaction's loose pages into tw.relist or, when no GC slot is
 * available, into tw.retired_pages; then removes them from the dirty list.
 *
 * When nothing has been reclaimed from GC yet, the function first tries to
 * reserve a GC slot via gc_alloc_ex(); if that succeeds it returns
 * MDBX_SUCCESS immediately and leaves the loose pages untouched, so the
 * caller sees txn->tw.loose_pages still set and can retry.
 *
 * Returns MDBX_SUCCESS or the error from pnl_need(). On the completed path
 * tw.loose_pages / tw.loose_count are reset. */
static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) {
  tASSERT(txn, txn->tw.loose_count > 0);
  /* Return loose page numbers to tw.relist,
   * though usually none are left at this point.
   * The pages themselves remain in dirtylist. */
  if (unlikely(!txn->tw.gc.reclaimed && txn->tw.gc.last_reclaimed < 1)) {
    TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix(ctx),
          txn->tw.loose_count);
    int err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err;
    if (err == MDBX_SUCCESS) {
      TRACE("%s: retry since gc-slot for %zu loose-pages available",
            dbg_prefix(ctx), txn->tw.loose_count);
      return MDBX_SUCCESS;
    }

    /* Put loose page numbers in tw.retired_pages,
     * since unable to return ones to tw.relist. */
    err = pnl_need(&txn->tw.retired_pages, txn->tw.loose_count);
    if (unlikely(err != MDBX_SUCCESS)) {
      return err;
    }
    for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) {
      pnl_append_prereserved(txn->tw.retired_pages, lp->pgno);
      /* Make the link field readable for the sanitizers before following it. */
      MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
      VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
    }
    TRACE("%s: append %zu loose-pages to retired-pages", dbg_prefix(ctx),
          txn->tw.loose_count);
  } else {
    /* Room for loose pages + temp PNL with same */
    int err = pnl_need(&txn->tw.relist, 2 * txn->tw.loose_count + 2);
    if (unlikely(err != MDBX_SUCCESS)) {
      return err;
    }
    /* The temporary list is carved from the tail of relist's allocation. */
    pnl_t loose = txn->tw.relist + MDBX_PNL_ALLOCLEN(txn->tw.relist) -
                  txn->tw.loose_count - 1;
    size_t count = 0;
    for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) {
      tASSERT(txn, lp->flags == P_LOOSE);
      count += 1;
      loose[count] = lp->pgno;
      MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
      VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
    }
    tASSERT(txn, count == txn->tw.loose_count);
    MDBX_PNL_SETSIZE(loose, count);
    pnl_sort(loose, txn->geo.first_unallocated);
    pnl_merge(txn->tw.relist, loose);
    TRACE("%s: append %zu loose-pages to reclaimed-pages", dbg_prefix(ctx),
          txn->tw.loose_count);
  }

  /* filter-out list of dirty-pages from loose-pages */
  dpl_t *const dl = txn->tw.dirtylist;
  if (dl) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
    tASSERT(txn, dl->sorted <= dl->length);
    /* `w` is the index of the last kept item, `sorted_out` counts removed
     * items that were inside the sorted prefix. Items are 1-based. */
    size_t w = 0, sorted_out = 0;
    for (size_t r = 1; r <= dl->length; ++r) {
      page_t *dp = dl->items[r].ptr;
      tASSERT(txn, dp->flags == P_LOOSE || is_modifable(txn, dp));
      tASSERT(txn, dpl_endpgno(dl, r) <= txn->geo.first_unallocated);
      if (dp->flags & P_LOOSE) {
        tASSERT(txn, dp->flags == P_LOOSE);
        if (dl->sorted >= r) {
          sorted_out += 1;
        }
        if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) {
          page_shadow_release(txn->env, dp, 1);
        }
        continue;
      }
      /* Keep this item, compacting the list in place. */
      w += 1;
      if (w != r) {
        dl->items[w] = dl->items[r];
      }
    }
    TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages",
          dbg_prefix(ctx), dl->length, w);
    tASSERT(txn, txn->tw.loose_count == dl->length - w);
    dl->sorted -= sorted_out;
    tASSERT(txn, dl->sorted <= w);
    dpl_setlen(dl, w);
    dl->pages_including_loose -= txn->tw.loose_count;
    txn->tw.dirtyroom += txn->tw.loose_count;
    tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
                     (txn->parent ? txn->parent->tw.dirtyroom
                                  : txn->env->options.dp_limit));
  } else {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
  }

  /* The loose list is consumed now. */
  txn->tw.loose_pages = nullptr;
  txn->tw.loose_count = 0;
#if MDBX_ENABLE_REFUND
  txn->tw.loose_refund_wl = 0;
#endif /* MDBX_ENABLE_REFUND */
  return MDBX_SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
/* Stores the transaction's retired-pages list into GC.
 *
 * With MDBX_ENABLE_BIGFOOT the list is written as a sequence of chunks (at
 * most env->maxgc_large1page entries each, except possibly the last) under
 * consecutive keys starting at txn->txnid; ctx->bigfoot tracks the last key
 * used. The whole procedure is repeated while the retired-list keeps
 * changing size during the preparation or the puts.
 *
 * Without MDBX_ENABLE_BIGFOOT the list is written as a single record keyed
 * by txn->txnid, re-reserving while the list grows during the put.
 *
 * Updates ctx->retired_stored. Returns MDBX_SUCCESS or an error code. */
static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) {
  int err;
  if (unlikely(!ctx->retired_stored)) {
    /* Make sure last page of GC is touched and on retired-list */
    err = outer_last(&ctx->cursor, nullptr, nullptr);
    if (likely(err == MDBX_SUCCESS)) {
      err = touch_gc(ctx);
    }
    if (unlikely(err != MDBX_SUCCESS) && err != MDBX_NOTFOUND) {
      return err;
    }
  }

  MDBX_val key, data;
#if MDBX_ENABLE_BIGFOOT
  size_t retired_pages_before;
  do {
    if (ctx->bigfoot > txn->txnid) {
      /* Slices from a previous attempt exist: remove them first. */
      err = clean_stored_retired(txn, ctx);
      if (unlikely(err != MDBX_SUCCESS)) {
        return err;
      }
      tASSERT(txn, ctx->bigfoot <= txn->txnid);
    }

    retired_pages_before = MDBX_PNL_GETSIZE(txn->tw.retired_pages);
    err = prepare_backlog(txn, ctx);
    if (unlikely(err != MDBX_SUCCESS)) {
      return err;
    }
    if (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
      TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx),
            retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages));
      break;
    }

    pnl_sort(txn->tw.retired_pages, txn->geo.first_unallocated);
    ctx->retired_stored = 0;
    ctx->bigfoot = txn->txnid;
    do {
      if (ctx->retired_stored) {
        /* Not the first slice: replenish the backlog before the next put. */
        err = prepare_backlog(txn, ctx);
        if (unlikely(err != MDBX_SUCCESS)) {
          return err;
        }
        if (ctx->retired_stored >= MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
          TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx),
                retired_pages_before, MDBX_PNL_GETSIZE(txn->tw.retired_pages));
          break;
        }
      }
      key.iov_len = sizeof(txnid_t);
      key.iov_base = &ctx->bigfoot;
      const size_t left =
          MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored;
      /* Everything that is left goes into one record unless it exceeds a
       * single large page and a further key is still available. */
      size_t chunk = left;
      if (left > txn->env->maxgc_large1page && ctx->bigfoot < MAX_TXNID) {
        chunk = txn->env->maxgc_large1page;
      }
      data.iov_len = (chunk + 1) * sizeof(pgno_t);
      err = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
      if (unlikely(err != MDBX_SUCCESS)) {
        return err;
      }

#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__))
      /* Pre-fill the buffer to prevent a Valgrind warning from
       * mdbx_dump_val(), invoked via the DVAL_DEBUG() macro on exit from
       * cursor_seek(MDBX_SET_KEY), which is called both above in the cleanup
       * loop and below in the loop filling the reserved items. */
      memset(data.iov_base, 0xBB, data.iov_len);
#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */

      if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
        size_t at;
        if (is_lifo(txn) == MDBX_PNL_ASCENDING) {
          at = left - chunk;
        } else {
          at = ctx->retired_stored;
        }
        pgno_t *const begin = txn->tw.retired_pages + at;
        /* MDBX_PNL_ASCENDING == false && LIFO == false:
         *  - the larger pgno is at the beginning of retired list
         *    and should be placed with the larger txnid.
         * MDBX_PNL_ASCENDING == true && LIFO == true:
         *  - the larger pgno is at the ending of retired list
         *    and should be placed with the smaller txnid. */
        /* Temporarily overwrite the element before the slice with the slice
         * length, so that one memcpy() yields a complete PNL. */
        const pgno_t save = *begin;
        *begin = (pgno_t)chunk;
        memcpy(data.iov_base, begin, data.iov_len);
        *begin = save;
        TRACE("%s: put-retired/bigfoot @ %" PRIaTXN
              " (slice #%u) #%zu [%zu..%zu] of %zu",
              dbg_prefix(ctx), ctx->bigfoot,
              (unsigned)(ctx->bigfoot - txn->txnid), chunk, at, at + chunk,
              retired_pages_before);
      }
      ctx->retired_stored += chunk;
    } while (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages) &&
             (++ctx->bigfoot, true));
  } while (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages));
#else
  /* Write to last page of GC */
  key.iov_len = sizeof(txnid_t);
  key.iov_base = &txn->txnid;
  do {
    /* NOTE(review): the result of prepare_backlog() is discarded here, while
     * the MDBX_ENABLE_BIGFOOT branch checks it -- confirm this is intended. */
    prepare_backlog(txn, ctx);
    data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
    err = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
    if (unlikely(err != MDBX_SUCCESS)) {
      return err;
    }

#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__))
    /* Pre-fill the buffer to prevent a Valgrind warning from
     * mdbx_dump_val(), invoked via the DVAL_DEBUG() macro on exit from
     * cursor_seek(MDBX_SET_KEY), which is called both above in the cleanup
     * loop and below in the loop filling the reserved items. */
    memset(data.iov_base, 0xBB, data.iov_len);
#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */

    /* Retry if tw.retired_pages[] grew during the Put() */
  } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages));

  ctx->retired_stored = MDBX_PNL_GETSIZE(txn->tw.retired_pages);
  pnl_sort(txn->tw.retired_pages, txn->geo.first_unallocated);
  tASSERT(txn, data.iov_len == MDBX_PNL_SIZEOF(txn->tw.retired_pages));
  memcpy(data.iov_base, txn->tw.retired_pages, data.iov_len);

  TRACE("%s: put-retired #%zu @ %" PRIaTXN, dbg_prefix(ctx),
        ctx->retired_stored, txn->txnid);
#endif /* MDBX_ENABLE_BIGFOOT */

  if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
    /* Dump the stored part of the retired-list, last element first. */
    size_t i = ctx->retired_stored;
    DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL",
                txn->txnid, txn->dbs[FREE_DBI].root, i);
    while (i) {
      DEBUG_EXTRA_PRINT(" %" PRIaPGNO, txn->tw.retired_pages[i]);
      i--;
    }
    DEBUG_EXTRA_PRINT("%s\n", ".");
  }
  return MDBX_SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
typedef struct gcu_rid_result {
|
|
|
|
|
int err;
|
|
|
|
|
txnid_t rid;
|
|
|
|
|
} rid_t;
|
|
|
|
|
|
|
|
|
|
/* Tells whether more reclaimed GC slots are wanted to hold `left` page
 * numbers: the reclaimed list is still below txl_max and the slots that are
 * not reused yet cannot hold `left` entries at maxgc_large1page per slot. */
static inline bool gcu_rid_want_more_slots(MDBX_txn *txn, const gcu_t *ctx,
                                           const size_t left) {
  return MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) < txl_max &&
         left > (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot) *
                    txn->env->maxgc_large1page;
}

/* Picks the id of the GC record into which the next portion of the
 * reclaimed-list (`left` entries remain) should be placed.
 *
 * LIFO mode: ids are taken from txn->tw.gc.reclaimed. If there are not
 * enough slots, more are obtained from GC via gc_alloc_ex(), and after that
 * by appending unused ids going downwards from ctx->rid.
 * FIFO mode: ids are handed out downwards starting from ctx->rid, which is
 * initialized on first use from txn_snapshot_oldest() and the first GC key.
 *
 * Result:
 *  - err == MDBX_SUCCESS and rid != 0: use `rid`; ctx->reused_slot has been
 *    incremented;
 *  - err == MDBX_SUCCESS and rid == 0: the caller should continue its loop;
 *  - err == MDBX_RESULT_TRUE: the caller should restart;
 *  - any other err: failure. */
static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx,
                                   const size_t left) {
  rid_t r;
  if (is_lifo(txn)) {
    if (txn->tw.gc.reclaimed == nullptr) {
      txn->tw.gc.reclaimed = txl_alloc();
      if (unlikely(!txn->tw.gc.reclaimed)) {
        r.err = MDBX_ENOMEM;
        goto return_error;
      }
    }
    if (gcu_rid_want_more_slots(txn, ctx, left) && !ctx->dense) {
      /* A free slot is needed to save the list of pages. */
      bool need_cleanup = false;
      txnid_t snap_oldest = 0;
    retry_rid:
      do {
        r.err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err;
        snap_oldest = txn->env->lck->cached_oldest.weak;
        if (likely(r.err == MDBX_SUCCESS)) {
          TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix(ctx),
                MDBX_PNL_LAST(txn->tw.gc.reclaimed));
          need_cleanup = true;
        }
      } while (r.err == MDBX_SUCCESS &&
               gcu_rid_want_more_slots(txn, ctx, left));

      if (likely(r.err == MDBX_SUCCESS)) {
        TRACE("%s: got enough from GC.", dbg_prefix(ctx));
        goto return_continue;
      }
      if (unlikely(r.err != MDBX_NOTFOUND)) {
        /* LY: some troubles... */
        goto return_error;
      }

      if (MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) {
        if (need_cleanup) {
          txl_sort(txn->tw.gc.reclaimed);
          ctx->cleaned_slot = 0;
        }
        ctx->rid = MDBX_PNL_LAST(txn->tw.gc.reclaimed);
      } else {
        tASSERT(txn, txn->tw.gc.last_reclaimed == 0);
        if (unlikely(txn_snapshot_oldest(txn) != snap_oldest)) {
          /* should retry gc_alloc_ex()
           * if the oldest reader changes since the last attempt */
          goto retry_rid;
        }
        /* no reclaimable GC entries,
         * therefore no entries with ID < mdbx_find_oldest(txn) */
        txn->tw.gc.last_reclaimed = ctx->rid = snap_oldest;
        TRACE("%s: none recycled yet, set rid to @%" PRIaTXN, dbg_prefix(ctx),
              ctx->rid);
      }

      /* GC has no records suitable for recycling,
       * so free ids will be used in reverse order. */
      while (gcu_rid_want_more_slots(txn, ctx, left)) {
        if (unlikely(ctx->rid <= MIN_TXNID)) {
          if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) <=
                       ctx->reused_slot)) {
            NOTICE("** restart: reserve depleted (reused_gc_slot %zu >= "
                   "gc.reclaimed %zu)",
                   ctx->reused_slot, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed));
            goto return_restart;
          }
          break;
        }

        tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID);
        ctx->rid -= 1;
        MDBX_val key = {&ctx->rid, sizeof(ctx->rid)}, data;
        r.err = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err;
        if (unlikely(r.err == MDBX_SUCCESS)) {
          /* The candidate id is occupied: jump below the first GC key. */
          DEBUG("%s: GC's id %" PRIaTXN " is present, going to first",
                dbg_prefix(ctx), ctx->rid);
          r.err = outer_first(&ctx->cursor, &key, nullptr);
          if (unlikely(r.err != MDBX_SUCCESS ||
                       key.iov_len != sizeof(txnid_t))) {
            ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
                  "invalid GC-key size", (unsigned)key.iov_len);
            r.err = MDBX_CORRUPTED;
            goto return_error;
          }
          const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
          if (unlikely(gc_first <= MIN_TXNID)) {
            DEBUG("%s: no free GC's id(s) less than %" PRIaTXN
                  " (going dense-mode)",
                  dbg_prefix(ctx), ctx->rid);
            ctx->dense = true;
            goto return_restart;
          }
          ctx->rid = gc_first - 1;
        }

        tASSERT(txn, !ctx->dense);
        r.err = txl_append(&txn->tw.gc.reclaimed, ctx->rid);
        if (unlikely(r.err != MDBX_SUCCESS)) {
          goto return_error;
        }

        if (ctx->reused_slot) {
          /* rare case, but it is better to clear and re-create GC entries
           * with less fragmentation. */
          need_cleanup = true;
        } else {
          /* mark cleanup is not needed for added slot. */
          ctx->cleaned_slot += 1;
        }

        TRACE("%s: append @%" PRIaTXN
              " to lifo-reclaimed, cleaned-gc-slot = %zu",
              dbg_prefix(ctx), ctx->rid, ctx->cleaned_slot);
      }

      if (need_cleanup) {
        if (ctx->cleaned_slot) {
          TRACE("%s: restart to clear and re-create GC entries",
                dbg_prefix(ctx));
          goto return_restart;
        }
        goto return_continue;
      }
    }

    /* Slots are consumed from the end of the reclaimed list backwards. */
    const size_t i = MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot;
    tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed));
    r.rid = txn->tw.gc.reclaimed[i];
    TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", dbg_prefix(ctx),
          r.rid, i);
  } else {
    tASSERT(txn, txn->tw.gc.reclaimed == nullptr);
    if (unlikely(ctx->rid == 0)) {
      /* First use: start from the oldest snapshot, but stay below the first
       * key that is present in GC. */
      ctx->rid = txn_snapshot_oldest(txn);
      MDBX_val key;
      r.err = outer_first(&ctx->cursor, &key, nullptr);
      if (likely(r.err == MDBX_SUCCESS)) {
        if (unlikely(key.iov_len != sizeof(txnid_t))) {
          ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
                "invalid GC-key size", (unsigned)key.iov_len);
          r.err = MDBX_CORRUPTED;
          goto return_error;
        }
        const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
        if (ctx->rid >= gc_first) {
          ctx->rid = gc_first - 1;
        }
        if (unlikely(ctx->rid == 0)) {
          ERROR("%s", "** no GC tail-space to store (going dense-mode)");
          ctx->dense = true;
          goto return_restart;
        }
      } else if (r.err != MDBX_NOTFOUND) {
        goto return_error;
      }
      txn->tw.gc.last_reclaimed = ctx->rid;
      ctx->cleaned_id = ctx->rid + 1;
    }
    r.rid = ctx->rid;
    ctx->rid -= 1;
    TRACE("%s: take @%" PRIaTXN " from GC", dbg_prefix(ctx), r.rid);
  }
  ++ctx->reused_slot;
  r.err = MDBX_SUCCESS;
  return r;

return_continue:
  r.err = MDBX_SUCCESS;
  r.rid = 0;
  return r;

return_restart:
  r.err = MDBX_RESULT_TRUE;
  r.rid = 0;
  return r;

return_error:
  tASSERT(txn, r.err != MDBX_SUCCESS);
  r.rid = 0;
  return r;
}
|
|
|
|
|
|
|
|
|
|
/* Cleanups reclaimed GC (aka freeDB) records, saves the retired-list (aka
|
|
|
|
|
* freelist) of current transaction to GC, puts back into GC leftover of the
|
|
|
|
|
* reclaimed pages with chunking. This recursively changes the reclaimed-list,
|
|
|
|
|
* loose-list and retired-list. Keep trying until it stabilizes.
|
|
|
|
|
*
|
|
|
|
|
* NOTE: This code is a consequence of many iterations of adding crutches (aka
|
|
|
|
|
* "checks and balances") to partially bypass the fundamental design problems
|
|
|
|
|
* inherited from LMDB. So do not try to understand it completely in order to
|
|
|
|
|
* avoid your madness. */
|
|
|
|
|
int gc_update(MDBX_txn *txn, gcu_t *ctx) {
|
|
|
|
|
TRACE("\n>>> @%" PRIaTXN, txn->txnid);
|
|
|
|
|
MDBX_env *const env = txn->env;
|
|
|
|
|
ctx->cursor.next = txn->cursors[FREE_DBI];
|
|
|
|
|
txn->cursors[FREE_DBI] = &ctx->cursor;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
|
|
// tASSERT(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages) ||
|
|
|
|
|
// ctx->cleaned_slot <
|
|
|
|
|
// (txn->tw.gc.reclaimed ?
|
|
|
|
|
// MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)
|
|
|
|
|
// || ctx->cleaned_id < txn->tw.gc.last_reclaimed);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
|
|
|
|
|
/* txn->tw.relist[] can grow and shrink during this call.
|
|
|
|
|
* txn->tw.gc.last_reclaimed and txn->tw.retired_pages[] can only grow.
|
|
|
|
|
* But page numbers cannot disappear from txn->tw.retired_pages[]. */
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
retry_clean_adj:
|
|
|
|
|
ctx->reserve_adj = 0;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
retry:
|
2024-05-23 12:42:59 +03:00
|
|
|
|
ctx->loop += ctx->prev_first_unallocated == txn->geo.first_unallocated;
|
|
|
|
|
TRACE(">> restart, loop %u", ctx->loop);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
|
|
|
|
|
tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated -
|
|
|
|
|
MDBX_ENABLE_REFUND));
|
|
|
|
|
tASSERT(txn, dpl_check(txn));
|
|
|
|
|
if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 12 : 42))) {
|
2024-08-19 08:43:33 +03:00
|
|
|
|
ERROR("txn #%" PRIaTXN " too more loops %u, bailout", txn->txnid,
|
|
|
|
|
ctx->loop);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
rc = MDBX_PROBLEM;
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-23 12:42:59 +03:00
|
|
|
|
if (unlikely(ctx->dense ||
|
|
|
|
|
ctx->prev_first_unallocated > txn->geo.first_unallocated)) {
|
2024-05-19 22:07:58 +03:00
|
|
|
|
rc = clean_stored_retired(txn, ctx);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-23 12:42:59 +03:00
|
|
|
|
ctx->prev_first_unallocated = txn->geo.first_unallocated;
|
|
|
|
|
rc = MDBX_SUCCESS;
|
2024-05-19 22:07:58 +03:00
|
|
|
|
ctx->reserved = 0;
|
|
|
|
|
ctx->cleaned_slot = 0;
|
|
|
|
|
ctx->reused_slot = 0;
|
|
|
|
|
ctx->amount = ctx->fill_idx = ~0u;
|
|
|
|
|
ctx->cleaned_id = 0;
|
|
|
|
|
ctx->rid = txn->tw.gc.last_reclaimed;
|
|
|
|
|
while (true) {
|
|
|
|
|
/* Come back here after each Put() in case retired-list changed */
|
|
|
|
|
TRACE("%s", " >> continue");
|
|
|
|
|
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated -
|
|
|
|
|
MDBX_ENABLE_REFUND));
|
|
|
|
|
MDBX_val key, data;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
if (is_lifo(txn)) {
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (ctx->cleaned_slot <
|
|
|
|
|
(txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0)) {
|
|
|
|
|
ctx->reserved = 0;
|
|
|
|
|
ctx->cleaned_slot = 0;
|
|
|
|
|
ctx->reused_slot = 0;
|
|
|
|
|
ctx->fill_idx = ~0u;
|
|
|
|
|
/* LY: cleanup reclaimed records. */
|
|
|
|
|
do {
|
|
|
|
|
ctx->cleaned_id = txn->tw.gc.reclaimed[++ctx->cleaned_slot];
|
|
|
|
|
tASSERT(txn, ctx->cleaned_slot > 0 &&
|
|
|
|
|
ctx->cleaned_id <= env->lck->cached_oldest.weak);
|
|
|
|
|
key.iov_base = &ctx->cleaned_id;
|
|
|
|
|
key.iov_len = sizeof(ctx->cleaned_id);
|
|
|
|
|
rc = cursor_seek(&ctx->cursor, &key, nullptr, MDBX_SET).err;
|
|
|
|
|
if (rc == MDBX_NOTFOUND)
|
|
|
|
|
continue;
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
rc = prepare_backlog(txn, ctx);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
2024-05-19 22:07:58 +03:00
|
|
|
|
tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak);
|
|
|
|
|
TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix(ctx),
|
|
|
|
|
ctx->cleaned_slot, ctx->cleaned_id);
|
|
|
|
|
tASSERT(txn, *txn->cursors == &ctx->cursor);
|
|
|
|
|
rc = cursor_del(&ctx->cursor, 0);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
} while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed));
|
|
|
|
|
txl_sort(txn->tw.gc.reclaimed);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
/* Удаляем оставшиеся вынутые из GC записи. */
|
2024-05-23 12:42:59 +03:00
|
|
|
|
while (txn->tw.gc.last_reclaimed &&
|
|
|
|
|
ctx->cleaned_id <= txn->tw.gc.last_reclaimed) {
|
2024-05-19 22:07:58 +03:00
|
|
|
|
rc = outer_first(&ctx->cursor, &key, nullptr);
|
|
|
|
|
if (rc == MDBX_NOTFOUND)
|
|
|
|
|
break;
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
if (!MDBX_DISABLE_VALIDATION &&
|
|
|
|
|
unlikely(key.iov_len != sizeof(txnid_t))) {
|
|
|
|
|
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
|
|
|
|
"invalid GC-key size", (unsigned)key.iov_len);
|
|
|
|
|
rc = MDBX_CORRUPTED;
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
if (ctx->rid != ctx->cleaned_id) {
|
|
|
|
|
ctx->rid = ctx->cleaned_id;
|
|
|
|
|
ctx->reserved = 0;
|
|
|
|
|
ctx->reused_slot = 0;
|
|
|
|
|
}
|
|
|
|
|
ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base);
|
|
|
|
|
if (ctx->cleaned_id > txn->tw.gc.last_reclaimed)
|
|
|
|
|
break;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
rc = prepare_backlog(txn, ctx);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
2024-05-19 22:07:58 +03:00
|
|
|
|
tASSERT(txn, ctx->cleaned_id <= txn->tw.gc.last_reclaimed);
|
|
|
|
|
tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak);
|
|
|
|
|
TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix(ctx),
|
|
|
|
|
ctx->cleaned_id);
|
|
|
|
|
tASSERT(txn, *txn->cursors == &ctx->cursor);
|
|
|
|
|
rc = cursor_del(&ctx->cursor, 0);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated -
|
|
|
|
|
MDBX_ENABLE_REFUND));
|
|
|
|
|
tASSERT(txn, dpl_check(txn));
|
|
|
|
|
if (AUDIT_ENABLED()) {
|
|
|
|
|
rc = audit_ex(txn, ctx->retired_stored, false);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* return suitable into unallocated space */
|
|
|
|
|
if (txn_refund(txn)) {
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated -
|
|
|
|
|
MDBX_ENABLE_REFUND));
|
|
|
|
|
if (AUDIT_ENABLED()) {
|
|
|
|
|
rc = audit_ex(txn, ctx->retired_stored, false);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (txn->tw.loose_pages) {
|
|
|
|
|
/* put loose pages into the reclaimed- or retired-list */
|
|
|
|
|
rc = gcu_loose(txn, ctx);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
if (unlikely(txn->tw.loose_pages))
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.relist)) &&
|
|
|
|
|
(ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist) >
|
|
|
|
|
env->maxgc_large1page / 2)) {
|
|
|
|
|
TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx),
|
|
|
|
|
ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist));
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.relist);
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
ctx->amount = MDBX_PNL_GETSIZE(txn->tw.relist);
|
|
|
|
|
|
|
|
|
|
if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
|
|
|
|
|
/* store retired-list into GC */
|
|
|
|
|
rc = gcu_retired(txn, ctx);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated -
|
|
|
|
|
MDBX_ENABLE_REFUND));
|
|
|
|
|
tASSERT(txn, txn->tw.loose_count == 0);
|
|
|
|
|
|
|
|
|
|
TRACE("%s", " >> reserving");
|
|
|
|
|
if (AUDIT_ENABLED()) {
|
|
|
|
|
rc = audit_ex(txn, ctx->retired_stored, false);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
const size_t left = ctx->amount - ctx->reserved - ctx->reserve_adj;
|
|
|
|
|
TRACE("%s: amount %zu, reserved %zd, reserve_adj %zu, left %zd, "
|
|
|
|
|
"lifo-reclaimed-slots %zu, "
|
|
|
|
|
"reused-gc-slots %zu",
|
|
|
|
|
dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, left,
|
|
|
|
|
txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0,
|
|
|
|
|
ctx->reused_slot);
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#else
|
|
|
|
|
const size_t left = ctx->amount - ctx->reserved;
|
|
|
|
|
TRACE("%s: amount %zu, reserved %zd, left %zd, "
|
|
|
|
|
"lifo-reclaimed-slots %zu, "
|
|
|
|
|
"reused-gc-slots %zu",
|
|
|
|
|
dbg_prefix(ctx), ctx->amount, ctx->reserved, left,
|
|
|
|
|
txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0,
|
|
|
|
|
ctx->reused_slot);
|
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (0 >= (intptr_t)left)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
const rid_t rid_result = get_rid_for_reclaimed(txn, ctx, left);
|
|
|
|
|
if (unlikely(!rid_result.rid)) {
|
|
|
|
|
rc = rid_result.err;
|
|
|
|
|
if (likely(rc == MDBX_SUCCESS))
|
|
|
|
|
continue;
|
|
|
|
|
if (likely(rc == MDBX_RESULT_TRUE))
|
|
|
|
|
goto retry;
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
tASSERT(txn, rid_result.err == MDBX_SUCCESS);
|
|
|
|
|
const txnid_t reservation_gc_id = rid_result.rid;
|
|
|
|
|
|
|
|
|
|
size_t chunk = left;
|
|
|
|
|
if (unlikely(left > env->maxgc_large1page)) {
|
|
|
|
|
const size_t avail_gc_slots =
|
|
|
|
|
txn->tw.gc.reclaimed
|
|
|
|
|
? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot + 1
|
|
|
|
|
: (ctx->rid < INT16_MAX) ? (size_t)ctx->rid
|
|
|
|
|
: INT16_MAX;
|
|
|
|
|
if (likely(avail_gc_slots > 1)) {
|
|
|
|
|
#if MDBX_ENABLE_BIGFOOT
|
|
|
|
|
chunk = env->maxgc_large1page;
|
|
|
|
|
if (avail_gc_slots < INT16_MAX &&
|
|
|
|
|
unlikely(left > env->maxgc_large1page * avail_gc_slots))
|
|
|
|
|
/* TODO: We could examine what lengths of page sequences exist in relist
|
|
|
|
|
* and try to cut chunks of the corresponding size.
|
|
|
|
|
* The point is to avoid splitting page sequences,
|
|
|
|
|
* and to use them whole instead. */
|
|
|
|
|
chunk = env->maxgc_large1page +
|
|
|
|
|
left / (env->maxgc_large1page * avail_gc_slots) *
|
|
|
|
|
env->maxgc_large1page;
|
|
|
|
|
#else
|
|
|
|
|
if (chunk < env->maxgc_large1page * 2)
|
|
|
|
|
chunk /= 2;
|
|
|
|
|
else {
|
|
|
|
|
const size_t prefer_max_scatter = 257;
|
|
|
|
|
const size_t threshold =
|
|
|
|
|
env->maxgc_large1page * ((avail_gc_slots < prefer_max_scatter)
|
|
|
|
|
? avail_gc_slots
|
|
|
|
|
: prefer_max_scatter);
|
|
|
|
|
if (left < threshold)
|
|
|
|
|
chunk = env->maxgc_large1page;
|
|
|
|
|
else {
|
|
|
|
|
const size_t tail = left - threshold + env->maxgc_large1page + 1;
|
|
|
|
|
size_t span = 1;
|
|
|
|
|
size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) /
|
|
|
|
|
sizeof(pgno_t)) /* - 1 + span */;
|
|
|
|
|
if (tail > avail) {
|
|
|
|
|
for (size_t i = ctx->amount - span; i > 0; --i) {
|
|
|
|
|
if (MDBX_PNL_ASCENDING ? (txn->tw.relist[i] + span)
|
|
|
|
|
: (txn->tw.relist[i] - span) ==
|
|
|
|
|
txn->tw.relist[i + span]) {
|
|
|
|
|
span += 1;
|
|
|
|
|
avail =
|
|
|
|
|
((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) -
|
|
|
|
|
1 + span;
|
|
|
|
|
if (avail >= tail)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chunk = (avail >= tail) ? tail - span
|
|
|
|
|
: (avail_gc_slots > 3 &&
|
|
|
|
|
ctx->reused_slot < prefer_max_scatter - 3)
|
|
|
|
|
? avail - span
|
|
|
|
|
: tail;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif /* MDBX_ENABLE_BIGFOOT */
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tASSERT(txn, chunk > 0);
|
|
|
|
|
|
|
|
|
|
TRACE("%s: gc_rid %" PRIaTXN ", reused_gc_slot %zu, reservation-id "
|
|
|
|
|
"%" PRIaTXN,
|
|
|
|
|
dbg_prefix(ctx), ctx->rid, ctx->reused_slot, reservation_gc_id);
|
|
|
|
|
|
|
|
|
|
TRACE("%s: chunk %zu, gc-per-ovpage %u", dbg_prefix(ctx), chunk,
|
|
|
|
|
env->maxgc_large1page);
|
|
|
|
|
|
|
|
|
|
tASSERT(txn, reservation_gc_id <= env->lck->cached_oldest.weak);
|
|
|
|
|
if (unlikely(reservation_gc_id < MIN_TXNID ||
|
|
|
|
|
reservation_gc_id >
|
|
|
|
|
atomic_load64(&env->lck->cached_oldest, mo_Relaxed))) {
|
|
|
|
|
ERROR("** internal error (reservation_gc_id %" PRIaTXN ")",
|
|
|
|
|
reservation_gc_id);
|
|
|
|
|
rc = MDBX_PROBLEM;
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
key.iov_len = sizeof(reservation_gc_id);
|
|
|
|
|
key.iov_base = (void *)&reservation_gc_id;
|
|
|
|
|
data.iov_len = (chunk + 1) * sizeof(pgno_t);
|
|
|
|
|
TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix(ctx), chunk,
|
|
|
|
|
ctx->reserved + 1, ctx->reserved + chunk + 1, reservation_gc_id);
|
|
|
|
|
prepare_backlog(txn, ctx);
|
|
|
|
|
rc = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE);
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated -
|
|
|
|
|
MDBX_ENABLE_REFUND));
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
|
|
|
|
|
zeroize_reserved(env, data);
|
|
|
|
|
ctx->reserved += chunk;
|
|
|
|
|
TRACE("%s: reserved %zu (+%zu), continue", dbg_prefix(ctx), ctx->reserved,
|
|
|
|
|
chunk);
|
|
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tASSERT(
|
|
|
|
|
txn,
|
|
|
|
|
ctx->cleaned_slot ==
|
|
|
|
|
(txn->tw.gc.reclaimed ? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) : 0));
|
|
|
|
|
|
|
|
|
|
TRACE("%s", " >> filling");
|
|
|
|
|
/* Fill in the reserved records */
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
size_t excess_slots = 0;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
ctx->fill_idx =
|
|
|
|
|
txn->tw.gc.reclaimed
|
|
|
|
|
? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed) - ctx->reused_slot
|
|
|
|
|
: ctx->reused_slot;
|
|
|
|
|
rc = MDBX_SUCCESS;
|
|
|
|
|
tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->geo.first_unallocated -
|
|
|
|
|
MDBX_ENABLE_REFUND));
|
|
|
|
|
tASSERT(txn, dpl_check(txn));
|
|
|
|
|
if (ctx->amount) {
|
|
|
|
|
MDBX_val key, data;
|
|
|
|
|
key.iov_len = data.iov_len = 0; /* avoid MSVC warning */
|
|
|
|
|
key.iov_base = data.iov_base = nullptr;
|
|
|
|
|
|
|
|
|
|
size_t left = ctx->amount, excess = 0;
|
|
|
|
|
if (txn->tw.gc.reclaimed == nullptr) {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
tASSERT(txn, is_lifo(txn) == 0);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
rc = outer_first(&ctx->cursor, &key, &data);
|
2024-05-23 12:42:59 +03:00
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
2024-05-19 22:07:58 +03:00
|
|
|
|
goto bailout;
|
|
|
|
|
} else {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
tASSERT(txn, is_lifo(txn) != 0);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
txnid_t fill_gc_id;
|
|
|
|
|
TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left,
|
|
|
|
|
MDBX_PNL_GETSIZE(txn->tw.relist));
|
|
|
|
|
if (txn->tw.gc.reclaimed == nullptr) {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
tASSERT(txn, is_lifo(txn) == 0);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
fill_gc_id = unaligned_peek_u64(4, key.iov_base);
|
|
|
|
|
if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (!left)
|
|
|
|
|
break;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN
|
|
|
|
|
" > last_reclaimed %" PRIaTXN ", left %zu",
|
|
|
|
|
ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left);
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
ctx->reserve_adj =
|
|
|
|
|
(ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
ctx->fill_idx -= 1;
|
|
|
|
|
} else {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
tASSERT(txn, is_lifo(txn) != 0);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)) {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (!left)
|
|
|
|
|
break;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
NOTICE("** restart: reserve depleted (fill_idx %zu >= "
|
|
|
|
|
"gc.reclaimed %zu, left %zu",
|
|
|
|
|
ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed), left);
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
ctx->reserve_adj =
|
|
|
|
|
(ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
ctx->fill_idx += 1;
|
|
|
|
|
fill_gc_id = txn->tw.gc.reclaimed[ctx->fill_idx];
|
|
|
|
|
TRACE("%s: seek-reservation @%" PRIaTXN " at gc.reclaimed[%zu]",
|
|
|
|
|
dbg_prefix(ctx), fill_gc_id, ctx->fill_idx);
|
|
|
|
|
key.iov_base = &fill_gc_id;
|
|
|
|
|
key.iov_len = sizeof(fill_gc_id);
|
|
|
|
|
rc = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err;
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
ctx->cleaned_slot == (txn->tw.gc.reclaimed
|
|
|
|
|
? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)
|
|
|
|
|
: 0));
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
fill_gc_id > 0 && fill_gc_id <= env->lck->cached_oldest.weak);
|
|
|
|
|
key.iov_base = &fill_gc_id;
|
|
|
|
|
key.iov_len = sizeof(fill_gc_id);
|
|
|
|
|
|
|
|
|
|
tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
|
|
|
|
|
size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
|
|
|
|
|
if (unlikely(chunk > left)) {
|
|
|
|
|
const size_t delta = chunk - left;
|
|
|
|
|
excess += delta;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk,
|
|
|
|
|
left, fill_gc_id);
|
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (!left) {
|
|
|
|
|
excess_slots += 1;
|
|
|
|
|
goto next;
|
|
|
|
|
}
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if ((ctx->loop < 5 && delta > (ctx->loop / 2)) ||
|
|
|
|
|
delta > env->maxgc_large1page)
|
|
|
|
|
data.iov_len = (left + 1) * sizeof(pgno_t);
|
|
|
|
|
chunk = left;
|
|
|
|
|
}
|
|
|
|
|
rc = cursor_put(&ctx->cursor, &key, &data, MDBX_CURRENT | MDBX_RESERVE);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
zeroize_reserved(env, data);
|
|
|
|
|
|
|
|
|
|
if (unlikely(txn->tw.loose_count ||
|
|
|
|
|
ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) {
|
|
|
|
|
NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)",
|
|
|
|
|
ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist),
|
|
|
|
|
txn->tw.loose_count);
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1)))
|
|
|
|
|
goto retry_clean_adj;
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (unlikely(txn->tw.gc.reclaimed
|
|
|
|
|
? ctx->cleaned_slot <
|
|
|
|
|
MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)
|
|
|
|
|
: ctx->cleaned_id < txn->tw.gc.last_reclaimed)) {
|
|
|
|
|
NOTICE("%s", "** restart: reclaimed-slots changed");
|
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
if (unlikely(ctx->retired_stored !=
|
|
|
|
|
MDBX_PNL_GETSIZE(txn->tw.retired_pages))) {
|
|
|
|
|
tASSERT(txn,
|
|
|
|
|
ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages));
|
|
|
|
|
NOTICE("** restart: retired-list growth (%zu -> %zu)",
|
|
|
|
|
ctx->retired_stored, MDBX_PNL_GETSIZE(txn->tw.retired_pages));
|
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pgno_t *dst = data.iov_base;
|
|
|
|
|
*dst++ = (pgno_t)chunk;
|
|
|
|
|
pgno_t *src = MDBX_PNL_BEGIN(txn->tw.relist) + left - chunk;
|
|
|
|
|
memcpy(dst, src, chunk * sizeof(pgno_t));
|
|
|
|
|
pgno_t *from = src, *to = src + chunk;
|
|
|
|
|
TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN,
|
|
|
|
|
dbg_prefix(ctx), chunk, from - txn->tw.relist, from[0],
|
|
|
|
|
to - txn->tw.relist, to[-1], fill_gc_id);
|
|
|
|
|
|
|
|
|
|
left -= chunk;
|
|
|
|
|
if (AUDIT_ENABLED()) {
|
|
|
|
|
rc = audit_ex(txn, ctx->retired_stored + ctx->amount - left, true);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS))
|
|
|
|
|
goto bailout;
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
next:
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#else
|
|
|
|
|
if (left == 0)
|
|
|
|
|
break;
|
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
|
|
|
|
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (txn->tw.gc.reclaimed == nullptr) {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
tASSERT(txn, is_lifo(txn) == 0);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
rc = outer_next(&ctx->cursor, &key, &data, MDBX_NEXT);
|
|
|
|
|
if (unlikely(rc != MDBX_SUCCESS)) {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
|
|
|
|
if (rc == MDBX_NOTFOUND && !left) {
|
|
|
|
|
rc = MDBX_SUCCESS;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
|
|
|
|
goto bailout;
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
|
|
|
|
} else {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
tASSERT(txn, is_lifo(txn) != 0);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (excess) {
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
size_t n = excess, adj = excess;
|
|
|
|
|
while (n >= env->maxgc_large1page)
|
|
|
|
|
adj -= n /= env->maxgc_large1page;
|
|
|
|
|
ctx->reserve_adj += adj;
|
|
|
|
|
TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix(ctx),
|
|
|
|
|
excess, adj, ctx->reserve_adj);
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tASSERT(txn, rc == MDBX_SUCCESS);
|
|
|
|
|
if (unlikely(txn->tw.loose_count != 0 ||
|
|
|
|
|
ctx->amount != MDBX_PNL_GETSIZE(txn->tw.relist))) {
|
|
|
|
|
NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)",
|
|
|
|
|
txn->tw.loose_count, ctx->amount, MDBX_PNL_GETSIZE(txn->tw.relist));
|
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#if MDBX_ENABLE_GC_EXPERIMENTAL
|
2024-05-19 22:07:58 +03:00
|
|
|
|
if (unlikely(excess_slots)) {
|
|
|
|
|
const bool will_retry = ctx->loop < 5 || excess_slots > 1;
|
|
|
|
|
NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, "
|
|
|
|
|
"loop %zu)",
|
|
|
|
|
will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx,
|
|
|
|
|
ctx->reserve_adj, ctx->loop);
|
|
|
|
|
if (will_retry)
|
|
|
|
|
goto retry;
|
|
|
|
|
}
|
2024-05-23 12:42:59 +03:00
|
|
|
|
#else
|
|
|
|
|
if (unlikely(ctx->fill_idx != (txn->tw.gc.reclaimed
|
|
|
|
|
? MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed)
|
|
|
|
|
: 0))) {
|
|
|
|
|
const bool will_retry = ctx->loop < 9;
|
|
|
|
|
NOTICE("** %s: reserve excess (filled-idx %zu, loop %u)",
|
|
|
|
|
will_retry ? "restart" : "ignore", ctx->fill_idx, ctx->loop);
|
|
|
|
|
if (will_retry)
|
|
|
|
|
goto retry;
|
|
|
|
|
}
|
|
|
|
|
#endif /* MDBX_ENABLE_GC_EXPERIMENTAL */
|
2024-05-19 22:07:58 +03:00
|
|
|
|
|
|
|
|
|
tASSERT(txn, txn->tw.gc.reclaimed == nullptr ||
|
|
|
|
|
ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.reclaimed));
|
|
|
|
|
|
|
|
|
|
bailout:
|
|
|
|
|
txn->cursors[FREE_DBI] = ctx->cursor.next;
|
|
|
|
|
|
|
|
|
|
MDBX_PNL_SETSIZE(txn->tw.relist, 0);
|
|
|
|
|
#if MDBX_ENABLE_PROFGC
|
|
|
|
|
env->lck->pgops.gc_prof.wloops += (uint32_t)ctx->loop;
|
|
|
|
|
#endif /* MDBX_ENABLE_PROFGC */
|
2024-05-23 12:42:59 +03:00
|
|
|
|
TRACE("<<< %u loops, rc = %d", ctx->loop, rc);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
return rc;
|
|
|
|
|
}
|