mdbx: new GC update code.

Леонид Юрьев (Leonid Yuriev) 2025-04-26 00:15:41 +03:00
parent 011c3072da
commit 2b36fd5974
GPG Key ID: 518BD10B927E8686
24 changed files with 1493 additions and 905 deletions

mdbx.h

@@ -2775,10 +2775,10 @@ typedef struct MDBX_stat MDBX_stat;
  * Legacy mdbx_env_stat() correspond to calling \ref mdbx_env_stat_ex() with the
  * null `txn` argument.
  *
- * \param [in] env   An environment handle returned by \ref mdbx_env_create()
- * \param [in] txn   A transaction handle returned by \ref mdbx_txn_begin()
+ * \param [in] env   An environment handle returned by \ref mdbx_env_create().
+ * \param [in] txn   A transaction handle returned by \ref mdbx_txn_begin().
  * \param [out] stat The address of an \ref MDBX_stat structure where
- *                   the statistics will be copied
+ *                   the statistics will be copied.
  * \param [in] bytes The size of \ref MDBX_stat.
  *
  * \returns A non-zero error value on failure and 0 on success. */
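For context, a minimal usage sketch of the call documented above (illustrative, not part of the commit):

    MDBX_stat st;
    int err = mdbx_env_stat_ex(env, txn, &st, sizeof(st));
    if (err != MDBX_SUCCESS)
      fprintf(stderr, "mdbx_env_stat_ex: %s\n", mdbx_strerror(err));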


@@ -955,7 +955,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si
     env->basal_txn->wr.troika = meta_tap(env);
     eASSERT(env, !env->txn && !env->basal_txn->nested);
     env->basal_txn->txnid = env->basal_txn->wr.troika.txnid[env->basal_txn->wr.troika.recent];
-    txn_snapshot_oldest(env->basal_txn);
+    txn_gc_detent(env->basal_txn);
   }

   /* get untouched params from current TXN or DB */


@@ -513,23 +513,25 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
     info->txn_reader_lag = INT64_MAX;
     lck_t *const lck = env->lck_mmap.lck;
     if (scan_rlt && lck) {
-      txnid_t oldest_snapshot = txn->txnid;
+      txnid_t oldest_reading = txn->txnid;
       const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
       if (snap_nreaders) {
-        oldest_snapshot = txn_snapshot_oldest(txn);
-        if (oldest_snapshot == txn->txnid - 1) {
-          /* check if there is at least one reader */
-          bool exists = false;
+        txn_gc_detent(txn);
+        oldest_reading = txn->env->gc.detent;
+        if (oldest_reading == txn->wr.troika.txnid[txn->wr.troika.recent]) {
+          /* If the oldest snapshot in use is the previous one, i.e. the one immediately preceding the current
+           * transaction, then scan the reader table to find out whether that snapshot is actually in use
+           * by readers. */
+          oldest_reading = txn->txnid;
           for (size_t i = 0; i < snap_nreaders; ++i) {
-            if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && txn->txnid > safe64_read(&lck->rdt[i].txnid)) {
-              exists = true;
+            if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && txn->env->gc.detent == safe64_read(&lck->rdt[i].txnid)) {
+              oldest_reading = txn->env->gc.detent;
               break;
             }
           }
-          oldest_snapshot += !exists;
         }
       }
-      info->txn_reader_lag = txn->txnid - oldest_snapshot;
+      info->txn_reader_lag = txn->txnid - oldest_reading;
     }
   }


@@ -24,12 +24,11 @@ static size_t audit_db_used(const tree_t *db) {
   return db ? (size_t)db->branch_pages + (size_t)db->leaf_pages + (size_t)db->large_pages : 0;
 }

-__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc) {
+__cold static int audit_ex_locked(MDBX_txn *txn, const size_t retired_stored, const bool dont_filter_gc) {
   const MDBX_env *const env = txn->env;
-  size_t pending = 0;
-  if ((txn->flags & MDBX_TXN_RDONLY) == 0)
-    pending = txn->wr.loose_count + MDBX_PNL_GETSIZE(txn->wr.repnl) +
-              (MDBX_PNL_GETSIZE(txn->wr.retired_pages) - retired_stored);
+  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
+  const size_t pending = txn->wr.loose_count + MDBX_PNL_GETSIZE(txn->wr.repnl) +
+                         (MDBX_PNL_GETSIZE(txn->wr.retired_pages) - retired_stored);

   cursor_couple_t cx;
   int rc = cursor_init(&cx.outer, txn, FREE_DBI);
@@ -40,17 +39,16 @@ __cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool don
   MDBX_val key, data;
   rc = outer_first(&cx.outer, &key, &data);
   while (rc == MDBX_SUCCESS) {
-    if (!dont_filter_gc) {
-      if (unlikely(key.iov_len != sizeof(txnid_t))) {
-        ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len);
-        return MDBX_CORRUPTED;
-      }
-      txnid_t id = unaligned_peek_u64(4, key.iov_base);
-      if (txn->wr.gc.retxl ? txl_contain(txn->wr.gc.retxl, id) : (id <= txn->wr.gc.last_reclaimed))
-        goto skip;
+    if (unlikely(key.iov_len != sizeof(txnid_t))) {
+      ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len);
+      return MDBX_CORRUPTED;
     }
-    gc += *(pgno_t *)data.iov_base;
-  skip:
+    const txnid_t id = unaligned_peek_u64(4, key.iov_base);
+    const size_t len = *(pgno_t *)data.iov_base;
+    const bool acc = dont_filter_gc || !gc_is_reclaimed(txn, id);
+    TRACE("%s id %" PRIaTXN " len %zu", acc ? "acc" : "skip", id, len);
+    if (acc)
+      gc += len;
     rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT);
   }
   tASSERT(txn, rc == MDBX_NOTFOUND);


@@ -1780,8 +1780,7 @@ __hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cur
     }
     int cmp = mc->clc->k.cmp(&aligned_key, &nodekey);
     if (unlikely(cmp == 0)) {
-      /* Probably happens rarely, but first node on the page
-       * was the one we wanted. */
+      /* Probably happens rarely, but first node on the page was the one we wanted. */
       mc->ki[mc->top] = 0;
       ret.exact = true;
       goto got_node;


@@ -53,7 +53,7 @@ static inline dpl_t *dpl_sort(const MDBX_txn *txn) {
   return likely(dl->sorted == dl->length) ? dl : dpl_sort_slowpath(txn);
 }

-MDBX_INTERNAL __noinline size_t dpl_search(const MDBX_txn *txn, pgno_t pgno);
+MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL __noinline size_t dpl_search(const MDBX_txn *txn, pgno_t pgno);

 MDBX_MAYBE_UNUSED MDBX_INTERNAL const page_t *debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno);
@@ -68,7 +68,7 @@ MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t dpl_endpgno(const dpl_t *dl, siz
   return dpl_npages(dl, i) + dl->items[i].pgno;
 }

-static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, size_t npages) {
+MDBX_NOTHROW_PURE_FUNCTION static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, size_t npages) {
   tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
   tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);


@@ -1061,16 +1061,17 @@ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, troika
 #endif /* MADV_DONTNEED || POSIX_MADV_DONTNEED */

   /* LY: check conditions to shrink datafile */
-  const pgno_t backlog_gap = 3 + pending->trees.gc.height * 3;
+  const pgno_t stockpile_gap = 3 + pending->trees.gc.height * 3;
   pgno_t shrink_step = 0;
   if (pending->geometry.shrink_pv && pending->geometry.now - pending->geometry.first_unallocated >
-                                         (shrink_step = pv2pages(pending->geometry.shrink_pv)) + backlog_gap) {
-    if (pending->geometry.now > largest_pgno && pending->geometry.now - largest_pgno > shrink_step + backlog_gap) {
+                                         (shrink_step = pv2pages(pending->geometry.shrink_pv)) + stockpile_gap) {
+    if (pending->geometry.now > largest_pgno &&
+        pending->geometry.now - largest_pgno > shrink_step + stockpile_gap) {
       const pgno_t aligner =
           pending->geometry.grow_pv ? /* grow_step */ pv2pages(pending->geometry.grow_pv) : shrink_step;
-      const pgno_t with_backlog_gap = largest_pgno + backlog_gap;
+      const pgno_t with_stockpile_gap = largest_pgno + stockpile_gap;
       const pgno_t aligned =
-          pgno_align2os_pgno(env, (size_t)with_backlog_gap + aligner - with_backlog_gap % aligner);
+          pgno_align2os_pgno(env, (size_t)with_stockpile_gap + aligner - with_stockpile_gap % aligner);
       const pgno_t bottom = (aligned > pending->geometry.lower) ? aligned : pending->geometry.lower;
       if (pending->geometry.now > bottom) {
         if (TROIKA_HAVE_STEADY(troika))


@@ -164,7 +164,7 @@ retry:;
     }
     eASSERT(env, head.txnid == recent_committed_txnid(env));
     env->basal_txn->txnid = head.txnid;
-    txn_snapshot_oldest(env->basal_txn);
+    txn_gc_detent(env->basal_txn);
     flags |= txn_shrink_allowed;
   }
@@ -524,7 +524,7 @@ __cold int env_close(MDBX_env *env, bool resurrect_after_fork) {
   env->defer_free = nullptr;
 #endif /* MDBX_ENABLE_DBI_LOCKFREE */

-  if (!(env->flags & MDBX_RDONLY))
+  if ((env->flags & MDBX_RDONLY) == 0)
     osal_ioring_destroy(&env->ioring);
   env->lck = nullptr;


@@ -30,8 +30,10 @@ typedef struct iov_ctx iov_ctx_t;
 #if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
 #define MDBX_WORDBITS 64
+#define MDBX_WORDBITS_LN2 6
 #else
 #define MDBX_WORDBITS 32
+#define MDBX_WORDBITS_LN2 5
 #endif /* MDBX_WORDBITS */

 #include "options.h"
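A quick sanity check of the new constant (a sketch, not part of the commit): MDBX_WORDBITS_LN2 is the base-2 logarithm of MDBX_WORDBITS, so shifting back must reproduce it:

    STATIC_ASSERT((size_t)1 << MDBX_WORDBITS_LN2 == MDBX_WORDBITS);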


@@ -570,14 +570,11 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, const size_t s

 /*----------------------------------------------------------------------------*/

-#define ALLOC_COALESCE 4    /* internal state */
-#define ALLOC_SHOULD_SCAN 8 /* internal state */
-#define ALLOC_LIFO 16       /* internal state */
-
-static inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, const uint8_t flags) {
+static inline bool is_reclaimable(MDBX_txn *txn, const MDBX_cursor *mc, const uint8_t flags) {
   /* If txn is updating the GC, then the retired-list cannot play catch-up with
    * itself by growing while trying to save it. */
-  if (mc->tree == &txn->dbs[FREE_DBI] && !(flags & ALLOC_RESERVE) && !(mc->flags & z_gcu_preparation))
+  STATIC_ASSERT(ALLOC_RESERVE == z_gcu_preparation);
+  if (mc->tree == &txn->dbs[FREE_DBI] && !((flags | mc->flags) & z_gcu_preparation))
     return false;

   /* avoid search inside empty tree and while tree is updating,
@@ -590,8 +587,6 @@ static inline bool is_reclaimable(MDBX_txn *txn, const MDBX_cursor *mc, const uint
   return true;
 }

-static inline bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) { return txl_contain(txn->wr.gc.retxl, id); }
-
 __hot static pgno_t repnl_get_single(MDBX_txn *txn) {
   const size_t len = MDBX_PNL_GETSIZE(txn->wr.repnl);
   assert(len > 0);
@@ -721,6 +716,10 @@ __hot static pgno_t repnl_get_sequence(MDBX_txn *txn, const size_t num, uint8_t
   return 0;
 }

+bool gc_repnl_has_span(MDBX_txn *txn, const size_t num) {
+  return (num > 1) ? repnl_get_sequence(txn, num, ALLOC_RESERVE) != 0 : !MDBX_PNL_IS_EMPTY(txn->wr.repnl);
+}
+
 static inline pgr_t page_alloc_finalize(MDBX_env *const env, MDBX_txn *const txn, const MDBX_cursor *const mc,
                                         const pgno_t pgno, const size_t num) {
 #if MDBX_ENABLE_PROFGC
@@ -842,6 +841,13 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags)
   prof->spe_counter += 1;
 #endif /* MDBX_ENABLE_PROFGC */

+  /* If the ALLOC_RESERVE flag is set, all that is required is to provide the corresponding reserve in txn->wr.repnl
+   * and/or txn->wr.gc.reclaimed, without allocating and returning a page. Three call variants are then possible:
+   * 1. num == 0: a slot is needed to return into the GC the leftovers of previously recycled/extracted pages;
+   *    recycling long records makes no sense here, since that would not reduce the deficit of free ids/slots;
+   * 2. num == 1: the reserve must be enlarged before updating the GC;
+   * 3. num > 1: a sequence of pages is needed to store retired pages
+   *    when MDBX_ENABLE_BIGFOOT is disabled. */
   eASSERT(env, num > 0 || (flags & ALLOC_RESERVE));
   eASSERT(env, pnl_check_allocated(txn->wr.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
@@ -866,13 +872,12 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags)
       goto done;
     }
   } else {
-    eASSERT(env, num == 0 || MDBX_PNL_GETSIZE(txn->wr.repnl) == 0);
-    eASSERT(env, !(flags & ALLOC_RESERVE) || num == 0);
+    eASSERT(env, num == 0 || MDBX_PNL_GETSIZE(txn->wr.repnl) == 0 || (flags & ALLOC_RESERVE));
   }

   //---------------------------------------------------------------------------

-  if (unlikely(!is_gc_usable(txn, mc, flags))) {
+  if (unlikely(!is_reclaimable(txn, mc, flags))) {
     eASSERT(env, (txn->flags & txn_gc_drained) || num > 1);
     goto no_gc;
   }
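To make the three reserve variants above concrete, hypothetical call shapes (illustrative only; the exact flag combinations used by the new gc-put code are not shown in this page):

    pgr_t r;
    r = gc_alloc_ex(mc, 0, ALLOC_RESERVE); /* 1: only a slot/id for returning leftovers */
    r = gc_alloc_ex(mc, 1, ALLOC_RESERVE); /* 2: enlarge the reserve before the GC update */
    r = gc_alloc_ex(mc, n, ALLOC_RESERVE); /* 3: a page sequence (n > 1) for retired pages
                                            *    when MDBX_ENABLE_BIGFOOT is disabled */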
@@ -880,21 +885,18 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags)
   eASSERT(env, (flags & (ALLOC_COALESCE | ALLOC_LIFO | ALLOC_SHOULD_SCAN)) == 0);
   flags += (env->flags & MDBX_LIFORECLAIM) ? ALLOC_LIFO : 0;

-  if (/* Do not coalesce records while preparing the reserve for the GC update.
-       * Otherwise an attempt to enlarge the reserve may lead to needing an even
-       * larger reserve because the list of recycled pages grows. */
-      (flags & ALLOC_RESERVE) == 0) {
-    if (txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->wr.repnl) < env->maxgc_large1page / 2)
-      flags += ALLOC_COALESCE;
-  }
+  /* Do not coalesce records when a slot is requested for returning pages into the GC. Otherwise an attempt to
+   * enlarge the reserve may lead to needing an even larger reserve as the list of recycled pages grows. */
+  if (num > 0 && txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->wr.repnl) < env->maxgc_large1page / 2)
+    flags += ALLOC_COALESCE;

-  MDBX_cursor *const gc = ptr_disp(env->basal_txn, sizeof(MDBX_txn));
+  MDBX_cursor *const gc = txn_gc_cursor(txn);
   eASSERT(env, mc != gc && gc->next == gc);
   gc->txn = txn;
   gc->dbi_state = txn->dbi_state;
   gc->top_and_flags = z_fresh_mark;

-  txn->wr.prefault_write_activated = env->options.prefault_write;
+  txn->wr.prefault_write_activated = !env->incore && env->options.prefault_write;
   if (txn->wr.prefault_write_activated) {
     /* Checking via minicore() reduces the overhead substantially, but in the
      * simplest cases (a trivial benchmark) the integral performance
     txn->wr.prefault_write_activated = false;
   }

-retry_gc_refresh_oldest:;
-  txnid_t oldest = txn_snapshot_oldest(txn);
-retry_gc_have_oldest:
-  if (unlikely(oldest >= txn->txnid)) {
-    ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN " for current-txnid %" PRIaTXN, oldest, txn->txnid);
+retry_gc_refresh_detent:
+  txn_gc_detent(txn);
+retry_gc_have_detent:
+  if (unlikely(txn->env->gc.detent >= txn->txnid)) {
+    FATAL("unexpected/invalid gc-detent %" PRIaTXN " for current-txnid %" PRIaTXN, txn->env->gc.detent, txn->txnid);
     ret.err = MDBX_PROBLEM;
     goto fail;
   }
-  const txnid_t detent = oldest + 1;

   txnid_t id = 0;
   MDBX_cursor_op op = MDBX_FIRST;
   if (flags & ALLOC_LIFO) {
-    if (!txn->wr.gc.retxl) {
-      txn->wr.gc.retxl = txl_alloc();
-      if (unlikely(!txn->wr.gc.retxl)) {
-        ret.err = MDBX_ENOMEM;
-        goto fail;
-      }
-    }
     /* Begin lookup backward from oldest reader */
-    id = detent - 1;
+    id = txn->env->gc.detent;
     op = MDBX_SET_RANGE;
-  } else if (txn->wr.gc.last_reclaimed) {
+  } else {
     /* Continue lookup forward from last-reclaimed */
-    id = txn->wr.gc.last_reclaimed + 1;
-    if (id >= detent)
-      goto depleted_gc;
-    op = MDBX_SET_RANGE;
+    id = rkl_highest(&txn->wr.gc.reclaimed);
+    if (id) {
+      id += 1;
+      op = MDBX_SET_RANGE;
+      if (id >= txn->env->gc.detent)
+        goto depleted_gc;
+    }
   }

-next_gc:;
-  MDBX_val key;
-  key.iov_base = &id;
-  key.iov_len = sizeof(id);
+next_gc:
 #if MDBX_ENABLE_PROFGC
-  prof->rsteps += 1;
+  prof->rsteps += 1
 #endif /* MDBX_ENABLE_PROFGC */
+      ;
+  MDBX_val key = {.iov_base = &id, .iov_len = sizeof(id)};

   /* Seek first/next GC record */
   ret.err = cursor_ops(gc, &key, nullptr, op);
@@ -967,15 +962,18 @@ next_gc:
     ret.err = MDBX_CORRUPTED;
     goto fail;
   }

   id = unaligned_peek_u64(4, key.iov_base);
   if (flags & ALLOC_LIFO) {
     op = MDBX_PREV;
-    if (id >= detent || is_already_reclaimed(txn, id))
+    if (id >= txn->env->gc.detent || gc_is_reclaimed(txn, id))
       goto next_gc;
   } else {
-    op = MDBX_NEXT;
-    if (unlikely(id >= detent))
+    if (unlikely(id >= txn->env->gc.detent))
       goto depleted_gc;
+    op = MDBX_NEXT;
+    if (gc_is_reclaimed(txn, id))
+      goto next_gc;
   }
   txn->flags &= ~txn_gc_drained;
@@ -996,12 +994,23 @@ next_gc:
   const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
   TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, gc_len + MDBX_PNL_GETSIZE(txn->wr.repnl));

-  if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->wr.repnl) >= env->maxgc_large1page)) {
-    /* Don't try to coalesce too much. */
+  if (unlikely(!num)) {
+    /* TODO: check against the criteria of item 2 as formulated in gc_provide_slots().
+     * For now the check here is greatly simplified and not quite right, since information about the number of
+     * available slots and their deficit for returning wr.repnl is not available yet. */
+    if (gc_len > env->maxgc_large1page / 4 * 3
+        /* if the record is long enough, then recycling this slot will not add much room for returning wr.repnl, etc. */
+        && MDBX_PNL_GETSIZE(txn->wr.repnl) + gc_len > env->maxgc_large1page /* does not fit into the tail */) {
+      DEBUG("avoid reclaiming %" PRIaTXN " slot, since it is too long (%zu)", id, gc_len);
+      ret.err = MDBX_NOTFOUND;
+      goto reserve_done;
+    }
+  }
+
+  if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->wr.repnl) /* Don't try to coalesce too much. */ >=
+               env->maxgc_large1page)) {
     if (flags & ALLOC_SHOULD_SCAN) {
-      eASSERT(env, flags & ALLOC_COALESCE);
-      eASSERT(env, !(flags & ALLOC_RESERVE));
-      eASSERT(env, num > 0);
+      eASSERT(env, (flags & ALLOC_COALESCE) /* && !(flags & ALLOC_RESERVE) */ && num > 0);
 #if MDBX_ENABLE_PROFGC
       env->lck->pgops.gc_prof.coalescences += 1;
 #endif /* MDBX_ENABLE_PROFGC */
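A worked example of the skip heuristic above, with hypothetical numbers: suppose env->maxgc_large1page == 1000 and wr.repnl already holds 300 pages. For a slot request (num == 0), a GC record of 800 pages is skipped, since 800 > 750 (three quarters of the limit) and 300 + 800 > 1000 (it does not fit into the tail); a 500-page record would still be reclaimed.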
@@ -1010,25 +1019,25 @@ next_gc:
       eASSERT(env, MDBX_PNL_LAST(txn->wr.repnl) < txn->geo.first_unallocated &&
                        MDBX_PNL_FIRST(txn->wr.repnl) < txn->geo.first_unallocated);
       if (likely(num == 1)) {
-        pgno = repnl_get_single(txn);
+        pgno = (flags & ALLOC_RESERVE) ? P_INVALID : repnl_get_single(txn);
         goto done;
       }
       pgno = repnl_get_sequence(txn, num, flags);
       if (likely(pgno))
         goto done;
     }
+    flags -= ALLOC_COALESCE | ALLOC_SHOULD_SCAN;
   }
-  flags &= ~(ALLOC_COALESCE | ALLOC_SHOULD_SCAN);

   if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(txn->wr.repnl) >= env->options.rp_augment_limit) &&
       ((/* not a slot-request from gc-update */ num &&
         /* have enough unallocated space */ txn->geo.upper >= txn->geo.first_unallocated + num &&
-        monotime_since_cached(monotime_begin, &now_cache) + txn->wr.gc.time_acc >= env->options.gc_time_limit) ||
+        monotime_since_cached(monotime_begin, &now_cache) + txn->wr.gc.spent >= env->options.gc_time_limit) ||
        gc_len + MDBX_PNL_GETSIZE(txn->wr.repnl) >= PAGELIST_LIMIT)) {
     /* Stop reclaiming to avoid large/overflow the page list. This is a rare
-     * case while search for a continuously multi-page region in a
-     * large database, see https://libmdbx.dqdkfa.ru/dead-github/issues/123 */
+     * case while search for a continuously multi-page region in a large database,
+     * see https://libmdbx.dqdkfa.ru/dead-github/issues/123 */
     NOTICE("stop reclaiming %s: %zu (current) + %zu "
-           "(chunk) -> %zu, rp_augment_limit %u",
+           "(chunk) >= %zu, rp_augment_limit %u",
            likely(gc_len + MDBX_PNL_GETSIZE(txn->wr.repnl) < PAGELIST_LIMIT) ? "since rp_augment_limit was reached"
                                                                              : "to avoid PNL overflow",
            MDBX_PNL_GETSIZE(txn->wr.repnl), gc_len, gc_len + MDBX_PNL_GETSIZE(txn->wr.repnl),
@@ -1038,12 +1047,17 @@ next_gc:
   }

   /* Remember ID of readed GC record */
-  txn->wr.gc.last_reclaimed = id;
-  if (flags & ALLOC_LIFO) {
-    ret.err = txl_append(&txn->wr.gc.retxl, id);
-    if (unlikely(ret.err != MDBX_SUCCESS))
-      goto fail;
-  }
+  ret.err = rkl_push(&txn->wr.gc.reclaimed, id,
+                     false /* Instead of false, one could pass/use (flags & ALLOC_LIFO) == 0 here; then
+                            * holes/gaps in GC ids would form contiguous intervals in wr.gc.reclaimed,
+                            * which would provide more free ids/slots for returning pages. However, this would
+                            * also lead to futile attempts to delete absent records in gc_clear_reclaimed(),
+                            * and then to shuffling these contiguous intervals element-by-element into ready4reuse.
+                            * So there is decidedly no point in it. Contiguous intervals should instead either be
+                            * formed by gc_clear_reclaimed(), especially in FIFO mode, or be sought only in
+                            * gc_provide_ids() */);
+  TRACE("%" PRIaTXN " len %zu pushed to txn-rkl, err %d", id, gc_len, ret.err);
+  if (unlikely(ret.err != MDBX_SUCCESS))
+    goto fail;

   /* Append PNL from GC record to wr.repnl */
   ret.err = pnl_need(&txn->wr.repnl, gc_len);
@@ -1087,22 +1101,25 @@ next_gc:
   }
   eASSERT(env, pnl_check_allocated(txn->wr.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));

-  /* Done for a kick-reclaim mode, actually no page needed */
-  if (unlikely(num == 0)) {
-    eASSERT(env, ret.err == MDBX_SUCCESS);
-    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, MDBX_PNL_GETSIZE(txn->wr.repnl));
-    goto early_exit;
-  }
-
-  /* TODO: delete reclaimed records */
+  /* TODO: deletion of the records loaded from the GC */

   eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
   if (flags & ALLOC_COALESCE) {
-    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, MDBX_PNL_GETSIZE(txn->wr.repnl));
-    goto next_gc;
+    if (MDBX_PNL_GETSIZE(txn->wr.repnl) < env->maxgc_large1page / 2) {
+      TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, MDBX_PNL_GETSIZE(txn->wr.repnl));
+      goto next_gc;
+    }
+    flags -= ALLOC_COALESCE;
   }

 scan:
+  if ((flags & ALLOC_RESERVE) && num < 2) {
+    /* Only a slot/id was needed, for gc_reclaim_slot() or gc_reserve4stockpile() */
+    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "reserve-done", id, MDBX_PNL_GETSIZE(txn->wr.repnl));
+    ret.err = MDBX_SUCCESS;
+    goto reserve_done;
+  }
   eASSERT(env, flags & ALLOC_SHOULD_SCAN);
   eASSERT(env, num > 0);
   if (MDBX_PNL_GETSIZE(txn->wr.repnl) >= num) {
@@ -1118,17 +1135,16 @@ scan:
     goto done;
   }
   flags -= ALLOC_SHOULD_SCAN;
-  if (ret.err == MDBX_SUCCESS) {
+  if ((txn->flags & txn_gc_drained) == 0) {
     TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, MDBX_PNL_GETSIZE(txn->wr.repnl));
     goto next_gc;
   }

 depleted_gc:
   TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, MDBX_PNL_GETSIZE(txn->wr.repnl));
-  ret.err = MDBX_NOTFOUND;
+  txn->flags |= txn_gc_drained;
   if (flags & ALLOC_SHOULD_SCAN)
     goto scan;
-  txn->flags |= txn_gc_drained;

   //-------------------------------------------------------------------------
@@ -1145,9 +1161,9 @@ depleted_gc:
   /* Does reclaiming stopped at the last steady point? */
   const meta_ptr_t recent = meta_recent(env, &txn->wr.troika);
   const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->wr.troika);
-  if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady && detent == prefer_steady.txnid + 1) {
-    DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN "-%s, detent %" PRIaTXN, recent.txnid,
-          durable_caption(recent.ptr_c), prefer_steady.txnid, durable_caption(prefer_steady.ptr_c), detent);
+  if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady && txn->env->gc.detent == prefer_steady.txnid) {
+    DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN "-%s", recent.txnid, durable_caption(recent.ptr_c),
+          prefer_steady.txnid, durable_caption(prefer_steady.ptr_c));
     const pgno_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed);
     const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed);
     uint64_t eoos_timestamp;
@@ -1166,12 +1182,12 @@ depleted_gc:
 #if MDBX_ENABLE_PROFGC
       env->lck->pgops.gc_prof.wipes += 1;
 #endif /* MDBX_ENABLE_PROFGC */
-      ret.err = meta_wipe_steady(env, detent);
+      ret.err = meta_wipe_steady(env, txn->env->gc.detent);
       DEBUG("gc-wipe-steady, rc %d", ret.err);
       if (unlikely(ret.err != MDBX_SUCCESS))
         goto fail;
       eASSERT(env, prefer_steady.ptr_c != meta_prefer_steady(env, &txn->wr.troika).ptr_c);
-      goto retry_gc_refresh_oldest;
+      goto retry_gc_refresh_detent;
     }
     if ((autosync_threshold && atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= autosync_threshold) ||
         (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) &&
@@ -1189,15 +1205,12 @@ depleted_gc:
       if (unlikely(ret.err != MDBX_SUCCESS))
         goto fail;
       eASSERT(env, prefer_steady.ptr_c != meta_prefer_steady(env, &txn->wr.troika).ptr_c);
-      goto retry_gc_refresh_oldest;
+      goto retry_gc_refresh_detent;
     }
   }

-  if (unlikely(true == atomic_load32(&env->lck->rdt_refresh_flag, mo_AcquireRelease))) {
-    oldest = txn_snapshot_oldest(txn);
-    if (oldest >= detent)
-      goto retry_gc_have_oldest;
-  }
+  if (unlikely(true == atomic_load32(&env->lck->rdt_refresh_flag, mo_AcquireRelease)) && txn_gc_detent(txn))
+    goto retry_gc_have_detent;

   /* Avoid kick lagging reader(s) if is enough unallocated space
    * at the end of database file. */
@@ -1206,11 +1219,8 @@ depleted_gc:
     goto done;
   }

-  if (oldest < txn->txnid - xMDBX_TXNID_STEP) {
-    oldest = mvcc_kick_laggards(env, oldest);
-    if (oldest >= detent)
-      goto retry_gc_have_oldest;
-  }
+  if (txn->txnid - txn->env->gc.detent > xMDBX_TXNID_STEP && mvcc_kick_laggards(env, txn->env->gc.detent))
+    goto retry_gc_refresh_detent;

   //---------------------------------------------------------------------------
@@ -1277,30 +1287,40 @@ done:
     eASSERT(env, ret.err != MDBX_SUCCESS);
     eASSERT(env, pnl_check_allocated(txn->wr.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
     int level;
-    const char *what;
-    if (flags & ALLOC_RESERVE) {
-      level = (flags & ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
-      what = num ? "reserve-pages" : "fetch-slot";
-    } else {
+    if (flags & ALLOC_UNIMPORTANT)
+      level = MDBX_LOG_DEBUG;
+    else if (flags & ALLOC_RESERVE)
+      level = MDBX_LOG_NOTICE;
+    else {
       txn->flags |= MDBX_TXN_ERROR;
       level = MDBX_LOG_ERROR;
-      what = "pages";
     }
-    if (LOG_ENABLED(level))
-      debug_log(level, __func__, __LINE__,
-                "unable alloc %zu %s, alloc-flags 0x%x, err %d, txn-flags "
-                "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, "
-                "branch %zu, leaf %zu, large %zu, entries %zu\n",
-                num, what, flags, ret.err, txn->flags, MDBX_PNL_GETSIZE(txn->wr.repnl), txn->wr.loose_count,
-                txn->dbs[FREE_DBI].height, (size_t)txn->dbs[FREE_DBI].branch_pages,
-                (size_t)txn->dbs[FREE_DBI].leaf_pages, (size_t)txn->dbs[FREE_DBI].large_pages,
-                (size_t)txn->dbs[FREE_DBI].items);
+    if (LOG_ENABLED(level)) {
+      if (num)
+        debug_log(level, __func__, __LINE__,
+                  "unable %s %zu, alloc-flags 0x%x, err %d, txn-flags "
+                  "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, "
+                  "branch %zu, leaf %zu, large %zu, entries %zu\n",
+                  (flags & ALLOC_RESERVE) ? "reserve" : "alloc", num, flags, ret.err, txn->flags,
+                  MDBX_PNL_GETSIZE(txn->wr.repnl), txn->wr.loose_count, txn->dbs[FREE_DBI].height,
+                  (size_t)txn->dbs[FREE_DBI].branch_pages, (size_t)txn->dbs[FREE_DBI].leaf_pages,
+                  (size_t)txn->dbs[FREE_DBI].large_pages, (size_t)txn->dbs[FREE_DBI].items);
+      else
+        debug_log(level, __func__, __LINE__,
+                  "unable fetch-slot, alloc-flags 0x%x, err %d, txn-flags "
+                  "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, "
+                  "branch %zu, leaf %zu, large %zu, entries %zu\n",
+                  flags, ret.err, txn->flags, MDBX_PNL_GETSIZE(txn->wr.repnl), txn->wr.loose_count,
+                  txn->dbs[FREE_DBI].height, (size_t)txn->dbs[FREE_DBI].branch_pages,
+                  (size_t)txn->dbs[FREE_DBI].leaf_pages, (size_t)txn->dbs[FREE_DBI].large_pages,
+                  (size_t)txn->dbs[FREE_DBI].items);
+    }
     ret.page = nullptr;
   }
   if (num > 1)
-    txn->wr.gc.time_acc += monotime_since_cached(monotime_begin, &now_cache);
+    txn->wr.gc.spent += monotime_since_cached(monotime_begin, &now_cache);
 } else {
-early_exit:
+reserve_done:
   DEBUG("return nullptr for %zu pages for ALLOC_%s, rc %d", num, num ? "RESERVE" : "SLOT", ret.err);
   ret.page = nullptr;
 }

File diff suppressed because it is too large


@@ -5,14 +5,37 @@

 #include "essentials.h"

+/* Histogram for deciding how to slice fragments in the situation of a shortage of ids/slots. */
+typedef struct gc_dense_histogram {
+  /* The size of the array also sets the maximum length of the sequences
+   * the distribution task is solved for.
+   *
+   * Using long sequences is counterproductive, since such sequences will
+   * create/reproduce/repeat similar difficulties during subsequent recycling. However,
+   * in rare situations this may be the only way out. */
+  unsigned end;
+  pgno_t array[31];
+} gc_dense_histogram_t;
+
 typedef struct gc_update_context {
   unsigned loop;
-  pgno_t prev_first_unallocated;
+  unsigned goodchunk;
   bool dense;
-  size_t reserve_adj;
+  pgno_t prev_first_unallocated;
   size_t retired_stored;
-  size_t amount, reserved, cleaned_slot, reused_slot, fill_idx;
-  txnid_t cleaned_id, rid;
+  size_t return_reserved_lo, return_reserved_hi;
+  txnid_t gc_first;
+  intptr_t return_left;
+#ifndef MDBX_DEBUG_GCU
+#define MDBX_DEBUG_GCU 0
+#endif
+#if MDBX_DEBUG_GCU
+  struct {
+    txnid_t prev;
+    unsigned n;
+  } dbg;
+#endif /* MDBX_DEBUG_GCU */
+  rkl_t ready4reuse, sequel;
 #if MDBX_ENABLE_BIGFOOT
   txnid_t bigfoot;
 #endif /* MDBX_ENABLE_BIGFOOT */
@@ -20,21 +43,34 @@ typedef struct gc_update_context {
     MDBX_cursor cursor;
     cursor_couple_t couple;
   };
+  gc_dense_histogram_t dense_histogram;
 } gcu_t;

-static inline int gc_update_init(MDBX_txn *txn, gcu_t *ctx) {
-  memset(ctx, 0, offsetof(gcu_t, cursor));
-  ctx->dense = txn->txnid <= MIN_TXNID;
-#if MDBX_ENABLE_BIGFOOT
-  ctx->bigfoot = txn->txnid;
-#endif /* MDBX_ENABLE_BIGFOOT */
-  return cursor_init(&ctx->cursor, txn, FREE_DBI);
-}
+MDBX_INTERNAL int gc_put_init(MDBX_txn *txn, gcu_t *ctx);
+MDBX_INTERNAL void gc_put_destroy(gcu_t *ctx);

-#define ALLOC_DEFAULT 0
-#define ALLOC_RESERVE 1
-#define ALLOC_UNIMPORTANT 2
+#define ALLOC_DEFAULT 0     /* regular/normal page allocation */
+#define ALLOC_UNIMPORTANT 1 /* the request is non-essential: allocation failure will not fail the transaction */
+#define ALLOC_RESERVE 2     /* preparing a reserve for the GC update, without allocation */
+#define ALLOC_COALESCE 4    /* internal state/flag */
+#define ALLOC_SHOULD_SCAN 8 /* internal state/flag */
+#define ALLOC_LIFO 16       /* internal state/flag */
 MDBX_INTERNAL pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags);

 MDBX_INTERNAL pgr_t gc_alloc_single(const MDBX_cursor *const mc);
 MDBX_INTERNAL int gc_update(MDBX_txn *txn, gcu_t *ctx);

+MDBX_NOTHROW_PURE_FUNCTION static inline size_t gc_stockpile(const MDBX_txn *txn) {
+  return MDBX_PNL_GETSIZE(txn->wr.repnl) + txn->wr.loose_count;
+}
+
+MDBX_INTERNAL bool gc_repnl_has_span(MDBX_txn *txn, const size_t num);
+
+static inline bool gc_is_reclaimed(const MDBX_txn *txn, const txnid_t id) {
+  return rkl_contain(&txn->wr.gc.reclaimed, id) || rkl_contain(&txn->wr.gc.comeback, id);
+}
+
+static inline txnid_t txnid_min(txnid_t a, txnid_t b) { return (a < b) ? a : b; }
+static inline txnid_t txnid_max(txnid_t a, txnid_t b) { return (a > b) ? a : b; }
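For clarity, the new gc_stockpile() helper counts what is allocatable without touching the GC tree; restating the inline above as a check (a sketch, not part of the commit):

    assert(gc_stockpile(txn) == MDBX_PNL_GETSIZE(txn->wr.repnl) + txn->wr.loose_count);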


@@ -214,10 +214,9 @@ struct MDBX_txn {
     troika_t troika;
     pnl_t __restrict repnl; /* Reclaimed GC pages */
     struct {
-      /* The list of reclaimed txn-ids from GC */
-      txl_t __restrict retxl;
-      txnid_t last_reclaimed; /* ID of last used record */
-      uint64_t time_acc;
+      rkl_t reclaimed; /* The list of reclaimed txn-ids from GC */
+      uint64_t spent;  /* Time spent reading and searching GC */
+      rkl_t comeback;  /* The list of ids of records returned into GC during commit, etc */
     } gc;
     bool prefault_write_activated;
 #if MDBX_ENABLE_REFUND
@@ -287,13 +286,14 @@ struct MDBX_cursor {
   };
   /* check flags, including the bits for checking the type of leaf pages. */
   uint8_t checking;
+  uint8_t pad;

   /* Points to txn->dbi_state[] for this cursor's DBI.
    * The __restrict qualifier is useful and safe here as currently understood,
    * since overlap is possible only with the transaction's dbi_state,
    * and happens by-read before any subsequent modification/write. */
   uint8_t *__restrict dbi_state;
-  /* Link in the transaction's cursor-tracking list */
+  /* Link in the transaction's cursor-tracking list. */
   MDBX_txn *txn;
   /* Points to tree->dbs[] for this cursor's DBI. */
   tree_t *tree;
@@ -362,15 +362,14 @@ struct MDBX_env {
   uint16_t subpage_reserve_prereq;
   uint16_t subpage_reserve_limit;
   atomic_pgno_t mlocked_pgno;
   uint8_t ps2ln;     /* log2 of DB page size */
   int8_t stuck_meta; /* recovery-only: target meta page or less that zero */
-  uint16_t merge_threshold, merge_threshold_gc; /* pages emptier than this are
-                                                   candidates for merging */
-  unsigned max_readers;                         /* size of the reader table */
-  MDBX_dbi max_dbi;                             /* size of the DB table */
-  uint32_t pid;                                 /* process ID of this env */
-  osal_thread_key_t me_txkey;                   /* thread-key for readers */
-  struct {                                      /* path to the DB files */
+  uint16_t merge_threshold;   /* pages emptier than this are candidates for merging */
+  unsigned max_readers;       /* size of the reader table */
+  MDBX_dbi max_dbi;           /* size of the DB table */
+  uint32_t pid;               /* process ID of this env */
+  osal_thread_key_t me_txkey; /* thread-key for readers */
+  struct {                    /* path to the DB files */
     pathchar_t *lck, *dxb, *specified;
     void *buffer;
   } pathname;
@@ -467,6 +466,9 @@ struct MDBX_env {
   /* --------------------------------------------------- mostly volatile part */

   MDBX_txn *txn; /* current write transaction */
+  struct {
+    txnid_t detent;
+  } gc;
   osal_fastmutex_t dbi_lock;
   unsigned n_dbi; /* number of DBs opened */
@@ -549,11 +551,7 @@ MDBX_MAYBE_UNUSED static void static_checks(void) {
   STATIC_ASSERT(sizeof(clc_t) == 3 * sizeof(void *));
   STATIC_ASSERT(sizeof(kvx_t) == 8 * sizeof(void *));

-#if MDBX_WORDBITS == 64
-#define KVX_SIZE_LN2 6
-#else
-#define KVX_SIZE_LN2 5
-#endif
+#define KVX_SIZE_LN2 MDBX_WORDBITS_LN2
   STATIC_ASSERT(sizeof(kvx_t) == (1u << KVX_SIZE_LN2));
 }
 #endif /* Disabled for MSVC 19.0 (VisualStudio 2015) */


@@ -300,7 +300,7 @@ __cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, int *d
   return rc;
 }

-__cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) {
+__cold bool mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) {
   DEBUG("DB size maxed out by reading #%" PRIaTXN, straggler);
   osal_memory_fence(mo_AcquireRelease, false);
   MDBX_hsr_func *const callback = env->hsr_callback;
@@ -410,5 +410,5 @@ __cold bool mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) {
     NOTICE("hsr-kick: done turn %" PRIaTXN " -> %" PRIaTXN " +%" PRIaTXN, straggler, oldest, turn);
     callback(env, env->txn, 0, 0, straggler, (turn < UINT_MAX) ? (unsigned)turn : UINT_MAX, 0, -retry);
   }
-  return oldest;
+  return oldest > straggler;
 }


@@ -56,7 +56,7 @@ typedef const pgno_t *const_pnl_t;
 #define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
 #define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)

-MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) {
+MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) {
   assert(size > 0 && size <= PAGELIST_LIMIT);
 #if MDBX_PNL_PREALLOC_FOR_RADIXSORT
@@ -71,7 +71,7 @@ MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) {
   return bytes;
 }

-MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size(const size_t bytes) {
+MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size(const size_t bytes) {
   size_t size = bytes / sizeof(pgno_t);
   assert(size > 3 && size <= PAGELIST_LIMIT + /* alignment gap */ 65536);
   size -= 3;
@@ -114,7 +114,7 @@ MDBX_INTERNAL int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, pg
 MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n);

-MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno);
+MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno);

 MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl);
@@ -130,7 +130,8 @@ MDBX_MAYBE_UNUSED static inline void pnl_sort(pnl_t pnl, size_t limit4check) {
   (void)limit4check;
 }

-MDBX_MAYBE_UNUSED static inline size_t pnl_search(const pnl_t pnl, pgno_t pgno, size_t limit) {
+MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline size_t pnl_search(const pnl_t pnl, pgno_t pgno,
+                                                                             size_t limit) {
   assert(pnl_check_allocated(pnl, limit));
   if (MDBX_HAVE_CMOV) {
     /* cmov-accelerated binary search may read (but not use) one


@@ -15,9 +15,8 @@ MDBX_INTERNAL bsr_t mvcc_bind_slot(MDBX_env *env);
 MDBX_MAYBE_UNUSED MDBX_INTERNAL pgno_t mvcc_largest_this(MDBX_env *env, pgno_t largest);
 MDBX_INTERNAL txnid_t mvcc_shapshot_oldest(MDBX_env *const env, const txnid_t steady);
 MDBX_INTERNAL pgno_t mvcc_snapshot_largest(const MDBX_env *env, pgno_t last_used_page);
-MDBX_INTERNAL txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler);
 MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rlocked, int *dead);
-MDBX_INTERNAL txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t laggard);
+MDBX_INTERNAL bool mvcc_kick_laggards(MDBX_env *env, const txnid_t laggard);

 /* dxb.c */
 MDBX_INTERNAL int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits);
@@ -62,10 +61,11 @@ struct commit_timestamp {
 };

 MDBX_INTERNAL bool txn_refund(MDBX_txn *txn);
-MDBX_INTERNAL txnid_t txn_snapshot_oldest(const MDBX_txn *const txn);
+MDBX_INTERNAL bool txn_gc_detent(const MDBX_txn *const txn);
 MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits);
 MDBX_INTERNAL void txn_done_cursors(MDBX_txn *txn);
 MDBX_INTERNAL int txn_shadow_cursors(const MDBX_txn *parent, const size_t dbi);
+MDBX_INTERNAL MDBX_cursor *txn_gc_cursor(MDBX_txn *txn);
 MDBX_INTERNAL MDBX_txn *txn_alloc(const MDBX_txn_flags_t flags, MDBX_env *env);
 MDBX_INTERNAL int txn_abort(MDBX_txn *txn);


@@ -33,6 +33,7 @@ typedef struct MDBX_rkl {
 MDBX_MAYBE_UNUSED MDBX_INTERNAL void rkl_init(rkl_t *rkl);
 MDBX_MAYBE_UNUSED MDBX_INTERNAL void rkl_clear(rkl_t *rkl);
+static inline void rkl_clear_and_shrink(rkl_t *rkl) { rkl_clear(rkl); /* TODO */ }
 MDBX_MAYBE_UNUSED MDBX_INTERNAL void rkl_destroy(rkl_t *rkl);
 MDBX_MAYBE_UNUSED MDBX_INTERNAL void rkl_destructive_move(rkl_t *dst, rkl_t *src);
 MDBX_MAYBE_UNUSED MDBX_INTERNAL __must_check_result int rkl_copy(const rkl_t *src, rkl_t *dst);
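A lifecycle sketch of rkl_t as the new GC code uses it (assuming only the rkl functions visible in this commit):

    rkl_t list;
    rkl_init(&list);
    int err = rkl_push(&list, /* txnid */ 42, false);
    if (err == MDBX_SUCCESS && rkl_contain(&list, 42))
      /* id 42 is tracked and will not be reclaimed twice */;
    rkl_clear_and_shrink(&list);
    rkl_destroy(&list);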


@@ -63,14 +63,14 @@ static int txl_reserve(txl_t __restrict *__restrict ptxl, const size_t wanna) {
   return MDBX_ENOMEM;
 }

-static __always_inline int __must_check_result txl_need(txl_t __restrict *__restrict ptxl, size_t num) {
+static inline int __must_check_result txl_need(txl_t __restrict *__restrict ptxl, size_t num) {
   assert(MDBX_PNL_GETSIZE(*ptxl) <= txl_max && MDBX_PNL_ALLOCLEN(*ptxl) >= MDBX_PNL_GETSIZE(*ptxl));
   assert(num <= PAGELIST_LIMIT);
   const size_t wanna = (size_t)MDBX_PNL_GETSIZE(*ptxl) + num;
   return likely(MDBX_PNL_ALLOCLEN(*ptxl) >= wanna) ? MDBX_SUCCESS : txl_reserve(ptxl, wanna);
 }

-static __always_inline void txl_xappend(txl_t __restrict txl, txnid_t id) {
+static inline void txl_xappend(txl_t __restrict txl, txnid_t id) {
   assert(MDBX_PNL_GETSIZE(txl) < MDBX_PNL_ALLOCLEN(txl));
   txl[0] += 1;
   MDBX_PNL_LAST(txl) = id;


@@ -15,12 +15,12 @@ enum txl_rules {
   txl_max = (1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t)
 };

-MDBX_INTERNAL txl_t txl_alloc(void);
+MDBX_MAYBE_UNUSED MDBX_INTERNAL txl_t txl_alloc(void);

-MDBX_INTERNAL void txl_free(txl_t txl);
+MDBX_MAYBE_UNUSED MDBX_INTERNAL void txl_free(txl_t txl);

-MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id);
+MDBX_MAYBE_UNUSED MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id);

-MDBX_INTERNAL void txl_sort(txl_t txl);
+MDBX_MAYBE_UNUSED MDBX_INTERNAL void txl_sort(txl_t txl);

-MDBX_INTERNAL bool txl_contain(const txl_t txl, txnid_t id);
+MDBX_MAYBE_UNUSED MDBX_INTERNAL bool txl_contain(const txl_t txl, txnid_t id);


@@ -62,6 +62,8 @@ __cold MDBX_txn *txn_basal_create(const size_t max_dbi) {
   if (unlikely(!txn))
     return txn;

+  rkl_init(&txn->wr.gc.reclaimed);
+  rkl_init(&txn->wr.gc.comeback);
   txn->dbs = ptr_disp(txn, base);
   txn->cursors = ptr_disp(txn->dbs, max_dbi * sizeof(txn->dbs[0]));
   txn->dbi_seqs = ptr_disp(txn->cursors, max_dbi * sizeof(txn->cursors[0]));
@@ -82,7 +84,8 @@ __cold MDBX_txn *txn_basal_create(const size_t max_dbi) {

 __cold void txn_basal_destroy(MDBX_txn *txn) {
   dpl_free(txn);
-  txl_free(txn->wr.gc.retxl);
+  rkl_destroy(&txn->wr.gc.reclaimed);
+  rkl_destroy(&txn->wr.gc.comeback);
   pnl_free(txn->wr.retired_pages);
   pnl_free(txn->wr.spilled.list);
   pnl_free(txn->wr.repnl);
@@ -121,10 +124,9 @@ int txn_basal_start(MDBX_txn *txn, unsigned flags) {
   MDBX_PNL_SETSIZE(txn->wr.retired_pages, 0);
   txn->wr.spilled.list = nullptr;
   txn->wr.spilled.least_removed = 0;
-  txn->wr.gc.time_acc = 0;
-  txn->wr.gc.last_reclaimed = 0;
-  if (txn->wr.gc.retxl)
-    MDBX_PNL_SETSIZE(txn->wr.gc.retxl, 0);
+  txn->wr.gc.spent = 0;
+  tASSERT(txn, rkl_empty(&txn->wr.gc.reclaimed));
+  txn->env->gc.detent = 0;
   env->txn = txn;

   return MDBX_SUCCESS;
@@ -140,6 +142,8 @@ int txn_basal_end(MDBX_txn *txn, unsigned mode) {
   env->txn = nullptr;
   pnl_free(txn->wr.spilled.list);
   txn->wr.spilled.list = nullptr;
+  rkl_clear_and_shrink(&txn->wr.gc.reclaimed);
+  rkl_clear_and_shrink(&txn->wr.gc.comeback);

   eASSERT(env, txn->parent == nullptr);
   pnl_shrink(&txn->wr.retired_pages);
@@ -258,9 +262,19 @@ int txn_basal_commit(MDBX_txn *txn, struct commit_timestamp *ts) {
   }

   gcu_t gcu_ctx;
-  int rc = gc_update_init(txn, &gcu_ctx);
+  int rc = gc_put_init(txn, &gcu_ctx);
   if (likely(rc == MDBX_SUCCESS))
     rc = gc_update(txn, &gcu_ctx);
+#if MDBX_ENABLE_BIGFOOT
+  const txnid_t commit_txnid = gcu_ctx.bigfoot;
+  if (commit_txnid > txn->txnid)
+    TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, (size_t)(commit_txnid - txn->txnid));
+#else
+  const txnid_t commit_txnid = txn->txnid;
+#endif
+  gc_put_destroy(&gcu_ctx);
   if (ts)
     ts->gc_cpu = osal_cputime(nullptr) - ts->gc_cpu;
   if (unlikely(rc != MDBX_SUCCESS))
@@ -334,13 +348,6 @@ int txn_basal_commit(MDBX_txn *txn, struct commit_timestamp *ts) {
   meta.canary = txn->canary;
   memcpy(&meta.dxbid, &head.ptr_c->dxbid, sizeof(meta.dxbid));

-  txnid_t commit_txnid = txn->txnid;
-#if MDBX_ENABLE_BIGFOOT
-  if (gcu_ctx.bigfoot > txn->txnid) {
-    commit_txnid = gcu_ctx.bigfoot;
-    TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, (size_t)(commit_txnid - txn->txnid));
-  }
-#endif
   meta.unsafe_sign = DATASIGN_NONE;
   meta_set_txnid(env, &meta, commit_txnid);


@@ -349,6 +349,7 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) {
     return LOG_IFERR(MDBX_ENOMEM);

   tASSERT(parent, dpl_check(parent));
+  rkl_init(&txn->wr.gc.reclaimed);
 #if MDBX_ENABLE_DBI_SPARSE
   txn->dbi_sparse = parent->dbi_sparse;
 #endif /* MDBX_ENABLE_DBI_SPARSE */
@@ -403,12 +404,11 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) {
                                                  = parent->geo.first_unallocated) -
                                    MDBX_ENABLE_REFUND));

-  txn->wr.gc.time_acc = parent->wr.gc.time_acc;
-  txn->wr.gc.last_reclaimed = parent->wr.gc.last_reclaimed;
-  if (parent->wr.gc.retxl) {
-    txn->wr.gc.retxl = parent->wr.gc.retxl;
-    parent->wr.gc.retxl = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->wr.gc.retxl);
-  }
+  txn->wr.gc.spent = parent->wr.gc.spent;
+  rkl_init(&txn->wr.gc.comeback);
+  err = rkl_copy(&parent->wr.gc.reclaimed, &txn->wr.gc.reclaimed);
+  if (unlikely(err != MDBX_SUCCESS))
+    return err;

   txn->wr.retired_pages = parent->wr.retired_pages;
   parent->wr.retired_pages = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->wr.retired_pages);
@@ -438,6 +438,7 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) {
            (txn->parent ? txn->parent->wr.dirtyroom : txn->env->options.dp_limit));
   parent->env->txn = txn;
   tASSERT(parent, parent->cursors[FREE_DBI] == nullptr);
+  // TODO: shadow GC' cursor
   return txn_shadow_cursors(parent, MAIN_DBI);
 }
@@ -447,11 +448,7 @@ void txn_nested_abort(MDBX_txn *nested) {
   nested->signature = 0;
   nested->owner = 0;

-  if (nested->wr.gc.retxl) {
-    tASSERT(parent, MDBX_PNL_GETSIZE(nested->wr.gc.retxl) >= (uintptr_t)parent->wr.gc.retxl);
-    MDBX_PNL_SETSIZE(nested->wr.gc.retxl, (uintptr_t)parent->wr.gc.retxl);
-    parent->wr.gc.retxl = nested->wr.gc.retxl;
-  }
+  rkl_destroy(&nested->wr.gc.reclaimed);

   if (nested->wr.retired_pages) {
     tASSERT(parent, MDBX_PNL_GETSIZE(nested->wr.retired_pages) >= (uintptr_t)parent->wr.retired_pages);
@@ -530,17 +527,14 @@ int txn_nested_join(MDBX_txn *txn, struct commit_timestamp *ts) {

   //-------------------------------------------------------------------------

-  parent->wr.gc.retxl = txn->wr.gc.retxl;
-  txn->wr.gc.retxl = nullptr;
-
   parent->wr.retired_pages = txn->wr.retired_pages;
   txn->wr.retired_pages = nullptr;

   pnl_free(parent->wr.repnl);
   parent->wr.repnl = txn->wr.repnl;
   txn->wr.repnl = nullptr;
-  parent->wr.gc.time_acc = txn->wr.gc.time_acc;
-  parent->wr.gc.last_reclaimed = txn->wr.gc.last_reclaimed;
+  parent->wr.gc.spent = txn->wr.gc.spent;
+  rkl_destructive_move(&txn->wr.gc.reclaimed, &parent->wr.gc.reclaimed);

   parent->geo = txn->geo;
   parent->canary = txn->canary;


@@ -3,8 +3,18 @@

 #include "internals.h"

-__hot txnid_t txn_snapshot_oldest(const MDBX_txn *const txn) {
-  return mvcc_shapshot_oldest(txn->env, txn->wr.troika.txnid[txn->wr.troika.prefer_steady]);
+MDBX_cursor *txn_gc_cursor(MDBX_txn *txn) {
+  tASSERT(txn, (txn->flags & (MDBX_TXN_BLOCKED | MDBX_TXN_RDONLY)) == 0);
+  return ptr_disp(txn->env->basal_txn, sizeof(MDBX_txn));
+}
+
+__hot bool txn_gc_detent(const MDBX_txn *const txn) {
+  const txnid_t detent = mvcc_shapshot_oldest(txn->env, txn->wr.troika.txnid[txn->wr.troika.prefer_steady]);
+  if (likely(detent == txn->env->gc.detent))
+    return false;
+
+  txn->env->gc.detent = detent;
+  return true;
 }

 void txn_done_cursors(MDBX_txn *txn) {
@@ -417,12 +427,9 @@ MDBX_txn *txn_alloc(const MDBX_txn_flags_t flags, MDBX_env *env) {
   txn = osal_malloc(size);
   if (unlikely(!txn))
     return txn;
-#if MDBX_DEBUG
-  memset(txn, 0xCD, size);
-  VALGRIND_MAKE_MEM_UNDEFINED(txn, size);
-#endif /* MDBX_DEBUG */
   MDBX_ANALYSIS_ASSUME(size > base);
   memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? size : base);
   txn->dbs = ptr_disp(txn, base);
   txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0]));
 #if MDBX_DEBUG
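Usage sketch: txn_gc_detent() both caches the oldest-reader horizon in env->gc.detent and reports whether it moved, so callers retry only on progress, as in gc_alloc_ex() above:

    if (unlikely(true == atomic_load32(&env->lck->rdt_refresh_flag, mo_AcquireRelease)) && txn_gc_detent(txn))
      goto retry_gc_have_detent;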


@@ -3,6 +3,17 @@

 #include "internals.h"

+MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL unsigned ceil_log2n(size_t value_uintptr) {
+  assert(value_uintptr > 0 && value_uintptr < INT32_MAX);
+  value_uintptr -= 1;
+  value_uintptr |= value_uintptr >> 1;
+  value_uintptr |= value_uintptr >> 2;
+  value_uintptr |= value_uintptr >> 4;
+  value_uintptr |= value_uintptr >> 8;
+  value_uintptr |= value_uintptr >> 16;
+  return log2n_powerof2(value_uintptr + 1);
+}
+
 MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL unsigned log2n_powerof2(size_t value_uintptr) {
   assert(value_uintptr > 0 && value_uintptr < INT32_MAX && is_powerof2(value_uintptr));
   assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
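Expected behavior of the new helper as a sketch (assumed checks, not part of the commit): ceil_log2n() rounds the base-2 logarithm up by first rounding its argument up to a power of two:

    assert(ceil_log2n(1) == 0);
    assert(ceil_log2n(2) == 1);
    assert(ceil_log2n(5) == 3);    /* 8 is the nearest power of two >= 5 */
    assert(ceil_log2n(4096) == 12);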


@@ -58,6 +58,8 @@ MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t ceil_powerof2
 MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL unsigned log2n_powerof2(size_t value_uintptr);

+MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL unsigned ceil_log2n(size_t value_uintptr);
+
 MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint64_t rrxmrrxmsx_0(uint64_t v);

 struct monotime_cache {