From 86890b4756542868a913aefdd8ecb0d2f91bdccb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 3 Jun 2025 21:54:24 +0300 Subject: [PATCH] =?UTF-8?q?mdbx:=20=D1=80=D0=B0=D0=BD=D0=BD=D1=8F=D1=8F/?= =?UTF-8?q?=D0=BD=D0=B5-=D0=BE=D1=82=D0=BB=D0=BE=D0=B6=D0=B5=D0=BD=D0=BD?= =?UTF-8?q?=D0=B0=D1=8F=20=D0=BE=D1=87=D0=B8=D1=81=D1=82=D0=BA=D0=B0=20GC?= =?UTF-8?q?=20(=D0=BD=D0=B0=D1=87=D0=B0=D0=BB=D0=BE).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-txn.c | 2 +- src/gc-get.c | 44 +++++++++++++++++++++------------------- src/gc-put.c | 52 ++++++++++++++++++++++-------------------------- src/gc.h | 4 +++- src/internals.h | 7 ++++--- src/node.h | 1 - src/page-ops.c | 8 ++++---- src/pnl.c | 2 +- src/pnl.h | 38 +++++++++++++++++++++-------------- src/proto.h | 1 - src/txn-basal.c | 5 +++++ src/txn-nested.c | 10 +++++++--- src/txn.c | 8 ++------ 13 files changed, 97 insertions(+), 85 deletions(-) diff --git a/src/api-txn.c b/src/api-txn.c index 19371211..33fba20e 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -520,7 +520,7 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) { txn_gc_detent(txn); oldest_reading = txn->env->gc.detent; if (oldest_reading == txn->wr.troika.txnid[txn->wr.troika.recent]) { - /* Если самый старый используемый снимок является предыдущим, т. е. непосредственно предшествующим текущей + /* Если самый старый используемый снимок является предыдущим, т.е. непосредственно предшествующим текущей * транзакции, то просматриваем таблицу читателей чтобы выяснить действительно ли снимок используется * читателями. */ oldest_reading = txn->txnid; diff --git a/src/gc-get.c b/src/gc-get.c index d03091f7..27c47174 100644 --- a/src/gc-get.c +++ b/src/gc-get.c @@ -890,12 +890,6 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) if (num > 0 && txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->wr.repnl) < env->maxgc_large1page / 2) flags += ALLOC_COALESCE; - MDBX_cursor *const gc = txn_gc_cursor(txn); - eASSERT(env, mc != gc && gc->next == gc); - gc->txn = txn; - gc->dbi_state = txn->dbi_state; - gc->top_and_flags = z_fresh_mark; - txn->wr.prefault_write_activated = !env->incore && env->options.prefault_write; if (txn->wr.prefault_write_activated) { /* Проверка посредством minicore() существенно снижает затраты, но в @@ -913,6 +907,12 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) txn->wr.prefault_write_activated = false; } + MDBX_cursor *const gc = gc_cursor(env); + gc->txn = txn; + gc->tree = txn->dbs; + gc->dbi_state = txn->dbi_state; + gc->top_and_flags = z_fresh_mark; + retry_gc_refresh_detent: txn_gc_detent(txn); retry_gc_have_detent: @@ -1046,20 +1046,6 @@ next_gc: } } - /* Remember ID of readed GC record */ - ret.err = rkl_push( - &txn->wr.gc.reclaimed, id - /* Вместо known_continuous=false, тут можно передавать/использовать (flags & ALLOC_LIFO) == 0, тогда дыры/пропуски - * в идентификаторах GC будут образовывать непрерывные интервалы в wr.gc.reclaimed, что обеспечит больше свободных - * идентификаторов/слотов для возврата страниц. Однако, это также приведёт к пустым попыткам удаления - * отсутствующих записей в gc_clear_reclaimed(), а далее к перекладыванию этих сплошных интервалов поэлементно в - * ready4reuse. Поэтому смысла в этом решительно нет. Следует либо формировать сплошные интервалы при работе - * gc_clear_reclaimed(), особенно в FIFO-режиме, либо искать их только в gc_provide_ids() */ - ); - TRACE("%" PRIaTXN " len %zu pushed to txn-rkl, err %d", id, gc_len, ret.err); - if (unlikely(ret.err != MDBX_SUCCESS)) - goto fail; - /* Append PNL from GC record to wr.repnl */ ret.err = pnl_need(&txn->wr.repnl, gc_len); if (unlikely(ret.err != MDBX_SUCCESS)) @@ -1102,7 +1088,23 @@ next_gc: } eASSERT(env, pnl_check_allocated(txn->wr.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND)); - /* TODO: удаление загруженных из GC записей */ + rkl_t *rkl = &txn->wr.gc.reclaimed; + const char *rkl_name = "reclaimed"; + if (mc->dbi_state != txn->dbi_state && + (MDBX_DEBUG || MDBX_PNL_GETSIZE(txn->wr.repnl) > (size_t)gc->tree->height + gc->tree->height + 3)) { + gc->next = txn->cursors[FREE_DBI]; + txn->cursors[FREE_DBI] = gc; + ret.err = cursor_del(gc, 0); + txn->cursors[FREE_DBI] = gc->next; + if (unlikely(ret.err != MDBX_SUCCESS)) + goto fail; + rkl = &txn->wr.gc.ready4reuse; + rkl_name = "ready4reuse"; + } + ret.err = rkl_push(rkl, id); + TRACE("%" PRIaTXN " len %zu pushed to rkl-%s, err %d", id, gc_len, rkl_name, ret.err); + if (unlikely(ret.err != MDBX_SUCCESS)) + goto fail; eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT); if (flags & ALLOC_COALESCE) { diff --git a/src/gc-put.c b/src/gc-put.c index 0a4066d5..968df20a 100644 --- a/src/gc-put.c +++ b/src/gc-put.c @@ -4,10 +4,9 @@ #include "internals.h" int gc_put_init(MDBX_txn *txn, gcu_t *ctx) { - memset(ctx, 0, offsetof(gcu_t, ready4reuse)); + memset(ctx, 0, offsetof(gcu_t, sequel)); /* Размер куска помещающийся на одну отдельную "overflow" страницу, но с небольшим запасом сводобного места. */ ctx->goodchunk = txn->env->maxgc_large1page - (txn->env->maxgc_large1page >> 4); - rkl_init(&ctx->ready4reuse); rkl_init(&ctx->sequel); #if MDBX_ENABLE_BIGFOOT ctx->bigfoot = txn->txnid; @@ -15,10 +14,7 @@ int gc_put_init(MDBX_txn *txn, gcu_t *ctx) { return cursor_init(&ctx->cursor, txn, FREE_DBI); } -void gc_put_destroy(gcu_t *ctx) { - rkl_destroy(&ctx->ready4reuse); - rkl_destroy(&ctx->sequel); -} +void gc_put_destroy(gcu_t *ctx) { rkl_destroy(&ctx->sequel); } static size_t gc_chunk_pages(const MDBX_txn *txn, const size_t chunk) { return largechunk_npages(txn->env, gc_chunk_bytes(chunk)); @@ -100,10 +96,10 @@ MDBX_MAYBE_UNUSED static void dbg_dump_ids(gcu_t *ctx) { DEBUG_EXTRA_PRINT("%s\n", " empty"); DEBUG_EXTRA("%s", "ready4reuse:"); - if (rkl_empty(&ctx->ready4reuse)) + if (rkl_empty(&txn->wr.gc.ready4reuse)) DEBUG_EXTRA_PRINT("%s\n", " empty"); else { - rkl_iter_t i = rkl_iterator(&ctx->ready4reuse, false); + rkl_iter_t i = rkl_iterator(&txn->wr.gc.ready4reuse, false); txnid_t id = rkl_turn(&i, false); while (id) { dbg_id(ctx, id); @@ -485,7 +481,7 @@ static int gc_remove_rkl(MDBX_txn *txn, gcu_t *ctx, rkl_t *rkl) { int err = cursor_seek(&ctx->cursor, &key, nullptr, MDBX_SET).err; tASSERT(txn, id == rkl_edge(rkl, is_lifo(txn))); if (err == MDBX_NOTFOUND) { - err = rkl_push(&ctx->ready4reuse, rkl_pop(rkl, is_lifo(txn))); + err = rkl_push(&txn->wr.gc.ready4reuse, rkl_pop(rkl, is_lifo(txn))); WARNING("unexpected %s for gc-id %" PRIaTXN ", ignore and continue, push-err %d", "MDBX_NOTFOUND", id, err); if (unlikely(MDBX_IS_ERROR(err))) return err; @@ -506,7 +502,7 @@ static int gc_remove_rkl(MDBX_txn *txn, gcu_t *ctx, rkl_t *rkl) { return err; ENSURE(txn->env, id == rkl_pop(rkl, is_lifo(txn))); tASSERT(txn, id <= txn->env->lck->cached_oldest.weak); - err = rkl_push(&ctx->ready4reuse, id); + err = rkl_push(&txn->wr.gc.ready4reuse, id); if (unlikely(err != MDBX_SUCCESS)) return err; TRACE("id %" PRIaTXN " cleared and moved to ready4reuse", id); @@ -526,7 +522,7 @@ static inline int gc_clear_returned(MDBX_txn *txn, gcu_t *ctx) { static int gc_push_sequel(MDBX_txn *txn, gcu_t *ctx, txnid_t id) { tASSERT(txn, id > 0 && id < txn->env->gc.detent); - tASSERT(txn, !rkl_contain(&txn->wr.gc.comeback, id) && !rkl_contain(&ctx->ready4reuse, id)); + tASSERT(txn, !rkl_contain(&txn->wr.gc.comeback, id) && !rkl_contain(&txn->wr.gc.ready4reuse, id)); TRACE("id %" PRIaTXN ", return-left %zi", id, ctx->return_left); int err = rkl_push(&ctx->sequel, id); if (unlikely(err != MDBX_SUCCESS)) { @@ -705,7 +701,7 @@ static bool solve_recursive(const sr_context_t *const ct, sr_state_t *const st, static int gc_dense_solve(MDBX_txn *txn, gcu_t *ctx, gc_dense_histogram_t *const solution) { sr_state_t st = { - .left_slots = rkl_len(&ctx->ready4reuse), .left_volume = ctx->return_left, .hist = ctx->dense_histogram}; + .left_slots = rkl_len(&txn->wr.gc.ready4reuse), .left_volume = ctx->return_left, .hist = ctx->dense_histogram}; assert(st.left_slots > 0 && st.left_volume > 0 && MDBX_PNL_GETSIZE(txn->wr.repnl) > 0); if (unlikely(!st.left_slots || !st.left_volume)) { ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "recursive-solving preconditions violated"); @@ -734,7 +730,7 @@ static int gc_dense_solve(MDBX_txn *txn, gcu_t *ctx, gc_dense_histogram_t *const chunk += ct.other_pages; } - if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&ctx->ready4reuse))) { + if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&txn->wr.gc.ready4reuse))) { assert(!"recursive-solving failure"); ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "recursive-solving failure"); return MDBX_PROBLEM; @@ -782,7 +778,7 @@ static int gc_dense_solve(MDBX_txn *txn, gcu_t *ctx, gc_dense_histogram_t *const // chunk += ct.other_pages; // } // -// if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&ctx->ready4reuse))) { +// if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&txn->wr.gc.ready4reuse))) { // assert(!"recursive-solving failure"); // ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "recursive-solving failure"); // return MDBX_PROBLEM; @@ -848,7 +844,7 @@ static int gc_search_holes(MDBX_txn *txn, gcu_t *ctx) { ((ctx->gc_first > UINT16_MAX) ? UINT16_MAX : (unsigned)ctx->gc_first - 1) * ctx->goodchunk; const txnid_t reasonable_deep = txn->env->maxgc_per_branch + - 2 * (txn->env->gc.detent - txnid_min(rkl_lowest(&ctx->ready4reuse), rkl_lowest(&txn->wr.gc.comeback))); + 2 * (txn->env->gc.detent - txnid_min(rkl_lowest(&txn->wr.gc.ready4reuse), rkl_lowest(&txn->wr.gc.comeback))); const txnid_t scan_threshold = (txn->env->gc.detent > reasonable_deep) ? txn->env->gc.detent - reasonable_deep : 0; txnid_t scan_hi = txn->env->gc.detent, scan_lo = INVALID_TXNID; @@ -859,7 +855,7 @@ static int gc_search_holes(MDBX_txn *txn, gcu_t *ctx) { } rkl_iter_t iter_ready4reuse, iter_comeback; - rkl_find(&ctx->ready4reuse, scan_hi, &iter_ready4reuse); + rkl_find(&txn->wr.gc.ready4reuse, scan_hi, &iter_ready4reuse); rkl_find(&txn->wr.gc.comeback, scan_hi, &iter_comeback); rkl_hole_t hole_ready4reuse = rkl_hole(&iter_ready4reuse, true); rkl_hole_t hole_comeback = rkl_hole(&iter_comeback, true); @@ -948,8 +944,8 @@ static int gc_search_holes(MDBX_txn *txn, gcu_t *ctx) { } static inline int gc_reserve4return(MDBX_txn *txn, gcu_t *ctx, const size_t chunk_lo, const size_t chunk_hi) { - txnid_t reservation_id = rkl_pop(&ctx->ready4reuse, true); - TRACE("%s: slots-ready4reuse-left %zu, reservation-id %" PRIaTXN, dbg_prefix(ctx), rkl_len(&ctx->ready4reuse), + txnid_t reservation_id = rkl_pop(&txn->wr.gc.ready4reuse, true); + TRACE("%s: slots-ready4reuse-left %zu, reservation-id %" PRIaTXN, dbg_prefix(ctx), rkl_len(&txn->wr.gc.ready4reuse), reservation_id); tASSERT(txn, reservation_id >= MIN_TXNID && reservation_id < txn->txnid); tASSERT(txn, reservation_id <= txn->env->lck->cached_oldest.weak); @@ -1046,7 +1042,7 @@ static int gc_handle_dense(MDBX_txn *txn, gcu_t *ctx, size_t left_min, size_t le * размещения всех возвращаемых страниц. */ int err = MDBX_RESULT_FALSE; - if (!rkl_empty(&ctx->ready4reuse)) { + if (!rkl_empty(&txn->wr.gc.ready4reuse)) { gc_dense_hist(txn, ctx); gc_dense_histogram_t solution; if (ctx->loop == 1 || ctx->loop % 3 == 0) @@ -1106,13 +1102,13 @@ static int gc_handle_dense(MDBX_txn *txn, gcu_t *ctx, size_t left_min, size_t le const size_t per_page = txn->env->ps / sizeof(pgno_t); size_t amount = MDBX_PNL_GETSIZE(txn->wr.repnl); do { - if (rkl_empty(&ctx->ready4reuse)) { + if (rkl_empty(&txn->wr.gc.ready4reuse)) { NOTICE("%s: restart since no slot(s) available (reserved %zu...%zu of %zu)", dbg_prefix(ctx), ctx->return_reserved_lo, ctx->return_reserved_hi, amount); return MDBX_RESULT_TRUE; } const size_t left = dense_adjust_amount(txn, amount) - ctx->return_reserved_hi; - const size_t slots = rkl_len(&ctx->ready4reuse); + const size_t slots = rkl_len(&txn->wr.gc.ready4reuse); const size_t base = (left + slots - 1) / slots; const size_t adjusted = dense_adjust_chunk(txn, base); TRACE("dense-reservation: reserved %zu...%zu of %zu, left %zu slot(s) and %zu pnl, step: %zu base," @@ -1139,7 +1135,7 @@ static int gc_handle_dense(MDBX_txn *txn, gcu_t *ctx, size_t left_min, size_t le if (unlikely(err != MDBX_SUCCESS)) ERROR("unable provide IDs and/or to fit returned PNL (%zd+%zd pages, %zd+%zd slots), err %d", ctx->retired_stored, - MDBX_PNL_GETSIZE(txn->wr.repnl), rkl_len(&txn->wr.gc.comeback), rkl_len(&ctx->ready4reuse), err); + MDBX_PNL_GETSIZE(txn->wr.repnl), rkl_len(&txn->wr.gc.comeback), rkl_len(&txn->wr.gc.ready4reuse), err); return err; } @@ -1178,11 +1174,11 @@ static int gc_rerere(MDBX_txn *txn, gcu_t *ctx) { const size_t left_min = amount - ctx->return_reserved_hi; const size_t left_max = amount - ctx->return_reserved_lo; - if (likely(left_min < txn->env->maxgc_large1page && !rkl_empty(&ctx->ready4reuse))) { + if (likely(left_min < txn->env->maxgc_large1page && !rkl_empty(&txn->wr.gc.ready4reuse))) { /* Есть хотя-бы один слот и весь остаток списка номеров страниц помещается в один кусок. * Это самая частая ситуация, просто продолжаем. */ } else { - if (likely(rkl_len(&ctx->ready4reuse) * ctx->goodchunk >= left_max)) { + if (likely(rkl_len(&txn->wr.gc.ready4reuse) * ctx->goodchunk >= left_max)) { /* Слотов хватает, основная задача делить на куски так, чтобы изменение (уменьшение) кол-ва возвращаемых страниц в * процессе резервирования записей в GC не потребовало менять резервирование, т.е. удалять и повторять всё снова. */ @@ -1195,7 +1191,7 @@ static int gc_rerere(MDBX_txn *txn, gcu_t *ctx) { return err; if (!rkl_empty(&ctx->sequel)) { - err = rkl_merge(&ctx->sequel, &ctx->ready4reuse, false); + err = rkl_merge(&ctx->sequel, &txn->wr.gc.ready4reuse, false); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_RESULT_TRUE) { ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "unexpected duplicate(s) during rkl-merge"); @@ -1208,14 +1204,14 @@ static int gc_rerere(MDBX_txn *txn, gcu_t *ctx) { if (unlikely(ctx->return_left > 0)) { /* Делаем переоценку баланса для кусков предельного размера (по maxgc_large1page, вместо goodchunk). */ - const intptr_t dense_unfit = left_min - rkl_len(&ctx->ready4reuse) * txn->env->maxgc_large1page; + const intptr_t dense_unfit = left_min - rkl_len(&txn->wr.gc.ready4reuse) * txn->env->maxgc_large1page; if (dense_unfit > 0) { /* Имеющихся идентификаторов НЕ хватит, * даже если если их использовать для кусков размером maxgc_large1page вместо goodchunk. */ if (!ctx->dense) { NOTICE("%s: enter to dense-mode (amount %zu, reserved %zu..%zu, slots/ids %zu, left %zu..%zu, unfit %zu)", dbg_prefix(ctx), amount, ctx->return_reserved_lo, ctx->return_reserved_hi, - rkl_len(&ctx->ready4reuse), left_min, left_max, dense_unfit); + rkl_len(&txn->wr.gc.ready4reuse), left_min, left_max, dense_unfit); ctx->dense = true; } return gc_handle_dense(txn, ctx, left_min, left_max); @@ -1373,7 +1369,7 @@ int gc_update(MDBX_txn *txn, gcu_t *ctx) { } /* The txn->wr.repnl[] can grow and shrink during this call. - * The txn->wr.gc.reclaimed[] can grow, then migrate into ctx->ready4reuse and later to txn->wr.gc.comeback[]. + * The txn->wr.gc.reclaimed[] can grow, then migrate into txn->wr.gc.ready4reuse and later to txn->wr.gc.comeback[]. * But page numbers cannot disappear from txn->wr.retired_pages[]. */ retry: ctx->loop += !(ctx->prev_first_unallocated > txn->geo.first_unallocated); diff --git a/src/gc.h b/src/gc.h index b81dc82f..cf8d26ab 100644 --- a/src/gc.h +++ b/src/gc.h @@ -35,7 +35,7 @@ typedef struct gc_update_context { unsigned n; } dbg; #endif /* MDBX_DEBUG_GCU */ - rkl_t ready4reuse, sequel; + rkl_t sequel; #if MDBX_ENABLE_BIGFOOT txnid_t bigfoot; #endif /* MDBX_ENABLE_BIGFOOT */ @@ -78,3 +78,5 @@ static inline bool gc_is_reclaimed(const MDBX_txn *txn, const txnid_t id) { static inline txnid_t txnid_min(txnid_t a, txnid_t b) { return (a < b) ? a : b; } static inline txnid_t txnid_max(txnid_t a, txnid_t b) { return (a > b) ? a : b; } + +static inline MDBX_cursor *gc_cursor(MDBX_env *env) { return ptr_disp(env->basal_txn, sizeof(MDBX_txn)); } diff --git a/src/internals.h b/src/internals.h index 5400cac4..e9e20331 100644 --- a/src/internals.h +++ b/src/internals.h @@ -214,9 +214,10 @@ struct MDBX_txn { troika_t troika; pnl_t __restrict repnl; /* Reclaimed GC pages */ struct { - rkl_t reclaimed; /* The list of reclaimed txn-ids from GC */ - uint64_t spent; /* Time spent reading and searching GC */ - rkl_t comeback; /* The list of ids of records returned into GC during commit, etc */ + rkl_t reclaimed; /* The list of reclaimed txn-ids from GC, but not cleared/deleted */ + rkl_t ready4reuse; /* The list of reclaimed txn-ids from GC, and cleared/deleted */ + uint64_t spent; /* Time spent reading and searching GC */ + rkl_t comeback; /* The list of ids of records returned into GC during commit, etc */ } gc; bool prefault_write_activated; #if MDBX_ENABLE_REFUND diff --git a/src/node.h b/src/node.h index 03069021..7125ce6c 100644 --- a/src/node.h +++ b/src/node.h @@ -17,7 +17,6 @@ MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t node_pgno(const node_t *const __ /* Set the page number in a branch node */ static inline void node_set_pgno(node_t *const __restrict node, pgno_t pgno) { assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO); - UNALIGNED_POKE_32(node, node_t, child_pgno, (uint32_t)pgno); } diff --git a/src/page-ops.c b/src/page-ops.c index 599f12c9..3aa869ec 100644 --- a/src/page-ops.c +++ b/src/page-ops.c @@ -179,19 +179,19 @@ __hot int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, const page_t *c page_t *np; if (is_frozen(txn, mp)) { /* CoW the page */ - rc = pnl_need(&txn->wr.retired_pages, 1); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; const pgr_t par = gc_alloc_single(mc); rc = par.err; np = par.page; if (unlikely(rc != MDBX_SUCCESS)) goto fail; + rc = pnl_append(&txn->wr.retired_pages, mp->pgno); + if (unlikely(rc != MDBX_SUCCESS)) + goto fail; + const pgno_t pgno = np->pgno; DEBUG("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, cursor_dbi_dbg(mc), mp->pgno, pgno); tASSERT(txn, mp->pgno != pgno); - pnl_append_prereserved(txn->wr.retired_pages, mp->pgno); /* Update the parent page, if any, to point to the new page */ if (likely(mc->top)) { page_t *parent = mc->pg[mc->top - 1]; diff --git a/src/pnl.c b/src/pnl.c index d573c987..499553f7 100644 --- a/src/pnl.c +++ b/src/pnl.c @@ -52,7 +52,7 @@ void pnl_shrink(pnl_t __restrict *__restrict ppnl) { int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wanna) { const size_t allocated = MDBX_PNL_ALLOCLEN(*ppnl); assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl)); - if (likely(allocated >= wanna)) + if (unlikely(allocated >= wanna)) return MDBX_SUCCESS; if (unlikely(wanna > /* paranoia */ PAGELIST_LIMIT)) { diff --git a/src/pnl.h b/src/pnl.h index 416ed9bb..2b398f56 100644 --- a/src/pnl.h +++ b/src/pnl.h @@ -56,21 +56,6 @@ typedef const pgno_t *const_pnl_t; #define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t)) #define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0) -MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) { - assert(size > 0 && size <= PAGELIST_LIMIT); -#if MDBX_PNL_PREALLOC_FOR_RADIXSORT - - size += size; -#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ - STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + - (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + MDBX_PNL_GRANULATE + 3) * sizeof(pgno_t) < - SIZE_MAX / 4 * 3); - size_t bytes = - ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size + 3), MDBX_PNL_GRANULATE * sizeof(pgno_t)) - - MDBX_ASSUME_MALLOC_OVERHEAD; - return bytes; -} - MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size(const size_t bytes) { size_t size = bytes / sizeof(pgno_t); assert(size > 3 && size <= PAGELIST_LIMIT + /* alignment gap */ 65536); @@ -81,6 +66,22 @@ MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size return (pgno_t)size; } +MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t wanna_size) { + size_t size = wanna_size; + assert(size > 0 && size <= PAGELIST_LIMIT); +#if MDBX_PNL_PREALLOC_FOR_RADIXSORT + size += size; +#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */ + STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + + (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + MDBX_PNL_GRANULATE + 3) * sizeof(pgno_t) < + SIZE_MAX / 4 * 3); + size_t bytes = + ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size + 3), MDBX_PNL_GRANULATE * sizeof(pgno_t)) - + MDBX_ASSUME_MALLOC_OVERHEAD; + assert(pnl_bytes2size(bytes) >= wanna_size); + return bytes; +} + MDBX_INTERNAL pnl_t pnl_alloc(size_t size); MDBX_INTERNAL void pnl_free(pnl_t pnl); @@ -106,6 +107,13 @@ MDBX_MAYBE_UNUSED static inline void pnl_append_prereserved(__restrict pnl_t pnl MDBX_PNL_LAST(pnl) = pgno; } +MDBX_MAYBE_UNUSED static inline int __must_check_result pnl_append(__restrict pnl_t *ppnl, pgno_t pgno) { + int rc = pnl_need(ppnl, 1); + if (likely(rc == MDBX_SUCCESS)) + pnl_append_prereserved(*ppnl, pgno); + return rc; +} + MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl); MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n); diff --git a/src/proto.h b/src/proto.h index 3d76ed74..aa3e0ade 100644 --- a/src/proto.h +++ b/src/proto.h @@ -65,7 +65,6 @@ MDBX_INTERNAL bool txn_gc_detent(const MDBX_txn *const txn); MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits); MDBX_INTERNAL void txn_done_cursors(MDBX_txn *txn); MDBX_INTERNAL int txn_shadow_cursors(const MDBX_txn *parent, const size_t dbi); -MDBX_INTERNAL MDBX_cursor *txn_gc_cursor(MDBX_txn *txn); MDBX_INTERNAL MDBX_txn *txn_alloc(const MDBX_txn_flags_t flags, MDBX_env *env); MDBX_INTERNAL int txn_abort(MDBX_txn *txn); diff --git a/src/txn-basal.c b/src/txn-basal.c index 25856fda..28cc23a8 100644 --- a/src/txn-basal.c +++ b/src/txn-basal.c @@ -63,6 +63,7 @@ __cold MDBX_txn *txn_basal_create(const size_t max_dbi) { return txn; rkl_init(&txn->wr.gc.reclaimed); + rkl_init(&txn->wr.gc.ready4reuse); rkl_init(&txn->wr.gc.comeback); txn->dbs = ptr_disp(txn, base); txn->cursors = ptr_disp(txn->dbs, max_dbi * sizeof(txn->dbs[0])); @@ -85,6 +86,7 @@ __cold MDBX_txn *txn_basal_create(const size_t max_dbi) { __cold void txn_basal_destroy(MDBX_txn *txn) { dpl_free(txn); rkl_destroy(&txn->wr.gc.reclaimed); + rkl_destroy(&txn->wr.gc.ready4reuse); rkl_destroy(&txn->wr.gc.comeback); pnl_free(txn->wr.retired_pages); pnl_free(txn->wr.spilled.list); @@ -127,6 +129,8 @@ int txn_basal_start(MDBX_txn *txn, unsigned flags) { txn->wr.spilled.least_removed = 0; txn->wr.gc.spent = 0; tASSERT(txn, rkl_empty(&txn->wr.gc.reclaimed)); + tASSERT(txn, rkl_empty(&txn->wr.gc.ready4reuse)); + tASSERT(txn, rkl_empty(&txn->wr.gc.comeback)); txn->env->gc.detent = 0; env->txn = txn; @@ -144,6 +148,7 @@ int txn_basal_end(MDBX_txn *txn, unsigned mode) { pnl_free(txn->wr.spilled.list); txn->wr.spilled.list = nullptr; rkl_clear_and_shrink(&txn->wr.gc.reclaimed); + rkl_clear_and_shrink(&txn->wr.gc.ready4reuse); rkl_clear_and_shrink(&txn->wr.gc.comeback); eASSERT(env, txn->parent == nullptr); diff --git a/src/txn-nested.c b/src/txn-nested.c index 5c5bbde5..fe4550e4 100644 --- a/src/txn-nested.c +++ b/src/txn-nested.c @@ -357,7 +357,6 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) { txn->env->txn = txn; txn->owner = parent->owner; txn->wr.troika = parent->wr.troika; - rkl_init(&txn->wr.gc.reclaimed); #if MDBX_ENABLE_DBI_SPARSE txn->dbi_sparse = parent->dbi_sparse; @@ -415,6 +414,9 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) { txn->wr.gc.spent = parent->wr.gc.spent; rkl_init(&txn->wr.gc.comeback); err = rkl_copy(&parent->wr.gc.reclaimed, &txn->wr.gc.reclaimed); + if (unlikely(err != MDBX_SUCCESS)) + return err; + err = rkl_copy(&parent->wr.gc.ready4reuse, &txn->wr.gc.ready4reuse); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -432,8 +434,6 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) { (parent->parent ? parent->parent->wr.dirtyroom : parent->env->options.dp_limit)); tASSERT(txn, txn->wr.dirtyroom + txn->wr.dirtylist->length == (txn->parent ? txn->parent->wr.dirtyroom : txn->env->options.dp_limit)); - tASSERT(parent, parent->cursors[FREE_DBI] == nullptr); - // TODO: shadow GC' cursor return txn_shadow_cursors(parent, MAIN_DBI); } @@ -443,7 +443,9 @@ void txn_nested_abort(MDBX_txn *nested) { nested->signature = 0; nested->owner = 0; + tASSERT(nested, rkl_empty(&nested->wr.gc.comeback)); rkl_destroy(&nested->wr.gc.reclaimed); + rkl_destroy(&nested->wr.gc.ready4reuse); if (nested->wr.retired_pages) { tASSERT(parent, MDBX_PNL_GETSIZE(nested->wr.retired_pages) >= (uintptr_t)parent->wr.retired_pages); @@ -527,6 +529,8 @@ int txn_nested_join(MDBX_txn *txn, struct commit_timestamp *ts) { txn->wr.repnl = nullptr; parent->wr.gc.spent = txn->wr.gc.spent; rkl_destructive_move(&txn->wr.gc.reclaimed, &parent->wr.gc.reclaimed); + rkl_destructive_move(&txn->wr.gc.ready4reuse, &parent->wr.gc.ready4reuse); + tASSERT(txn, rkl_empty(&txn->wr.gc.comeback)); parent->geo = txn->geo; parent->canary = txn->canary; diff --git a/src/txn.c b/src/txn.c index fce99720..a66348e9 100644 --- a/src/txn.c +++ b/src/txn.c @@ -3,11 +3,6 @@ #include "internals.h" -MDBX_cursor *txn_gc_cursor(MDBX_txn *txn) { - tASSERT(txn, (txn->flags & (MDBX_TXN_BLOCKED | MDBX_TXN_RDONLY)) == 0); - return ptr_disp(txn->env->basal_txn, sizeof(MDBX_txn)); -} - __hot bool txn_gc_detent(const MDBX_txn *const txn) { const txnid_t detent = mvcc_shapshot_oldest(txn->env, txn->wr.troika.txnid[txn->wr.troika.prefer_steady]); if (likely(detent == txn->env->gc.detent)) @@ -33,7 +28,7 @@ void txn_done_cursors(MDBX_txn *txn) { } int txn_shadow_cursors(const MDBX_txn *parent, const size_t dbi) { - tASSERT(parent, dbi > FREE_DBI && dbi < parent->n_dbi); + tASSERT(parent, dbi < parent->n_dbi); MDBX_cursor *cursor = parent->cursors[dbi]; if (!cursor) return MDBX_SUCCESS; @@ -322,6 +317,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) { rc = cursor_init(gc, txn, FREE_DBI); if (rc != MDBX_SUCCESS) goto bailout; + tASSERT(txn, txn->cursors[FREE_DBI] == nullptr); } dxb_sanitize_tail(env, txn); return MDBX_SUCCESS;