From b9b784c18e72967c79943d0f7622dfa1fa2b1508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 7 Jan 2025 17:24:08 +0300 Subject: [PATCH] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA=D1=82?= =?UTF-8?q?=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20`mdbx=5Ftxn=5Fcommit=5Fex()`?= =?UTF-8?q?=203/5=20(=D0=B2=D1=8B=D1=87=D0=BB=D0=B5=D0=BD=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`txn=5Fnested=5Fjoin()`).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api-txn.c | 146 +++++--------------------------------------------- src/proto.h | 6 ++- src/txn.c | 131 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 147 insertions(+), 136 deletions(-) diff --git a/src/api-txn.c b/src/api-txn.c index 23cfdcc0..c2f3d6ce 100644 --- a/src/api-txn.c +++ b/src/api-txn.c @@ -306,10 +306,6 @@ static void latency_gcprof(MDBX_commit_latency *latency, const MDBX_txn *txn) { } } -struct commit_timestamp { - uint64_t start, prep, gc, audit, write, sync, gc_cpu; -}; - static void latency_init(MDBX_commit_latency *latency, struct commit_timestamp *ts) { ts->start = 0; ts->gc_cpu = 0; @@ -349,9 +345,6 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { rc = MDBX_RESULT_TRUE; goto fail; } - bailout: - if (latency) - memset(latency, 0, sizeof(*latency)); return LOG_IFERR(rc); } @@ -359,7 +352,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (MDBX_ENV_CHECKPID && unlikely(env->pid != osal_getpid())) { env->flags |= ENV_FATAL_ERROR; rc = MDBX_PANIC; - goto bailout; + return LOG_IFERR(rc); } if (unlikely(txn->flags & MDBX_TXN_RDONLY)) { @@ -389,139 +382,26 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { } if (unlikely(txn != env->txn)) { - DEBUG("%s", "attempt to commit unknown transaction"); + ERROR("attempt to commit %s txn %p", "unknown", (void *)txn); rc = MDBX_EINVAL; - goto fail; + return LOG_IFERR(rc); } if (txn->parent) { - tASSERT(txn, audit_ex(txn, 0, false) == 0); - eASSERT(env, txn != env->basal_txn); - MDBX_txn *const parent = txn->parent; - eASSERT(env, parent->signature == txn_signature); - eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); - eASSERT(env, dpl_check(txn)); - - if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && parent->n_dbi == txn->n_dbi) { - TXN_FOREACH_DBI_ALL(txn, i) { - tASSERT(txn, (txn->dbi_state[i] & DBI_DIRTY) == 0); - if ((txn->dbi_state[i] & DBI_STALE) && !(parent->dbi_state[i] & DBI_STALE)) - tASSERT(txn, memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0); - } - - tASSERT(txn, memcmp(&parent->geo, &txn->geo, sizeof(parent->geo)) == 0); - tASSERT(txn, memcmp(&parent->canary, &txn->canary, sizeof(parent->canary)) == 0); - tASSERT(txn, !txn->tw.spilled.list || MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0); - tASSERT(txn, txn->tw.loose_count == 0); - + if (unlikely(txn->parent->nested != txn || txn->parent->env != env)) { + ERROR("attempt to commit %s txn %p", "strange nested", (void *)txn); + rc = MDBX_PROBLEM; + return LOG_IFERR(rc); + } + rc = txn_nested_join(txn, latency ? &ts : nullptr); + if (likely(rc == MDBX_SUCCESS)) + goto provide_latency; + if (rc == MDBX_RESULT_TRUE) { /* fast completion of pure nested transaction */ - VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->txnid); end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; goto done; } - - /* Preserve space for spill list to avoid parent's state corruption - * if allocation fails. */ - const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages; - tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; - if (retired_delta) { - rc = pnl_need(&txn->tw.repnl, retired_delta); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - - if (txn->tw.spilled.list) { - if (parent->tw.spilled.list) { - rc = pnl_need(&parent->tw.spilled.list, MDBX_PNL_GETSIZE(txn->tw.spilled.list)); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; - } - spill_purge(txn); - } - - if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > parent->tw.dirtylist->detent && - !dpl_reserve(parent, txn->tw.dirtylist->length + parent->tw.dirtylist->length))) { - rc = MDBX_ENOMEM; - goto fail; - } - - //------------------------------------------------------------------------- - - parent->tw.gc.retxl = txn->tw.gc.retxl; - txn->tw.gc.retxl = nullptr; - - parent->tw.retired_pages = txn->tw.retired_pages; - txn->tw.retired_pages = nullptr; - - pnl_free(parent->tw.repnl); - parent->tw.repnl = txn->tw.repnl; - txn->tw.repnl = nullptr; - parent->tw.gc.time_acc = txn->tw.gc.time_acc; - parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed; - - parent->geo = txn->geo; - parent->canary = txn->canary; - parent->flags |= txn->flags & MDBX_TXN_DIRTY; - - /* Move loose pages to parent */ -#if MDBX_ENABLE_REFUND - parent->tw.loose_refund_wl = txn->tw.loose_refund_wl; -#endif /* MDBX_ENABLE_REFUND */ - parent->tw.loose_count = txn->tw.loose_count; - parent->tw.loose_pages = txn->tw.loose_pages; - - if (txn->flags & txn_may_have_cursors) - /* Merge our cursors into parent's and close them */ - txn_done_cursors(txn); - - /* Update parent's DBs array */ - eASSERT(env, parent->n_dbi == txn->n_dbi); - TXN_FOREACH_DBI_ALL(txn, dbi) { - if (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { - parent->dbs[dbi] = txn->dbs[dbi]; - /* preserve parent's status */ - const uint8_t state = txn->dbi_state[dbi] | (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); - DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still", - parent->dbi_state[dbi], state); - parent->dbi_state[dbi] = state; - } else { - eASSERT(env, txn->dbi_state[dbi] == (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); - } - } - - if (latency) { - ts.prep = osal_monotime(); - ts.gc = /* no gc-update */ ts.prep; - ts.audit = /* no audit */ ts.gc; - ts.write = /* no write */ ts.audit; - ts.sync = /* no sync */ ts.write; - } - txn_merge(parent, txn, parent_retired_len); - env->txn = parent; - parent->nested = nullptr; - tASSERT(parent, dpl_check(parent)); - -#if MDBX_ENABLE_REFUND - txn_refund(parent); - if (ASSERT_ENABLED()) { - /* Check parent's loose pages not suitable for refund */ - for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) { - tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && lp->pgno + 1 < parent->geo.first_unallocated); - MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); - VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); - } - /* Check parent's reclaimed pages not suitable for refund */ - if (MDBX_PNL_GETSIZE(parent->tw.repnl)) - tASSERT(parent, MDBX_PNL_MOST(parent->tw.repnl) + 1 < parent->geo.first_unallocated); - } -#endif /* MDBX_ENABLE_REFUND */ - - txn->signature = 0; - osal_free(txn); - tASSERT(parent, audit_ex(parent, 0, false) == 0); - rc = MDBX_SUCCESS; - goto provide_latency; + goto fail; } if (!txn->tw.dirtylist) { diff --git a/src/proto.h b/src/proto.h index f2078642..db4f8a56 100644 --- a/src/proto.h +++ b/src/proto.h @@ -68,13 +68,17 @@ enum { }; MDBX_INTERNAL int txn_end(MDBX_txn *txn, unsigned mode); MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx); -MDBX_INTERNAL void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len); MDBX_INTERNAL MDBX_txn *txn_alloc(const MDBX_txn_flags_t flags, MDBX_env *env); MDBX_INTERNAL MDBX_txn *txn_basal_create(const size_t max_dbi); MDBX_INTERNAL void txn_basal_destroy(MDBX_txn *txn); MDBX_INTERNAL int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags); MDBX_INTERNAL void txn_nested_abort(MDBX_txn *nested); +struct commit_timestamp { + uint64_t start, prep, gc, audit, write, sync, gc_cpu; +}; +MDBX_INTERNAL int txn_nested_join(MDBX_txn *txn, struct commit_timestamp *ts); + /* env.c */ MDBX_INTERNAL int env_open(MDBX_env *env, mdbx_mode_t mode); MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika); diff --git a/src/txn.c b/src/txn.c index 17d74af6..ca123b28 100644 --- a/src/txn.c +++ b/src/txn.c @@ -96,8 +96,8 @@ int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { return rc; } -/* Merge child txn into parent */ -void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) { +/* Merge pageset of the nested txn into parent */ +static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) { tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0); dpl_t *const src = dpl_sort(txn); @@ -1291,3 +1291,130 @@ void txn_nested_abort(MDBX_txn *nested) { pnl_free(nested->tw.repnl); osal_free(nested); } + +int txn_nested_join(MDBX_txn *txn, struct commit_timestamp *ts) { + MDBX_env *const env = txn->env; + MDBX_txn *const parent = txn->parent; + tASSERT(txn, audit_ex(txn, 0, false) == 0); + eASSERT(env, txn != env->basal_txn); + eASSERT(env, parent->signature == txn_signature); + eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0); + eASSERT(env, dpl_check(txn)); + + if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && parent->n_dbi == txn->n_dbi) { + TXN_FOREACH_DBI_ALL(txn, i) { + tASSERT(txn, (txn->dbi_state[i] & DBI_DIRTY) == 0); + if ((txn->dbi_state[i] & DBI_STALE) && !(parent->dbi_state[i] & DBI_STALE)) + tASSERT(txn, memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0); + } + + tASSERT(txn, memcmp(&parent->geo, &txn->geo, sizeof(parent->geo)) == 0); + tASSERT(txn, memcmp(&parent->canary, &txn->canary, sizeof(parent->canary)) == 0); + tASSERT(txn, !txn->tw.spilled.list || MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0); + tASSERT(txn, txn->tw.loose_count == 0); + + VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->txnid); + return MDBX_RESULT_TRUE; + } + + /* Preserve space for spill list to avoid parent's state corruption + * if allocation fails. */ + const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages; + tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages)); + const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len; + if (retired_delta) { + int err = pnl_need(&txn->tw.repnl, retired_delta); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + + if (txn->tw.spilled.list) { + if (parent->tw.spilled.list) { + int err = pnl_need(&parent->tw.spilled.list, MDBX_PNL_GETSIZE(txn->tw.spilled.list)); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + spill_purge(txn); + } + + if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > parent->tw.dirtylist->detent && + !dpl_reserve(parent, txn->tw.dirtylist->length + parent->tw.dirtylist->length))) { + return MDBX_ENOMEM; + } + + //------------------------------------------------------------------------- + + parent->tw.gc.retxl = txn->tw.gc.retxl; + txn->tw.gc.retxl = nullptr; + + parent->tw.retired_pages = txn->tw.retired_pages; + txn->tw.retired_pages = nullptr; + + pnl_free(parent->tw.repnl); + parent->tw.repnl = txn->tw.repnl; + txn->tw.repnl = nullptr; + parent->tw.gc.time_acc = txn->tw.gc.time_acc; + parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed; + + parent->geo = txn->geo; + parent->canary = txn->canary; + parent->flags |= txn->flags & MDBX_TXN_DIRTY; + + /* Move loose pages to parent */ +#if MDBX_ENABLE_REFUND + parent->tw.loose_refund_wl = txn->tw.loose_refund_wl; +#endif /* MDBX_ENABLE_REFUND */ + parent->tw.loose_count = txn->tw.loose_count; + parent->tw.loose_pages = txn->tw.loose_pages; + + if (txn->flags & txn_may_have_cursors) + /* Merge our cursors into parent's and close them */ + txn_done_cursors(txn); + + /* Update parent's DBs array */ + eASSERT(env, parent->n_dbi == txn->n_dbi); + TXN_FOREACH_DBI_ALL(txn, dbi) { + if (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { + parent->dbs[dbi] = txn->dbs[dbi]; + /* preserve parent's status */ + const uint8_t state = txn->dbi_state[dbi] | (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); + DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still", + parent->dbi_state[dbi], state); + parent->dbi_state[dbi] = state; + } else { + eASSERT(env, txn->dbi_state[dbi] == (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); + } + } + + if (ts) { + ts->prep = osal_monotime(); + ts->gc = /* no gc-update */ ts->prep; + ts->audit = /* no audit */ ts->gc; + ts->write = /* no write */ ts->audit; + ts->sync = /* no sync */ ts->write; + } + txn_merge(parent, txn, parent_retired_len); + env->txn = parent; + parent->nested = nullptr; + tASSERT(parent, dpl_check(parent)); + +#if MDBX_ENABLE_REFUND + txn_refund(parent); + if (ASSERT_ENABLED()) { + /* Check parent's loose pages not suitable for refund */ + for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) { + tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && lp->pgno + 1 < parent->geo.first_unallocated); + MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *)); + VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *)); + } + /* Check parent's reclaimed pages not suitable for refund */ + if (MDBX_PNL_GETSIZE(parent->tw.repnl)) + tASSERT(parent, MDBX_PNL_MOST(parent->tw.repnl) + 1 < parent->geo.first_unallocated); + } +#endif /* MDBX_ENABLE_REFUND */ + + txn->signature = 0; + osal_free(txn); + tASSERT(parent, audit_ex(parent, 0, false) == 0); + return MDBX_SUCCESS; +}