diff --git a/src/bits.md b/src/bits.md index c2059fba..c42b3ef5 100644 --- a/src/bits.md +++ b/src/bits.md @@ -4,19 +4,19 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD 1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF 2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW 3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META -4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_SPILLED +4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD 5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP -7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | |P_BAD +7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | 8 |0000 0100| | | | | | | 9 |0000 0200| | | | | | | 10|0000 0400| | | | | | | 11|0000 0800| | | | | | | 12|0000 1000| | | | | | | -13|0000 2000| | | | | | | +13|0000 2000| | | | | | |P_SPILLED 14|0000 4000|NOSUBDIR | | | | | |P_LOOSE -15|0000 8000| | |DB_VALID |NOSPILL | | |P_KEEP -16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE |P_FROZEN +15|0000 8000| | |DB_VALID |NOSPILL | | |P_FROZEN +16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE | 17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND | 18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP 19|0008 0000|WRITEMAP |<= | |MULTIPLE diff --git a/src/core.c b/src/core.c index 965eca0f..8ff626f8 100644 --- a/src/core.c +++ b/src/core.c @@ -4194,12 +4194,11 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) { if (unlikely(dp->mp_pgno != dl->items[i].pgno)) return false; - mdbx_tassert(txn, txn->tw.dirtylru > dl->items[i].lru); - if (unlikely(txn->tw.dirtylru <= dl->items[i].lru)) + mdbx_tassert(txn, txn->tw.dirtylru >= dl->items[i].lru); + if (unlikely(txn->tw.dirtylru < dl->items[i].lru)) return false; mdbx_tassert(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp)); - mdbx_tassert(txn, (dp->mp_flags & P_KEEP) == 0); if (dp->mp_flags == P_LOOSE) { loose += 1; } else if (unlikely(!IS_MODIFIABLE(txn, dp))) @@ -4826,58 +4825,53 @@ static __inline int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) { } /* Set P_KEEP in dirty, non-overflow, non-sub pages watched by txn. */ -static void mdbx_cursor_keep(MDBX_cursor *mc) { - const unsigned mask = P_SUBP | P_LOOSE | P_KEEP | P_SPILLED; - if (mc->mc_flags & C_INITIALIZED) { - MDBX_cursor *m3 = mc; - for (;;) { - MDBX_page *mp = NULL; - for (unsigned j = 0; j < m3->mc_snum; j++) { - mp = m3->mc_pg[j]; - if (IS_MODIFIABLE(mc->mc_txn, mp) && !(mp->mp_flags & mask)) - mp->mp_flags |= P_KEEP; - } - if (!(mp && IS_LEAF(mp))) - break; - /* Proceed to mx if it is at a sub-database */ - MDBX_xcursor *mx = m3->mc_xcursor; - if (!(mx && (mx->mx_cursor.mc_flags & C_INITIALIZED))) - break; - const unsigned nkeys = page_numkeys(mp); - unsigned ki = m3->mc_ki[m3->mc_top]; - mdbx_cassert(mc, nkeys > 0 && - (ki < nkeys || - (ki == nkeys && (mx->mx_cursor.mc_flags & C_EOF)))); - ki -= ki >= nkeys; - if (!(node_flags(page_node(mp, ki)) & F_SUBDATA)) - break; - m3 = &mx->mx_cursor; +static void mdbx_cursor_keep(MDBX_txn *txn, MDBX_cursor *mc) { + if (!(mc->mc_flags & C_INITIALIZED)) + return; + +loop:; + const MDBX_page *mp = NULL; + for (unsigned i = 0; i < mc->mc_snum; i++) { + mp = mc->mc_pg[i]; + if (IS_MODIFIABLE(txn, mp) && mp->mp_flags < P_SUBP) { + unsigned const n = mdbx_dpl_search(txn, mp->mp_pgno); + if (txn->tw.dirtylist->items[n].pgno == mp->mp_pgno) + txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru; } } + if (!(mp && IS_LEAF(mp))) + return; + + /* Proceed to mx if it is at a sub-database */ + MDBX_xcursor *mx = mc->mc_xcursor; + if (!(mx && (mx->mx_cursor.mc_flags & C_INITIALIZED))) + return; + + const unsigned nkeys = page_numkeys(mp); + unsigned ki = mc->mc_ki[mc->mc_top]; + mdbx_cassert(mc, nkeys > 0 && + (ki < nkeys || + (ki == nkeys && (mx->mx_cursor.mc_flags & C_EOF)))); + ki -= ki >= nkeys; + if ((node_flags(page_node(mp, ki)) & F_SUBDATA)) { + mc = &mx->mx_cursor; + goto loop; + } } static void mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { if (m0) - mdbx_cursor_keep(m0); + mdbx_cursor_keep(txn, m0); - for (unsigned i = FREE_DBI; i < txn->mt_numdbs; ++i) - if (txn->mt_dbistate[i] & DBI_DIRTY) - for (MDBX_cursor *mc = txn->tw.cursors[i]; mc; mc = mc->mc_next) - if (mc != m0) - mdbx_cursor_keep(mc); - - /* Mark dirty root pages */ - const unsigned mask = P_SUBP | P_LOOSE | P_KEEP | P_SPILLED; - for (unsigned i = 0; i < txn->mt_numdbs; i++) { - if (txn->mt_dbistate[i] & DBI_DIRTY) { - pgno_t pgno = txn->mt_dbs[i].md_root; - if (pgno == P_INVALID) - continue; - unsigned di = mdbx_dpl_exist(txn, pgno); - if (di) { - MDBX_page *dp = txn->tw.dirtylist->items[di].ptr; - if (!(dp->mp_flags & mask)) - dp->mp_flags |= P_KEEP; + for (unsigned i = FREE_DBI; i < txn->mt_numdbs; ++i) { + const pgno_t pgno = txn->mt_dbs[i].md_root; + if ((txn->mt_dbistate[i] & DBI_DIRTY) && pgno != P_INVALID) { + unsigned const n = mdbx_dpl_search(txn, pgno); + if (likely(txn->tw.dirtylist->items[n].pgno == pgno)) { + txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru; + for (MDBX_cursor *mc = txn->tw.cursors[i]; mc; mc = mc->mc_next) + if (mc != m0) + mdbx_cursor_keep(txn, mc); } } } @@ -4890,18 +4884,20 @@ static void mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { static unsigned spill_prio(const MDBX_txn *txn, const unsigned i, const unsigned lru_min, const unsigned reciprocal) { MDBX_dpl *const dl = txn->tw.dirtylist; - const pgno_t pgno = dl->items[i].pgno; - MDBX_page *const dp = dl->items[i].ptr; const unsigned lru = dl->items[i].lru; const unsigned npages = dpl_npages(dl, i); - if (dp->mp_flags & (P_LOOSE | P_KEEP | P_SPILLED)) { + const pgno_t pgno = dl->items[i].pgno; + if (lru == txn->tw.dirtylru) { + mdbx_debug("skip %s %u page %" PRIaPGNO, "keep", npages, pgno); + return 256; + } + + MDBX_page *const dp = dl->items[i].ptr; + if (dp->mp_flags & (P_LOOSE | P_SPILLED)) { mdbx_debug("skip %s %u page %" PRIaPGNO, (dp->mp_flags & P_LOOSE) ? "loose" - : (dp->mp_flags & P_LOOSE) - ? "loose" - : (dp->mp_flags & P_SPILLED) ? "parent-spilled" - : "keep", + : (dp->mp_flags & P_LOOSE) ? "loose" : "parent-spilled", npages, pgno); return 256; } @@ -5296,19 +5292,11 @@ static int mdbx_txn_spill(MDBX_txn *txn, MDBX_cursor *m0, unsigned need) { continue; } } - if (unlikely(prio > 255 && (dp->mp_flags & P_KEEP))) - /* Reset any dirty pages we kept that page_flush didn't see */ - dp->mp_flags -= P_KEEP; dl->items[++w] = dl->items[r]; } - while (r <= dl->length) { - MDBX_page *const dp = dl->items[r].ptr; - if (unlikely(dp->mp_flags & P_KEEP)) - /* Reset any dirty pages we kept that page_flush didn't see */ - dp->mp_flags -= P_KEEP; + while (r <= dl->length) dl->items[++w] = dl->items[r++]; - } mdbx_tassert(txn, r - 1 - w == spilled); if (unlikely(spilled == 0)) { mdbx_tassert(txn, ctx.iov_items == 0 && rc == MDBX_SUCCESS); diff --git a/src/internals.h b/src/internals.h index bec54c76..02b8626a 100644 --- a/src/internals.h +++ b/src/internals.h @@ -547,13 +547,12 @@ typedef struct MDBX_page { #define P_LEAF 0x02 /* leaf page */ #define P_OVERFLOW 0x04 /* overflow page */ #define P_META 0x08 /* meta page */ -#define P_SPILLED 0x10 /* spilled in parent txn */ +#define P_BAD 0x10 /* explicit flag for invalid/bad page */ #define P_LEAF2 0x20 /* for MDBX_DUPFIXED records */ #define P_SUBP 0x40 /* for MDBX_DUPSORT sub-pages */ -#define P_BAD 0x80 /* explicit flag for invalid/bad page */ +#define P_SPILLED 0x2000 /* spilled in parent txn */ #define P_LOOSE 0x4000 /* page was dirtied then freed, can be reused */ -#define P_KEEP 0x8000 /* leave this page alone during spill */ -#define P_FROZEN 0x10000 /* used for retire page with known status */ +#define P_FROZEN 0x8000 /* used for retire page with known status */ #define P_ILL_BITS (~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED)) uint16_t mp_flags; union {