mdbx: drop the P_KEEP flag and use an LRU label instead.

Related to https://github.com/erthink/libmdbx/issues/186

Change-Id: Ifd3bd2533b40525dd5b0aa05df421c87462c2439
This commit is contained in:
Leonid Yuriev 2021-04-27 19:23:20 +03:00
parent 975413b48d
commit 6134220b8a
3 changed files with 60 additions and 73 deletions

View File

@ -4,19 +4,19 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD
1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF 1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF
2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW 2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW
3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META 3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META
4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_SPILLED 4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD
5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2
6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP 6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP
7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | |P_BAD 7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | |
8 |0000 0100| | | | | | | 8 |0000 0100| | | | | | |
9 |0000 0200| | | | | | | 9 |0000 0200| | | | | | |
10|0000 0400| | | | | | | 10|0000 0400| | | | | | |
11|0000 0800| | | | | | | 11|0000 0800| | | | | | |
12|0000 1000| | | | | | | 12|0000 1000| | | | | | |
13|0000 2000| | | | | | | 13|0000 2000| | | | | | |P_SPILLED
14|0000 4000|NOSUBDIR | | | | | |P_LOOSE 14|0000 4000|NOSUBDIR | | | | | |P_LOOSE
15|0000 8000| | |DB_VALID |NOSPILL | | |P_KEEP 15|0000 8000| | |DB_VALID |NOSPILL | | |P_FROZEN
16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE |P_FROZEN 16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE |
17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND | 17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND |
18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP 18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP
19|0008 0000|WRITEMAP |<= | |MULTIPLE 19|0008 0000|WRITEMAP |<= | |MULTIPLE

View File

@ -4194,12 +4194,11 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
if (unlikely(dp->mp_pgno != dl->items[i].pgno)) if (unlikely(dp->mp_pgno != dl->items[i].pgno))
return false; return false;
mdbx_tassert(txn, txn->tw.dirtylru > dl->items[i].lru); mdbx_tassert(txn, txn->tw.dirtylru >= dl->items[i].lru);
if (unlikely(txn->tw.dirtylru <= dl->items[i].lru)) if (unlikely(txn->tw.dirtylru < dl->items[i].lru))
return false; return false;
mdbx_tassert(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp)); mdbx_tassert(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp));
mdbx_tassert(txn, (dp->mp_flags & P_KEEP) == 0);
if (dp->mp_flags == P_LOOSE) { if (dp->mp_flags == P_LOOSE) {
loose += 1; loose += 1;
} else if (unlikely(!IS_MODIFIABLE(txn, dp))) } else if (unlikely(!IS_MODIFIABLE(txn, dp)))
@ -4826,58 +4825,53 @@ static __inline int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) {
} }
/* Set P_KEEP in dirty, non-overflow, non-sub pages watched by txn. */ /* Set P_KEEP in dirty, non-overflow, non-sub pages watched by txn. */
static void mdbx_cursor_keep(MDBX_cursor *mc) { static void mdbx_cursor_keep(MDBX_txn *txn, MDBX_cursor *mc) {
const unsigned mask = P_SUBP | P_LOOSE | P_KEEP | P_SPILLED; if (!(mc->mc_flags & C_INITIALIZED))
if (mc->mc_flags & C_INITIALIZED) { return;
MDBX_cursor *m3 = mc;
for (;;) { loop:;
MDBX_page *mp = NULL; const MDBX_page *mp = NULL;
for (unsigned j = 0; j < m3->mc_snum; j++) { for (unsigned i = 0; i < mc->mc_snum; i++) {
mp = m3->mc_pg[j]; mp = mc->mc_pg[i];
if (IS_MODIFIABLE(mc->mc_txn, mp) && !(mp->mp_flags & mask)) if (IS_MODIFIABLE(txn, mp) && mp->mp_flags < P_SUBP) {
mp->mp_flags |= P_KEEP; unsigned const n = mdbx_dpl_search(txn, mp->mp_pgno);
if (txn->tw.dirtylist->items[n].pgno == mp->mp_pgno)
txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru;
}
} }
if (!(mp && IS_LEAF(mp))) if (!(mp && IS_LEAF(mp)))
break; return;
/* Proceed to mx if it is at a sub-database */ /* Proceed to mx if it is at a sub-database */
MDBX_xcursor *mx = m3->mc_xcursor; MDBX_xcursor *mx = mc->mc_xcursor;
if (!(mx && (mx->mx_cursor.mc_flags & C_INITIALIZED))) if (!(mx && (mx->mx_cursor.mc_flags & C_INITIALIZED)))
break; return;
const unsigned nkeys = page_numkeys(mp); const unsigned nkeys = page_numkeys(mp);
unsigned ki = m3->mc_ki[m3->mc_top]; unsigned ki = mc->mc_ki[mc->mc_top];
mdbx_cassert(mc, nkeys > 0 && mdbx_cassert(mc, nkeys > 0 &&
(ki < nkeys || (ki < nkeys ||
(ki == nkeys && (mx->mx_cursor.mc_flags & C_EOF)))); (ki == nkeys && (mx->mx_cursor.mc_flags & C_EOF))));
ki -= ki >= nkeys; ki -= ki >= nkeys;
if (!(node_flags(page_node(mp, ki)) & F_SUBDATA)) if ((node_flags(page_node(mp, ki)) & F_SUBDATA)) {
break; mc = &mx->mx_cursor;
m3 = &mx->mx_cursor; goto loop;
}
} }
} }
static void mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { static void mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
if (m0) if (m0)
mdbx_cursor_keep(m0); mdbx_cursor_keep(txn, m0);
for (unsigned i = FREE_DBI; i < txn->mt_numdbs; ++i) for (unsigned i = FREE_DBI; i < txn->mt_numdbs; ++i) {
if (txn->mt_dbistate[i] & DBI_DIRTY) const pgno_t pgno = txn->mt_dbs[i].md_root;
if ((txn->mt_dbistate[i] & DBI_DIRTY) && pgno != P_INVALID) {
unsigned const n = mdbx_dpl_search(txn, pgno);
if (likely(txn->tw.dirtylist->items[n].pgno == pgno)) {
txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru;
for (MDBX_cursor *mc = txn->tw.cursors[i]; mc; mc = mc->mc_next) for (MDBX_cursor *mc = txn->tw.cursors[i]; mc; mc = mc->mc_next)
if (mc != m0) if (mc != m0)
mdbx_cursor_keep(mc); mdbx_cursor_keep(txn, mc);
/* Mark dirty root pages */
const unsigned mask = P_SUBP | P_LOOSE | P_KEEP | P_SPILLED;
for (unsigned i = 0; i < txn->mt_numdbs; i++) {
if (txn->mt_dbistate[i] & DBI_DIRTY) {
pgno_t pgno = txn->mt_dbs[i].md_root;
if (pgno == P_INVALID)
continue;
unsigned di = mdbx_dpl_exist(txn, pgno);
if (di) {
MDBX_page *dp = txn->tw.dirtylist->items[di].ptr;
if (!(dp->mp_flags & mask))
dp->mp_flags |= P_KEEP;
} }
} }
} }
@ -4890,18 +4884,20 @@ static void mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
static unsigned spill_prio(const MDBX_txn *txn, const unsigned i, static unsigned spill_prio(const MDBX_txn *txn, const unsigned i,
const unsigned lru_min, const unsigned reciprocal) { const unsigned lru_min, const unsigned reciprocal) {
MDBX_dpl *const dl = txn->tw.dirtylist; MDBX_dpl *const dl = txn->tw.dirtylist;
const pgno_t pgno = dl->items[i].pgno;
MDBX_page *const dp = dl->items[i].ptr;
const unsigned lru = dl->items[i].lru; const unsigned lru = dl->items[i].lru;
const unsigned npages = dpl_npages(dl, i); const unsigned npages = dpl_npages(dl, i);
if (dp->mp_flags & (P_LOOSE | P_KEEP | P_SPILLED)) { const pgno_t pgno = dl->items[i].pgno;
if (lru == txn->tw.dirtylru) {
mdbx_debug("skip %s %u page %" PRIaPGNO, "keep", npages, pgno);
return 256;
}
MDBX_page *const dp = dl->items[i].ptr;
if (dp->mp_flags & (P_LOOSE | P_SPILLED)) {
mdbx_debug("skip %s %u page %" PRIaPGNO, mdbx_debug("skip %s %u page %" PRIaPGNO,
(dp->mp_flags & P_LOOSE) (dp->mp_flags & P_LOOSE)
? "loose" ? "loose"
: (dp->mp_flags & P_LOOSE) : (dp->mp_flags & P_LOOSE) ? "loose" : "parent-spilled",
? "loose"
: (dp->mp_flags & P_SPILLED) ? "parent-spilled"
: "keep",
npages, pgno); npages, pgno);
return 256; return 256;
} }
@ -5296,19 +5292,11 @@ static int mdbx_txn_spill(MDBX_txn *txn, MDBX_cursor *m0, unsigned need) {
continue; continue;
} }
} }
if (unlikely(prio > 255 && (dp->mp_flags & P_KEEP)))
/* Reset any dirty pages we kept that page_flush didn't see */
dp->mp_flags -= P_KEEP;
dl->items[++w] = dl->items[r]; dl->items[++w] = dl->items[r];
} }
while (r <= dl->length) { while (r <= dl->length)
MDBX_page *const dp = dl->items[r].ptr;
if (unlikely(dp->mp_flags & P_KEEP))
/* Reset any dirty pages we kept that page_flush didn't see */
dp->mp_flags -= P_KEEP;
dl->items[++w] = dl->items[r++]; dl->items[++w] = dl->items[r++];
}
mdbx_tassert(txn, r - 1 - w == spilled); mdbx_tassert(txn, r - 1 - w == spilled);
if (unlikely(spilled == 0)) { if (unlikely(spilled == 0)) {
mdbx_tassert(txn, ctx.iov_items == 0 && rc == MDBX_SUCCESS); mdbx_tassert(txn, ctx.iov_items == 0 && rc == MDBX_SUCCESS);

View File

@ -547,13 +547,12 @@ typedef struct MDBX_page {
#define P_LEAF 0x02 /* leaf page */ #define P_LEAF 0x02 /* leaf page */
#define P_OVERFLOW 0x04 /* overflow page */ #define P_OVERFLOW 0x04 /* overflow page */
#define P_META 0x08 /* meta page */ #define P_META 0x08 /* meta page */
#define P_SPILLED 0x10 /* spilled in parent txn */ #define P_BAD 0x10 /* explicit flag for invalid/bad page */
#define P_LEAF2 0x20 /* for MDBX_DUPFIXED records */ #define P_LEAF2 0x20 /* for MDBX_DUPFIXED records */
#define P_SUBP 0x40 /* for MDBX_DUPSORT sub-pages */ #define P_SUBP 0x40 /* for MDBX_DUPSORT sub-pages */
#define P_BAD 0x80 /* explicit flag for invalid/bad page */ #define P_SPILLED 0x2000 /* spilled in parent txn */
#define P_LOOSE 0x4000 /* page was dirtied then freed, can be reused */ #define P_LOOSE 0x4000 /* page was dirtied then freed, can be reused */
#define P_KEEP 0x8000 /* leave this page alone during spill */ #define P_FROZEN 0x8000 /* used for retire page with known status */
#define P_FROZEN 0x10000 /* used for retire page with known status */
#define P_ILL_BITS (~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED)) #define P_ILL_BITS (~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED))
uint16_t mp_flags; uint16_t mp_flags;
union { union {