mdbx: drop P_KEEP flag and use LRU-label instead.

Related to https://github.com/erthink/libmdbx/issues/186

Change-Id: Ifd3bd2533b40525dd5b0aa05df421c87462c2439
This commit is contained in:
Leonid Yuriev 2021-04-27 19:23:20 +03:00
parent 975413b48d
commit 6134220b8a
3 changed files with 60 additions and 73 deletions

View File

@ -4,19 +4,19 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD
1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF
2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW
3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META
4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_SPILLED
4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD
5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2
6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP
7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | |P_BAD
7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | |
8 |0000 0100| | | | | | |
9 |0000 0200| | | | | | |
10|0000 0400| | | | | | |
11|0000 0800| | | | | | |
12|0000 1000| | | | | | |
13|0000 2000| | | | | | |
13|0000 2000| | | | | | |P_SPILLED
14|0000 4000|NOSUBDIR | | | | | |P_LOOSE
15|0000 8000| | |DB_VALID |NOSPILL | | |P_KEEP
16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE |P_FROZEN
15|0000 8000| | |DB_VALID |NOSPILL | | |P_FROZEN
16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE |
17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND |
18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP
19|0008 0000|WRITEMAP |<= | |MULTIPLE

View File

@ -4194,12 +4194,11 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
if (unlikely(dp->mp_pgno != dl->items[i].pgno))
return false;
mdbx_tassert(txn, txn->tw.dirtylru > dl->items[i].lru);
if (unlikely(txn->tw.dirtylru <= dl->items[i].lru))
mdbx_tassert(txn, txn->tw.dirtylru >= dl->items[i].lru);
if (unlikely(txn->tw.dirtylru < dl->items[i].lru))
return false;
mdbx_tassert(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp));
mdbx_tassert(txn, (dp->mp_flags & P_KEEP) == 0);
if (dp->mp_flags == P_LOOSE) {
loose += 1;
} else if (unlikely(!IS_MODIFIABLE(txn, dp)))
@ -4826,58 +4825,53 @@ static __inline int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) {
}
/* Set P_KEEP in dirty, non-overflow, non-sub pages watched by txn. */
static void mdbx_cursor_keep(MDBX_cursor *mc) {
const unsigned mask = P_SUBP | P_LOOSE | P_KEEP | P_SPILLED;
if (mc->mc_flags & C_INITIALIZED) {
MDBX_cursor *m3 = mc;
for (;;) {
MDBX_page *mp = NULL;
for (unsigned j = 0; j < m3->mc_snum; j++) {
mp = m3->mc_pg[j];
if (IS_MODIFIABLE(mc->mc_txn, mp) && !(mp->mp_flags & mask))
mp->mp_flags |= P_KEEP;
}
if (!(mp && IS_LEAF(mp)))
break;
/* Proceed to mx if it is at a sub-database */
MDBX_xcursor *mx = m3->mc_xcursor;
if (!(mx && (mx->mx_cursor.mc_flags & C_INITIALIZED)))
break;
const unsigned nkeys = page_numkeys(mp);
unsigned ki = m3->mc_ki[m3->mc_top];
mdbx_cassert(mc, nkeys > 0 &&
(ki < nkeys ||
(ki == nkeys && (mx->mx_cursor.mc_flags & C_EOF))));
ki -= ki >= nkeys;
if (!(node_flags(page_node(mp, ki)) & F_SUBDATA))
break;
m3 = &mx->mx_cursor;
static void mdbx_cursor_keep(MDBX_txn *txn, MDBX_cursor *mc) {
if (!(mc->mc_flags & C_INITIALIZED))
return;
loop:;
const MDBX_page *mp = NULL;
for (unsigned i = 0; i < mc->mc_snum; i++) {
mp = mc->mc_pg[i];
if (IS_MODIFIABLE(txn, mp) && mp->mp_flags < P_SUBP) {
unsigned const n = mdbx_dpl_search(txn, mp->mp_pgno);
if (txn->tw.dirtylist->items[n].pgno == mp->mp_pgno)
txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru;
}
}
if (!(mp && IS_LEAF(mp)))
return;
/* Proceed to mx if it is at a sub-database */
MDBX_xcursor *mx = mc->mc_xcursor;
if (!(mx && (mx->mx_cursor.mc_flags & C_INITIALIZED)))
return;
const unsigned nkeys = page_numkeys(mp);
unsigned ki = mc->mc_ki[mc->mc_top];
mdbx_cassert(mc, nkeys > 0 &&
(ki < nkeys ||
(ki == nkeys && (mx->mx_cursor.mc_flags & C_EOF))));
ki -= ki >= nkeys;
if ((node_flags(page_node(mp, ki)) & F_SUBDATA)) {
mc = &mx->mx_cursor;
goto loop;
}
}
static void mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
if (m0)
mdbx_cursor_keep(m0);
mdbx_cursor_keep(txn, m0);
for (unsigned i = FREE_DBI; i < txn->mt_numdbs; ++i)
if (txn->mt_dbistate[i] & DBI_DIRTY)
for (MDBX_cursor *mc = txn->tw.cursors[i]; mc; mc = mc->mc_next)
if (mc != m0)
mdbx_cursor_keep(mc);
/* Mark dirty root pages */
const unsigned mask = P_SUBP | P_LOOSE | P_KEEP | P_SPILLED;
for (unsigned i = 0; i < txn->mt_numdbs; i++) {
if (txn->mt_dbistate[i] & DBI_DIRTY) {
pgno_t pgno = txn->mt_dbs[i].md_root;
if (pgno == P_INVALID)
continue;
unsigned di = mdbx_dpl_exist(txn, pgno);
if (di) {
MDBX_page *dp = txn->tw.dirtylist->items[di].ptr;
if (!(dp->mp_flags & mask))
dp->mp_flags |= P_KEEP;
for (unsigned i = FREE_DBI; i < txn->mt_numdbs; ++i) {
const pgno_t pgno = txn->mt_dbs[i].md_root;
if ((txn->mt_dbistate[i] & DBI_DIRTY) && pgno != P_INVALID) {
unsigned const n = mdbx_dpl_search(txn, pgno);
if (likely(txn->tw.dirtylist->items[n].pgno == pgno)) {
txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru;
for (MDBX_cursor *mc = txn->tw.cursors[i]; mc; mc = mc->mc_next)
if (mc != m0)
mdbx_cursor_keep(txn, mc);
}
}
}
@ -4890,18 +4884,20 @@ static void mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
static unsigned spill_prio(const MDBX_txn *txn, const unsigned i,
const unsigned lru_min, const unsigned reciprocal) {
MDBX_dpl *const dl = txn->tw.dirtylist;
const pgno_t pgno = dl->items[i].pgno;
MDBX_page *const dp = dl->items[i].ptr;
const unsigned lru = dl->items[i].lru;
const unsigned npages = dpl_npages(dl, i);
if (dp->mp_flags & (P_LOOSE | P_KEEP | P_SPILLED)) {
const pgno_t pgno = dl->items[i].pgno;
if (lru == txn->tw.dirtylru) {
mdbx_debug("skip %s %u page %" PRIaPGNO, "keep", npages, pgno);
return 256;
}
MDBX_page *const dp = dl->items[i].ptr;
if (dp->mp_flags & (P_LOOSE | P_SPILLED)) {
mdbx_debug("skip %s %u page %" PRIaPGNO,
(dp->mp_flags & P_LOOSE)
? "loose"
: (dp->mp_flags & P_LOOSE)
? "loose"
: (dp->mp_flags & P_SPILLED) ? "parent-spilled"
: "keep",
: (dp->mp_flags & P_LOOSE) ? "loose" : "parent-spilled",
npages, pgno);
return 256;
}
@ -5296,19 +5292,11 @@ static int mdbx_txn_spill(MDBX_txn *txn, MDBX_cursor *m0, unsigned need) {
continue;
}
}
if (unlikely(prio > 255 && (dp->mp_flags & P_KEEP)))
/* Reset any dirty pages we kept that page_flush didn't see */
dp->mp_flags -= P_KEEP;
dl->items[++w] = dl->items[r];
}
while (r <= dl->length) {
MDBX_page *const dp = dl->items[r].ptr;
if (unlikely(dp->mp_flags & P_KEEP))
/* Reset any dirty pages we kept that page_flush didn't see */
dp->mp_flags -= P_KEEP;
while (r <= dl->length)
dl->items[++w] = dl->items[r++];
}
mdbx_tassert(txn, r - 1 - w == spilled);
if (unlikely(spilled == 0)) {
mdbx_tassert(txn, ctx.iov_items == 0 && rc == MDBX_SUCCESS);

View File

@ -547,13 +547,12 @@ typedef struct MDBX_page {
#define P_LEAF 0x02 /* leaf page */
#define P_OVERFLOW 0x04 /* overflow page */
#define P_META 0x08 /* meta page */
#define P_SPILLED 0x10 /* spilled in parent txn */
#define P_BAD 0x10 /* explicit flag for invalid/bad page */
#define P_LEAF2 0x20 /* for MDBX_DUPFIXED records */
#define P_SUBP 0x40 /* for MDBX_DUPSORT sub-pages */
#define P_BAD 0x80 /* explicit flag for invalid/bad page */
#define P_SPILLED 0x2000 /* spilled in parent txn */
#define P_LOOSE 0x4000 /* page was dirtied then freed, can be reused */
#define P_KEEP 0x8000 /* leave this page alone during spill */
#define P_FROZEN 0x10000 /* used for retire page with known status */
#define P_FROZEN 0x8000 /* used for retire page with known status */
#define P_ILL_BITS (~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED))
uint16_t mp_flags;
union {