mdbx: avoid MDBX_TXN_FULL while searching for a large multi-page region.

Resolves https://github.com/erthink/libmdbx/issues/123
but with a TODO:
 - provide a user-configurable threshold instead of the currently hard-coded default (MDBX_PNL_MAX/2).

Change-Id: Ia7bfd5f8d36e027444d234e3c3aabe4832313466
This commit is contained in:
Leonid Yuriev 2020-10-22 17:49:13 +03:00
parent 8a7caec54a
commit 55d190bad9
2 changed files with 23 additions and 9 deletions

View File

@ -1,8 +1,8 @@
N | MASK | ENV | TXN | DB | PUT | DBI | NODE | PAGE | N | MASK | ENV | TXN | DB | PUT | DBI | NODE | PAGE |
--|---------|-----------|--------------|----------|-----------|------------|---------|----------| --|---------|-----------|--------------|----------|-----------|------------|---------|----------|
0 |0000 0001| |TXN_FINISHED | | |DBI_DIRTY |F_BIGDATA|P_BRANCH 0 |0000 0001|ALLOC_CACHE|TXN_FINISHED | | |DBI_DIRTY |F_BIGDATA|P_BRANCH
1 |0000 0002| |TXN_ERROR |REVERSEKEY| |DBI_STALE |F_SUBDATA|P_LEAF 1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY| |DBI_STALE |F_SUBDATA|P_LEAF
2 |0000 0004| |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW 2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW
3 |0000 0008| |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META 3 |0000 0008| |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META
4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_DIRTY 4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_DIRTY
5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2

View File

@ -5108,12 +5108,6 @@ skip_cache:
const unsigned wanna_range = num - 1; const unsigned wanna_range = num - 1;
while (true) { /* hsr-kick retry loop */ while (true) { /* hsr-kick retry loop */
/* If our dirty list is already full, we can't do anything */
if (unlikely(txn->tw.dirtyroom == 0)) {
rc = MDBX_TXN_FULL;
goto fail;
}
MDBX_cursor_couple recur; MDBX_cursor_couple recur;
for (MDBX_cursor_op op = MDBX_FIRST;; for (MDBX_cursor_op op = MDBX_FIRST;;
op = (flags & MDBX_LIFORECLAIM) ? MDBX_PREV : MDBX_NEXT) { op = (flags & MDBX_LIFORECLAIM) ? MDBX_PREV : MDBX_NEXT) {
@ -5154,6 +5148,11 @@ skip_cache:
} }
if (op == MDBX_FIRST) { /* 1st iteration, setup cursor, etc */ if (op == MDBX_FIRST) { /* 1st iteration, setup cursor, etc */
if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
!(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY)) {
/* If our dirty list is already full, we can't touch GC */
flags &= ~MDBX_ALLOC_GC;
}
if (unlikely(!(flags & MDBX_ALLOC_GC))) if (unlikely(!(flags & MDBX_ALLOC_GC)))
break /* reclaiming is prohibited for now */; break /* reclaiming is prohibited for now */;
@ -5264,6 +5263,21 @@ skip_cache:
goto fail; goto fail;
} }
const unsigned gc_len = MDBX_PNL_SIZE(gc_pnl); const unsigned gc_len = MDBX_PNL_SIZE(gc_pnl);
/* TODO: provide a user-configurable threshold */
const unsigned threshold_2_stop_gc_reclaiming = MDBX_PNL_MAX / 2;
if (unlikely(gc_len + MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) >
threshold_2_stop_gc_reclaiming)) {
/* Stop reclaiming to avoid overflowing the page list.
* This is a rare case while searching for a contiguous multi-page region
* in a large database. https://github.com/erthink/libmdbx/issues/123 */
flags -= MDBX_ALLOC_GC;
if (unlikely(flags == 0)) {
/* Oh, we can't do anything */
rc = MDBX_TXN_FULL;
goto fail;
}
break;
}
rc = mdbx_pnl_need(&txn->tw.reclaimed_pglist, gc_len); rc = mdbx_pnl_need(&txn->tw.reclaimed_pglist, gc_len);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;