From 55d190bad9723fd9766466005b1a14e70779fc89 Mon Sep 17 00:00:00 2001
From: Leonid Yuriev
Date: Thu, 22 Oct 2020 17:49:13 +0300
Subject: [PATCH] mdbx: avoid `MDBX_TXN_FULL` while searching for a large multi-page region.

Resolves https://github.com/erthink/libmdbx/issues/123, but with a TODO:
 - provide a user-configurable threshold instead of the currently
   hard-coded default (MDBX_PNL_MAX/2).

Change-Id: Ia7bfd5f8d36e027444d234e3c3aabe4832313466
---
 src/bits.md |  6 +++---
 src/core.c  | 26 ++++++++++++++++++++------
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/bits.md b/src/bits.md
index 4a18c00a..ba7e4eaa 100644
--- a/src/bits.md
+++ b/src/bits.md
@@ -1,8 +1,8 @@
 N | MASK    | ENV       | TXN          | DB       | PUT       | DBI        | NODE    | PAGE     |
 --|---------|-----------|--------------|----------|-----------|------------|---------|----------|
-0 |0000 0001|           |TXN_FINISHED  |          |           |DBI_DIRTY   |F_BIGDATA|P_BRANCH
-1 |0000 0002|           |TXN_ERROR     |REVERSEKEY|           |DBI_STALE   |F_SUBDATA|P_LEAF
-2 |0000 0004|           |TXN_DIRTY     |DUPSORT   |           |DBI_FRESH   |F_DUPDATA|P_OVERFLOW
+0 |0000 0001|ALLOC_CACHE|TXN_FINISHED  |          |           |DBI_DIRTY   |F_BIGDATA|P_BRANCH
+1 |0000 0002|ALLOC_GC   |TXN_ERROR     |REVERSEKEY|           |DBI_STALE   |F_SUBDATA|P_LEAF
+2 |0000 0004|ALLOC_NEW  |TXN_DIRTY     |DUPSORT   |           |DBI_FRESH   |F_DUPDATA|P_OVERFLOW
 3 |0000 0008|           |TXN_SPILLS    |INTEGERKEY|           |DBI_CREAT   |         |P_META
 4 |0000 0010|           |TXN_HAS_CHILD |DUPFIXED  |NOOVERWRITE|DBI_VALID   |         |P_DIRTY
 5 |0000 0020|           |              |INTEGERDUP|NODUPDATA  |DBI_USRVALID|         |P_LEAF2
diff --git a/src/core.c b/src/core.c
index 6f0e0762..122d46b2 100644
--- a/src/core.c
+++ b/src/core.c
@@ -5108,12 +5108,6 @@ skip_cache:
   const unsigned wanna_range = num - 1;
 
   while (true) { /* hsr-kick retry loop */
-    /* If our dirty list is already full, we can't do anything */
-    if (unlikely(txn->tw.dirtyroom == 0)) {
-      rc = MDBX_TXN_FULL;
-      goto fail;
-    }
-
     MDBX_cursor_couple recur;
     for (MDBX_cursor_op op = MDBX_FIRST;;
          op = (flags & MDBX_LIFORECLAIM) ? MDBX_PREV : MDBX_NEXT) {
@@ -5154,6 +5148,11 @@ skip_cache:
       }
 
       if (op == MDBX_FIRST) { /* 1st iteration, setup cursor, etc */
+        if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
+            !(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY)) {
+          /* If our dirty list is already full, we can't touch GC */
+          flags &= ~MDBX_ALLOC_GC;
+        }
         if (unlikely(!(flags & MDBX_ALLOC_GC)))
           break /* reclaiming is prohibited for now */;
 
@@ -5264,6 +5263,21 @@ skip_cache:
         goto fail;
       }
       const unsigned gc_len = MDBX_PNL_SIZE(gc_pnl);
+      /* TODO: provide a user-configurable threshold */
+      const unsigned threshold_2_stop_gc_reclaiming = MDBX_PNL_MAX / 2;
+      if (unlikely(gc_len + MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) >
+                   threshold_2_stop_gc_reclaiming)) {
+        /* Stop reclaiming to avoid overflowing the page list. This is a rare
+         * case while searching for a contiguous multi-page region in a large
+         * database. https://github.com/erthink/libmdbx/issues/123 */
+        flags -= MDBX_ALLOC_GC;
+        if (unlikely(flags == 0)) {
+          /* Oh, we can't do anything */
+          rc = MDBX_TXN_FULL;
+          goto fail;
+        }
+        break;
+      }
       rc = mdbx_pnl_need(&txn->tw.reclaimed_pglist, gc_len);
       if (unlikely(rc != MDBX_SUCCESS))
         goto fail;
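
For review convenience, below is a minimal, self-contained sketch (not part of the patch) of the decision the last hunk adds to the allocation path in src/core.c: once merging the next GC record would push the reclaimed page list past the threshold, GC reclaiming is switched off, and the hard MDBX_TXN_FULL failure is reported only if no other allocation source remains. The constants and the decide_gc_reclaiming() helper are illustrative stand-ins for MDBX_ALLOC_GC, MDBX_ALLOC_NEW, MDBX_PNL_MAX and MDBX_TXN_FULL, with values chosen only to make the example compile and run, not the libmdbx definitions.

/* A simplified sketch of the threshold logic introduced by the patch above.
 * All definitions here are assumed stand-ins for the libmdbx ones. */
#include <stdio.h>

#define ALLOC_GC 0x02u     /* stand-in for MDBX_ALLOC_GC  */
#define ALLOC_NEW 0x04u    /* stand-in for MDBX_ALLOC_NEW */
#define PNL_MAX (1u << 24) /* stand-in for MDBX_PNL_MAX   */
#define TXN_FULL (-30788)  /* stand-in for MDBX_TXN_FULL  */

/* Decide whether the next GC record may still be merged into the reclaimed
 * page list. Mirrors the patched behaviour: once the merged length would
 * exceed the threshold, GC reclaiming is switched off, and the hard
 * TXN_FULL failure is reported only if no other allocation source is left. */
static int decide_gc_reclaiming(unsigned *flags, unsigned reclaimed_len,
                                unsigned gc_len) {
  const unsigned threshold = PNL_MAX / 2; /* hard-coded default from the patch */
  if (gc_len + reclaimed_len > threshold) {
    *flags &= ~ALLOC_GC; /* stop pulling pages from GC */
    if (*flags == 0)
      return TXN_FULL; /* nothing else we can do */
  }
  return 0; /* keep going with whatever sources remain */
}

int main(void) {
  unsigned flags = ALLOC_GC | ALLOC_NEW;
  /* A reclaimed list already holding half of PNL_MAX trips the threshold:
   * GC reclaiming is disabled, but allocating new pages may still proceed. */
  int rc = decide_gc_reclaiming(&flags, PNL_MAX / 2, 1);
  printf("rc=%d, gc-allowed=%s\n", rc, (flags & ALLOC_GC) ? "yes" : "no");
  return 0;
}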