From 9ea2d6ddbaa807fc1a24c54284425724d1fd5ef8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?=
 =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= <leo@yuriev.ru>
Date: Fri, 28 Mar 2025 16:40:22 +0300
Subject: [PATCH] =?UTF-8?q?mdbx-tests:=20=D1=80=D0=B0=D1=81=D1=88=D0=B8?=
 =?UTF-8?q?=D1=80=D0=B5=D0=BD=D0=B8=D0=B5=20extra/details-rkl=20=D0=B4?=
 =?UTF-8?q?=D0=BB=D1=8F=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8?=
 =?UTF-8?q?=20hole-=D0=B8=D1=82=D0=B5=D1=80=D0=B0=D1=82=D0=BE=D1=80=D0=BE?=
 =?UTF-8?q?=D0=B2.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/rkl.c                |   4 +-
 test/extra/details_rkl.c | 265 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 260 insertions(+), 9 deletions(-)

diff --git a/src/rkl.c b/src/rkl.c
index ad3d81a9..2362dd42 100644
--- a/src/rkl.c
+++ b/src/rkl.c
@@ -402,7 +402,9 @@ txnid_t rkl_iterator_turn(rkl_iterator_t *iter, const rkl_t *rkl, const bool rev
 }
 
 #if 1
-#define DEBUG_HOLE(hole) do {} while(0)
+#define DEBUG_HOLE(hole)                                                                                               \
+  do {                                                                                                                 \
+  } while (0)
 #else
 #define DEBUG_HOLE(hole)                                                                                               \
   do {                                                                                                                 \
diff --git a/test/extra/details_rkl.c b/test/extra/details_rkl.c
index 0f641d1a..586e738d 100644
--- a/test/extra/details_rkl.c
+++ b/test/extra/details_rkl.c
@@ -15,7 +15,7 @@ MDBX_MAYBE_UNUSED __cold void debug_log_sub(int level, const char *function, int
 
 /*-----------------------------------------------------------------------------*/
 
-static size_t tst_failed, tst_ok, tst_iterations, tst_cases;
+static size_t tst_failed, tst_ok, tst_iterations, tst_cases, tst_cases_hole;
 #ifndef NDEBUG
 static size_t tst_target;
 #endif
@@ -23,6 +23,7 @@ static size_t tst_target;
 static bool check_bool(bool v, bool expect, const char *fn, unsigned line) {
   if (unlikely(v != expect)) {
     ++tst_failed;
+    fflush(nullptr);
     fprintf(stderr, "iteration %zi: got %s, expected %s, at %s:%u\n", tst_iterations, v ? "true" : "false",
             expect ? "true" : "false", fn, line);
     fflush(nullptr);
@@ -35,6 +36,7 @@ static bool check_bool(bool v, bool expect, const char *fn, unsigned line) {
 static bool check_eq(uint64_t v, uint64_t expect, const char *fn, unsigned line) {
   if (unlikely(v != expect)) {
     ++tst_failed;
+    fflush(nullptr);
     fprintf(stderr, "iteration %zi: %" PRIu64 " (got) != %" PRIu64 " (expected), at %s:%u\n", tst_iterations, v, expect,
             fn, line);
     fflush(nullptr);
@@ -71,6 +73,20 @@ void trivia(void) {
   CHECK_EQ(rkl_iterator_turn(&r, &x, true), 0);
   CHECK_TRUE(rkl_check(&x));
 
+  rkl_hole_t hole;
+  hole = rkl_iterator_hole(&f, &x, true);
+  CHECK_EQ(hole.begin, 1);
+  CHECK_EQ(hole.length, 0);
+  hole = rkl_iterator_hole(&f, &x, false);
+  CHECK_EQ(hole.begin, MAX_TXNID);
+  CHECK_EQ(hole.length, 0);
+  hole = rkl_iterator_hole(&r, &x, true);
+  CHECK_EQ(hole.begin, 1);
+  CHECK_EQ(hole.length, 0);
+  hole = rkl_iterator_hole(&r, &x, false);
+  CHECK_EQ(hole.begin, MAX_TXNID);
+  CHECK_EQ(hole.length, 0);
+
   CHECK_EQ(rkl_push(&x, 42, false), MDBX_SUCCESS);
   CHECK_TRUE(rkl_check(&x));
   CHECK_FALSE(rkl_empty(&x));
@@ -97,6 +113,40 @@ void trivia(void) {
   CHECK_EQ(rkl_iterator_turn(&r, &x, false), 42);
   CHECK_EQ(rkl_iterator_turn(&r, &x, false), 0);
 
+  rkl_iterator_init(&f, &x, false);
+  hole = rkl_iterator_hole(&f, &x, false);
+  CHECK_EQ(hole.begin, 43);
+  CHECK_EQ(hole.length, MAX_TXNID - 43);
+  hole = rkl_iterator_hole(&f, &x, false);
+  CHECK_EQ(hole.begin, MAX_TXNID);
+  CHECK_EQ(hole.length, 0);
+  hole = rkl_iterator_hole(&f, &x, true);
+  CHECK_EQ(hole.begin, 43);
+  CHECK_EQ(hole.length, MAX_TXNID - 43);
+  hole = rkl_iterator_hole(&f, &x, true);
+  CHECK_EQ(hole.begin, 1);
+  CHECK_EQ(hole.length, 41);
+  hole = rkl_iterator_hole(&f, &x, true);
+  CHECK_EQ(hole.begin, 1);
+  CHECK_EQ(hole.length, 41);
+
+  rkl_iterator_init(&r, &x, true);
+  hole = rkl_iterator_hole(&r, &x, false);
+  CHECK_EQ(hole.begin, MAX_TXNID);
+  CHECK_EQ(hole.length, 0);
+  hole = rkl_iterator_hole(&r, &x, true);
+  CHECK_EQ(hole.begin, 43);
+  CHECK_EQ(hole.length, MAX_TXNID - 43);
+  hole = rkl_iterator_hole(&r, &x, true);
+  CHECK_EQ(hole.begin, 1);
+  CHECK_EQ(hole.length, 41);
+  hole = rkl_iterator_hole(&r, &x, false);
+  CHECK_EQ(hole.begin, 43);
+  CHECK_EQ(hole.length, MAX_TXNID - 43);
+  hole = rkl_iterator_hole(&r, &x, false);
+  CHECK_EQ(hole.begin, MAX_TXNID);
+  CHECK_EQ(hole.length, 0);
+
   rkl_resize(&x, 222);
   CHECK_FALSE(rkl_empty(&x));
   CHECK_TRUE(rkl_check(&x));
@@ -110,7 +160,14 @@ void trivia(void) {
 
 /*-----------------------------------------------------------------------------*/
 
-uint64_t prng;
+uint64_t prng_state;
+
+static uint64_t prng(void) { /* advances the global prng_state by one step and returns the new state */
+  prng_state = prng_state * UINT64_C(6364136223846793005) + 1; /* LCG step; uint64_t arithmetic wraps mod 2^64 */
+  return prng_state;
+}
+
+static bool flipcoin(void) { return (prng() >> 63) != 0; } /* top bit: an LCG's low bit merely alternates */
 
 static bool stochastic_pass(const unsigned start, const unsigned width, const unsigned n) {
   rkl_t k, c;
@@ -129,8 +186,7 @@ static bool stochastic_pass(const unsigned start, const unsigned width, const un
   txnid_t lowest = UINT_MAX;
   txnid_t highest = 0;
   while (MDBX_PNL_GETSIZE(l) < n) {
-    prng = prng * UINT64_C(6364136223846793005) + 1;
-    txnid_t id = (txnid_t)(prng % width + start);
+    txnid_t id = (txnid_t)(prng() % width + start);
     if (id < MIN_TXNID || id >= INVALID_TXNID)
       continue;
     if (txl_contain(l, id)) {
@@ -228,6 +284,198 @@ static bool stochastic(const size_t limit_cases, const size_t limit_loops) {
   return true;
 }
 
+/*-----------------------------------------------------------------------------*/
+
+static bool bit(size_t set, size_t n) { /* returns whether bit number n of `set` is 1 */
+  assert(n < CHAR_BIT * sizeof(set)); /* shifting by the full width of size_t or more would be undefined */
+  return (set >> n) & 1;
+}
+
+static size_t hamming_weight(size_t v) { /* returns the number of 1-bits in v */
+  const size_t m1 = (size_t)UINT64_C(0x5555555555555555); /* every other bit; cast truncates on narrower size_t */
+  const size_t m2 = (size_t)UINT64_C(0x3333333333333333); /* low two bits of every nibble */
+  const size_t m4 = (size_t)UINT64_C(0x0f0f0f0f0f0f0f0f); /* low nibble of every byte */
+  const size_t h01 = (size_t)UINT64_C(0x0101010101010101); /* a 1 in every byte */
+  v -= (v >> 1) & m1; /* each 2-bit field now holds its own bit count */
+  v = (v & m2) + ((v >> 2) & m2); /* each nibble now holds its bit count */
+  v = (v + (v >> 4)) & m4; /* each byte now holds its bit count */
+  return (v * h01) >> (sizeof(v) * 8 - 8); /* the multiply sums all bytes into the top byte */
+}
+
+static bool check_hole(const size_t set, const rkl_hole_t hole, size_t *acc) { /* checks one hole against `set` */
+  const size_t errors = tst_failed; /* snapshot, compared at the end to see whether any check below failed */
+  ++tst_iterations;
+
+  if (hole.begin > 1)
+    CHECK_EQ(bit(set, hole.begin - 1), 1); /* the id just below the hole must be present in the bitmap */
+  if (hole.begin + hole.length < CHAR_BIT * sizeof(set))
+    CHECK_EQ(bit(set, hole.begin + hole.length), 1); /* the id just above the hole must be present */
+
+  for (size_t n = 0; n < hole.length && hole.begin + n < CHAR_BIT * sizeof(set); n++) {
+    CHECK_EQ(bit(set, hole.begin + n), 0); /* ids inside the hole (within the bitmap width) must be absent */
+    *acc += 1; /* tally of ids visited, accumulated for the caller */
+  }
+
+  return errors == tst_failed; /* true when tst_failed did not change during this call */
+}
+
+static void debug_set(const size_t set, const char *str, int iter_offset) { /* optional dump of the absent ids */
+#if 1 /* dump compiled out: the function only silences unused-parameter warnings */
+  (void)set;
+  (void)str;
+  (void)iter_offset;
+#else /* prints the case label, the count of absent ids and each absent id or "first-last" run */
+  printf("\ncase %s+%d: count %zu, holes", str, iter_offset, hamming_weight(~set) - 1);
+  for (size_t k, i = 1; i < CHAR_BIT * sizeof(set); ++i) { /* bit 0 is skipped */
+    if (!bit(set, i)) {
+      printf(" %zu", i);
+      for (k = i; k < CHAR_BIT * sizeof(set) - 1 && !bit(set, k + 1); ++k) /* advance k to the end of the zero run */
+        ;
+      if (k > i) {
+        printf("-%zu", k); /* a run longer than one id is printed as a range */
+        i = k; /* continue scanning after the run */
+      }
+    }
+  }
+  printf("\n");
+  fflush(nullptr);
+#endif
+}
+
+static bool check_holes_bothsides(const size_t set, const rkl_t *rkl, rkl_iterator_t const *i) { /* walks holes both ways */
+  const size_t number_of_holes = hamming_weight(~set) - 1; /* zero bits of `set`, minus one (bit 0 is never an id) */
+  size_t acc = 0; /* ids visited inside holes, summed over both walks */
+
+  rkl_iterator_t f = *i; /* work on a copy: the caller's iterator is const and stays where it is */
+  for (;;) {
+    rkl_hole_t hole = rkl_iterator_hole(&f, rkl, false); /* false: towards higher ids, per trivia() expectations */
+    if (hole.length == 0)
+      break; /* an empty hole ends this walk */
+    if (!check_hole(set, hole, &acc))
+      return false;
+    if (hole.begin + hole.length >= CHAR_BIT * sizeof(set))
+      break; /* the hole runs past the bitmap width, nothing further to compare */
+  }
+
+  rkl_iterator_t b = *i; /* second copy, again starting from the caller's position */
+  for (;;) {
+    rkl_hole_t hole = rkl_iterator_hole(&b, rkl, true); /* true: towards lower ids, per trivia() expectations */
+    if (hole.length == 0)
+      break;
+    if (!check_hole(set, hole, &acc)) {
+      b = *i; /* NOTE(review): this replay's result is unused — presumably kept for stepping in a debugger */
+      hole = rkl_iterator_hole(&b, rkl, true);
+      return false;
+    }
+    if (hole.begin == 1)
+      break; /* a hole starting at id 1 leaves nothing lower to visit */
+  }
+
+  if (!CHECK_EQ(acc, number_of_holes)) /* both walks together must have counted every absent id */
+    return false;
+
+  return true;
+}
+
+static bool check_holes_fourways(const size_t set, const rkl_t *rkl) { /* both-sides check in four iterator sweeps */
+  rkl_iterator_t i, j;
+  rkl_iterator_init(&i, rkl, false);
+  int o = 0; /* step offset, passed to debug_set() only */
+  do {
+    debug_set(set, "initial-forward", o++);
+    j = i; /* copy of the position taken before the check */
+    if (!check_holes_bothsides(set, rkl, &i)) {
+      check_holes_bothsides(set, rkl, &j); /* NOTE(review): result ignored — presumably a replay for debugging */
+      return false;
+    }
+  } while (rkl_iterator_turn(&i, rkl, false)); /* the sweep ends when the turn returns 0 */
+
+  do { /* continues from wherever the previous sweep left the iterator, turning the other way */
+    debug_set(set, "recoil-reverse", --o);
+    if (!check_holes_bothsides(set, rkl, &i))
+      return false;
+  } while (rkl_iterator_turn(&i, rkl, true));
+
+  rkl_iterator_init(&i, rkl, true); /* restart with the opposite init flag */
+  o = 0;
+  do {
+    debug_set(set, "initial-reverse", --o);
+    if (!check_holes_bothsides(set, rkl, &i))
+      return false;
+  } while (rkl_iterator_turn(&i, rkl, false));
+
+  do {
+    debug_set(set, "recoil-forward", o++);
+    if (!check_holes_bothsides(set, rkl, &i))
+      return false;
+  } while (rkl_iterator_turn(&i, rkl, true));
+
+  return true; /* all four sweeps passed */
+}
+
+static bool stochastic_pass_hole(size_t set, size_t trims) { /* one case: build rkl from `set`, check, pop, re-check */
+  const size_t one = 1;
+  set &= ~one; /* bit 0 is never used as an id (ids pushed below start at 1) */
+  if (!set)
+    return true; /* an empty bitmap is not counted as a case */
+
+  ++tst_cases_hole;
+
+  rkl_t rkl; /* NOTE(review): no release of rkl is visible on any return path — confirm whether one is needed */
+  rkl_init(&rkl);
+  for (size_t n = 1; n < CHAR_BIT * sizeof(set); ++n) /* push ids in ascending order */
+    if (bit(set, n))
+      CHECK_EQ(rkl_push(&rkl, n, false), MDBX_SUCCESS);
+
+  if (!check_holes_fourways(set, &rkl))
+    return false;
+
+  while (rkl_len(&rkl) > 1 && trims-- > 0) { /* at most `trims` pops, stopping once a single id is left */
+    if (flipcoin()) { /* the coin selects which rkl_pop() flag is used */
+      const size_t l = (size_t)rkl_pop(&rkl, false); /* presumably the lowest id, judging by the name — confirm */
+      if (l == 0)
+        break; /* presumably 0 means nothing was popped — confirm against rkl_pop() */
+      assert(bit(set, l));
+      set -= one << l; /* the bit is known to be set, so the subtraction just clears it */
+      if (!check_holes_fourways(set, &rkl))
+        return false;
+    } else {
+
+      const size_t h = (size_t)rkl_pop(&rkl, true); /* presumably the highest id, judging by the name — confirm */
+      if (h == 0)
+        break;
+      assert(bit(set, h));
+      set -= one << h; /* mirror the pop in the reference bitmap */
+      if (!check_holes_fourways(set, &rkl))
+        return false;
+    }
+  }
+
+  return true;
+}
+
+static size_t prng_word(void) { /* returns a size_t assembled from the upper halves of PRNG steps */
+  size_t word = (size_t)(prng() >> 32); /* upper 32 bits of one step */
+  if (sizeof(word) > 4) /* size_t wider than 32 bits: a second step fills the rest */
+    word = word << 32 | (size_t)(prng() >> 32);
+  return word;
+}
+
+static bool stochastic_hole(size_t probes) { /* runs `probes` rounds; false as soon as any case fails */
+  for (size_t n = 0; n < probes; ++n) {
+    /* Draws are sequenced by declarators: call-argument evaluation order is unspecified, so seeds stay replayable. */
+    const size_t set = prng_word(), trims = (size_t)(prng() % 11); /* base bitmap, 0..10 pops */
+    const size_t sparse = set & prng_word(), sparse_trims = (size_t)(prng() % 11); /* AND: fewer ids */
+    const size_t dense = set | prng_word(), dense_trims = (size_t)(prng() % 11); /* OR: more ids */
+    if (!stochastic_pass_hole(set, trims) || !stochastic_pass_hole(sparse, sparse_trims) ||
+        !stochastic_pass_hole(dense, dense_trims))
+      return false;
+  }
+  return true;
+}
+
+/*-----------------------------------------------------------------------------*/
+
 int main(int argc, const char *argv[]) {
   (void)argc;
   (void)argv;
@@ -235,14 +483,15 @@ int main(int argc, const char *argv[]) {
 #ifndef NDEBUG
   // tst_target = 281870;
 #endif
-  prng = (uint64_t)time(nullptr);
-  printf("prng-seed %" PRIu64 "\n", prng);
+  prng_state = (uint64_t)time(nullptr);
+  printf("prng-seed %" PRIu64 "\n", prng_state);
   fflush(nullptr);
 
   trivia();
   stochastic(42 * 42 * 42, 42);
-  printf("done: %zu cases, %zu iterations, %zu checks ok, %zu checks failed\n", tst_cases, tst_iterations, tst_ok,
-         tst_failed);
+  stochastic_hole(24 * 24 * 24);
+  printf("done: %zu+%zu cases, %zu iterations, %zu checks ok, %zu checks failed\n", tst_cases, tst_cases_hole,
+         tst_iterations, tst_ok, tst_failed);
   fflush(nullptr);
   return tst_failed ? EXIT_FAILURE : EXIT_SUCCESS;
 }