/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024

#include "internals.h"

void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) {
  tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) &&
                   txn->tw.spilled.least_removed > 0);
  txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
                                      ? idx
                                      : txn->tw.spilled.least_removed;
  txn->tw.spilled.list[idx] |= 1;
  MDBX_PNL_SETSIZE(txn->tw.spilled.list,
                   MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
                       (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));

  while (unlikely(npages > 1)) {
    const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1;
    if (MDBX_PNL_ASCENDING) {
      if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) ||
          (txn->tw.spilled.list[idx] >> 1) != pgno)
        return;
    } else {
      if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno)
        return;
      txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
                                          ? idx
                                          : txn->tw.spilled.least_removed;
    }
    txn->tw.spilled.list[idx] |= 1;
    MDBX_PNL_SETSIZE(txn->tw.spilled.list,
                     MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
                         (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
    --npages;
  }
}

pnl_t spill_purge(MDBX_txn *txn) {
  tASSERT(txn, txn->tw.spilled.least_removed > 0);
  const pnl_t sl = txn->tw.spilled.list;
  if (txn->tw.spilled.least_removed != INT_MAX) {
    size_t len = MDBX_PNL_GETSIZE(sl), r, w;
    for (w = r = txn->tw.spilled.least_removed; r <= len; ++r) {
      sl[w] = sl[r];
      w += 1 - (sl[r] & 1);
    }
    for (size_t i = 1; i < w; ++i)
      tASSERT(txn, (sl[i] & 1) == 0);
    MDBX_PNL_SETSIZE(sl, w - 1);
    txn->tw.spilled.least_removed = INT_MAX;
  } else {
    for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i)
      tASSERT(txn, (sl[i] & 1) == 0);
  }
  return sl;
}
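
/* Note on the encoding used above: entries of tw.spilled.list hold the page
 * number shifted left by one bit (hence the ">> 1" in spill_remove() and the
 * "<< 1" scale passed to pnl_sort() in spill_slowpath() below). Setting the
 * low bit turns a slot into a deletion marker; spill_remove() also tracks the
 * lowest deleted index in least_removed (INT_MAX meaning "nothing deleted"),
 * so spill_purge() can start compaction from that point and simply drop every
 * slot whose low bit is set. */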

/*----------------------------------------------------------------------------*/

static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp,
                      const size_t npages) {
  tASSERT(txn, !(txn->flags & MDBX_WRITEMAP));
#if MDBX_ENABLE_PGOP_STAT
  txn->env->lck->pgops.spill.weak += npages;
#endif /* MDBX_ENABLE_PGOP_STAT */
  const pgno_t pgno = dp->pgno;
  int err = iov_page(txn, ctx, dp, npages);
  if (likely(err == MDBX_SUCCESS))
    err = spill_append_span(&txn->tw.spilled.list, pgno, npages);
  return err;
}

/* Set unspillable LRU-label for dirty pages watched by txn.
 * Returns the number of pages marked as unspillable. */
static size_t spill_cursor_keep(const MDBX_txn *const txn,
                                const MDBX_cursor *mc) {
  tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0);
  size_t keep = 0;
  while (!is_poor(mc)) {
    tASSERT(txn, mc->top >= 0);
    const page_t *mp;
    intptr_t i = 0;
    do {
      mp = mc->pg[i];
      tASSERT(txn, !is_subpage(mp));
      if (is_modifable(txn, mp)) {
        size_t const n = dpl_search(txn, mp->pgno);
        if (txn->tw.dirtylist->items[n].pgno == mp->pgno &&
            /* don't count the same page twice */ dpl_age(txn, n)) {
          size_t *const ptr = ptr_disp(txn->tw.dirtylist->items[n].ptr,
                                       -(ptrdiff_t)sizeof(size_t));
          *ptr = txn->tw.dirtylru;
          tASSERT(txn, dpl_age(txn, n) == 0);
          ++keep;
        }
      }
    } while (++i <= mc->top);

    tASSERT(txn, is_leaf(mp));
    if (!mc->subcur || mc->ki[mc->top] >= page_numkeys(mp))
      break;
    if (!(node_flags(page_node(mp, mc->ki[mc->top])) & N_SUBDATA))
      break;
    mc = &mc->subcur->cursor;
  }
  return keep;
}
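
/* The LRU label of a dirty page is kept in the size_t placed just before the
 * page image (hence the ptr_disp(..., -sizeof(size_t)) above, presumably set
 * up by the dirty-page allocator). Writing the transaction's current dirtylru
 * there makes dpl_age() report zero, i.e. the page looks freshly touched, so
 * spill_prio() below will refuse to spill it. */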

static size_t spill_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
  tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0);
  dpl_lru_turn(txn);
  size_t keep = m0 ? spill_cursor_keep(txn, m0) : 0;

  TXN_FOREACH_DBI_ALL(txn, dbi) {
    if (F_ISSET(txn->dbi_state[dbi], DBI_DIRTY | DBI_VALID) &&
        txn->dbs[dbi].root != P_INVALID)
      for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next)
        if (mc != m0)
          keep += spill_cursor_keep(txn, mc);
  }

  return keep;
}

/* Returns the spilling priority (0..255) for a dirty page:
 *      0 = should be spilled;
 *    ...
 *  > 255 = must not be spilled. */
MDBX_NOTHROW_PURE_FUNCTION static unsigned
spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) {
  dpl_t *const dl = txn->tw.dirtylist;
  const uint32_t age = dpl_age(txn, i);
  const size_t npages = dpl_npages(dl, i);
  const pgno_t pgno = dl->items[i].pgno;
  if (age == 0) {
    DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno);
    return 256;
  }

  page_t *const dp = dl->items[i].ptr;
  if (dp->flags & (P_LOOSE | P_SPILLED)) {
    DEBUG("skip %s %zu page %" PRIaPGNO,
          (dp->flags & P_LOOSE) ? "loose" : "parent-spilled", npages, pgno);
    return 256;
  }

  /* Can't spill twice,
   * make sure it's not already in a parent's spill list(s). */
  MDBX_txn *parent = txn->parent;
  if (parent && (parent->flags & MDBX_TXN_SPILLS)) {
    do
      if (spill_intersect(parent, pgno, npages)) {
        DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno);
        dp->flags |= P_SPILLED;
        return 256;
      }
    while ((parent = parent->parent) != nullptr);
  }

  tASSERT(txn, age * (uint64_t)reciprocal < UINT32_MAX);
  unsigned prio = age * reciprocal >> 24;
  tASSERT(txn, prio < 256);
  if (likely(npages == 1))
    return prio = 256 - prio;

  /* make large/overflow pages more likely to be spilled */
  size_t factor = npages | npages >> 1;
  factor |= factor >> 2;
  factor |= factor >> 4;
  factor |= factor >> 8;
  factor |= factor >> 16;
  factor = (size_t)prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157;
  factor = (factor < 256) ? 255 - factor : 0;
  tASSERT(txn, factor < 256 && factor < (256 - prio));
  return prio = (unsigned)factor;
}
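
/* Worked example of the quantization above (hypothetical numbers, assuming
 * log2n_powerof2() returns the exact log2 of a power of two): with
 * age_max == 255 the caller computes reciprocal = (255 << 24) / 256 = 16711680,
 * so a single page of age 128 gets prio = (128 * 16711680) >> 24 = 127 and the
 * function returns 256 - 127 = 129, while a fresher single page of age 21 gets
 * prio = 20 and returns 236. For a 4-page overflow run of the same age 21, the
 * bit-smearing rounds npages up to 2^3 - 1 = 7, log2n_powerof2(7 + 1) == 3,
 * and factor = 20 * 3 + 157 = 217, so the function returns 255 - 217 = 38,
 * i.e. the multi-page run is far more likely to be spilled than a single page
 * of the same age. */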

static size_t spill_gate(const MDBX_env *env, intptr_t part,
                         const size_t total) {
  const intptr_t spill_min =
      env->options.spill_min_denominator
          ? (total + env->options.spill_min_denominator - 1) /
                env->options.spill_min_denominator
          : 1;
  const intptr_t spill_max =
      total - (env->options.spill_max_denominator
                   ? total / env->options.spill_max_denominator
                   : 0);
  part = (part < spill_max) ? part : spill_max;
  part = (part > spill_min) ? part : spill_min;
  eASSERT(env, part >= 0 && (size_t)part <= total);
  return (size_t)part;
}
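
/* For illustration (hypothetical option values): with total = 1000 dirty
 * entries, spill_min_denominator = 8 and spill_max_denominator = 4,
 *   spill_min = (1000 + 8 - 1) / 8 = 125,  spill_max = 1000 - 1000 / 4 = 750,
 * so a request of 10 is raised to 125, a request of 900 is clamped to 750,
 * and a request of 300 passes through unchanged. */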

__cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
                          const intptr_t wanna_spill_entries,
                          const intptr_t wanna_spill_npages,
                          const size_t need) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);

  int rc = MDBX_SUCCESS;
  if (unlikely(txn->tw.loose_count >=
               (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
                                  : txn->tw.writemap_dirty_npages)))
    goto done;

  const size_t dirty_entries =
      txn->tw.dirtylist ? (txn->tw.dirtylist->length - txn->tw.loose_count) : 1;
  const size_t dirty_npages =
      (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
                         : txn->tw.writemap_dirty_npages) -
      txn->tw.loose_count;
  const size_t need_spill_entries =
      spill_gate(txn->env, wanna_spill_entries, dirty_entries);
  const size_t need_spill_npages =
      spill_gate(txn->env, wanna_spill_npages, dirty_npages);

  const size_t need_spill = (need_spill_entries > need_spill_npages)
                                ? need_spill_entries
                                : need_spill_npages;
  if (!need_spill)
    goto done;

  if (txn->flags & MDBX_WRITEMAP) {
    NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync",
           dirty_entries, dirty_npages);
    const MDBX_env *env = txn->env;
    tASSERT(txn, txn->tw.spilled.list == nullptr);
    rc = osal_msync(&txn->env->dxb_mmap, 0,
                    pgno_align2os_bytes(env, txn->geo.first_unallocated),
                    MDBX_SYNC_KICK);
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;
#if MDBX_AVOID_MSYNC
    MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr);
    tASSERT(txn, dpl_check(txn));
    env->lck->unsynced_pages.weak +=
        txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count;
    dpl_clear(txn->tw.dirtylist);
    txn->tw.dirtyroom = env->options.dp_limit - txn->tw.loose_count;
    for (page_t *lp = txn->tw.loose_pages; lp != nullptr; lp = page_next(lp)) {
      tASSERT(txn, lp->flags == P_LOOSE);
      rc = dpl_append(txn, lp->pgno, lp, 1);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
      MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
      VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
    }
    tASSERT(txn, dpl_check(txn));
#else
    tASSERT(txn, txn->tw.dirtylist == nullptr);
    env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages;
    txn->tw.writemap_spilled_npages += txn->tw.writemap_dirty_npages;
    txn->tw.writemap_dirty_npages = 0;
#endif /* MDBX_AVOID_MSYNC */
    goto done;
  }

  NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write",
         need_spill_entries, need_spill_npages);
  MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr);
  tASSERT(txn, txn->tw.dirtylist->length - txn->tw.loose_count >= 1);
  tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >=
                   need_spill_npages);
  if (!txn->tw.spilled.list) {
    txn->tw.spilled.least_removed = INT_MAX;
    txn->tw.spilled.list = pnl_alloc(need_spill);
    if (unlikely(!txn->tw.spilled.list)) {
      rc = MDBX_ENOMEM;
    bailout:
      txn->flags |= MDBX_TXN_ERROR;
      return rc;
    }
  } else {
    /* purge deleted slots */
    spill_purge(txn);
    rc = pnl_reserve(&txn->tw.spilled.list, need_spill);
    (void)rc /* ignore since the resulting list may be shorter
     and pnl_append() will increase pnl on demand */
        ;
  }

  /* Sort so that the write to disk will be more sequential */
  dpl_t *const dl = dpl_sort(txn);

  /* Preserve pages which may soon be dirtied again */
  const size_t unspillable = spill_txn_keep(txn, m0);
  if (unspillable + txn->tw.loose_count >= dl->length) {
#if xMDBX_DEBUG_SPILLING == 1 /* avoid false failure in debug mode */
    if (likely(txn->tw.dirtyroom + txn->tw.loose_count >= need))
      return MDBX_SUCCESS;
#endif /* xMDBX_DEBUG_SPILLING */
    ERROR("all %zu dirty pages are unspillable since referenced "
          "by a cursor(s), use fewer cursors or increase "
          "MDBX_opt_txn_dp_limit",
          unspillable);
    goto done;
  }

  /* Subtask: push some pages out to disk according to LRU, but take the
   * following important corrections into account:
   *  - it is better to spill old large/overflow pages, since this frees more
   *    memory and (as currently understood) such pages are much less likely
   *    to be modified again;
   *  - other things being equal, it is better to spill adjacent pages, since
   *    this results in fewer I/O operations;
   *  - it is desirable to spend less time on this than std::partial_sort_copy
   *    would;
   *
   * Solution:
   *  - Quantize the whole range of LRU-labels down to 256 values and use a
   *    single pass of an 8-bit radix sort. This yields 256 levels of
   *    "freshness", including the LRU-label value older than which pages
   *    must be spilled;
   *  - Walk sequentially towards increasing page numbers and spill pages
   *    whose LRU-label is older than the cut-off value, until enough has
   *    been spilled;
   *  - When meeting pages adjacent to the ones being spilled, spill them too
   *    (to reduce the number of I/O operations) if they fall into the first
   *    half between the spilled and the freshest LRU-labels;
   *  - additionally, the sorting deliberately ages large/overflow pages,
   *    thereby increasing their chances of being spilled. */
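
  /* For illustration (hypothetical numbers, tracking only the dirty-entries
   * criterion; the dirty-npages criterion works the same way): suppose
   * need_spill_entries = 100, spillable_entries = 300, and the first radix
   * buckets hold radix_entries[0..2] = {30, 50, 40}. The selection loop below
   * keeps raising prio2spill while the running total is still below 100, so
   * it stops with prio2spill = 2 (cumulative 30 + 50 = 80 < 100, but
   * 80 + 40 = 120 >= 100). It then keeps raising prio2adjacent while the
   * doubled running total stays below spillable + need = 400, i.e. until the
   * cumulative count passes 200, the midpoint between what must be spilled
   * and what could be. Pages with prio <= prio2spill are spilled outright;
   * pages with prio < prio2adjacent are spilled only when they adjoin a page
   * that is already being spilled. */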

  /* get min/max of LRU-labels */
  uint32_t age_max = 0;
  for (size_t i = 1; i <= dl->length; ++i) {
    const uint32_t age = dpl_age(txn, i);
    age_max = (age_max >= age) ? age_max : age;
  }

  VERBOSE("lru-head %u, age-max %u", txn->tw.dirtylru, age_max);

  /* half of 8-bit radix-sort */
  pgno_t radix_entries[256], radix_npages[256];
  memset(&radix_entries, 0, sizeof(radix_entries));
  memset(&radix_npages, 0, sizeof(radix_npages));
  size_t spillable_entries = 0, spillable_npages = 0;
  const uint32_t reciprocal = (UINT32_C(255) << 24) / (age_max + 1);
  for (size_t i = 1; i <= dl->length; ++i) {
    const unsigned prio = spill_prio(txn, i, reciprocal);
    size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t));
    TRACE("page %" PRIaPGNO
          ", lru %zu, is_multi %c, npages %u, age %u of %u, prio %u",
          dl->items[i].pgno, *ptr, (dl->items[i].npages > 1) ? 'Y' : 'N',
          dpl_npages(dl, i), dpl_age(txn, i), age_max, prio);
    if (prio < 256) {
      radix_entries[prio] += 1;
      spillable_entries += 1;
      const pgno_t npages = dpl_npages(dl, i);
      radix_npages[prio] += npages;
      spillable_npages += npages;
    }
  }

  tASSERT(txn, spillable_npages >= spillable_entries);
  pgno_t spilled_entries = 0, spilled_npages = 0;
  if (likely(spillable_entries > 0)) {
    size_t prio2spill = 0, prio2adjacent = 128,
           amount_entries = radix_entries[0], amount_npages = radix_npages[0];
    for (size_t i = 1; i < 256; i++) {
      if (amount_entries < need_spill_entries ||
          amount_npages < need_spill_npages) {
        prio2spill = i;
        prio2adjacent = i + (257 - i) / 2;
        amount_entries += radix_entries[i];
        amount_npages += radix_npages[i];
      } else if (amount_entries + amount_entries <
                     spillable_entries + need_spill_entries
                 /* EQUIVALENT TO: amount - need_spill < spillable - amount */
                 || amount_npages + amount_npages <
                        spillable_npages + need_spill_npages) {
        prio2adjacent = i;
        amount_entries += radix_entries[i];
        amount_npages += radix_npages[i];
      } else
        break;
    }

    VERBOSE("prio2spill %zu, prio2adjacent %zu, spillable %zu/%zu,"
            " wanna-spill %zu/%zu, amount %zu/%zu",
            prio2spill, prio2adjacent, spillable_entries, spillable_npages,
            need_spill_entries, need_spill_npages, amount_entries,
            amount_npages);
    tASSERT(txn, prio2spill < prio2adjacent && prio2adjacent <= 256);

    iov_ctx_t ctx;
    rc = iov_init(
        txn, &ctx, amount_entries, amount_npages,
#if defined(_WIN32) || defined(_WIN64)
        txn->env->ioring.overlapped_fd ? txn->env->ioring.overlapped_fd :
#endif
                                       txn->env->lazy_fd,
        true);
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    size_t r = 0, w = 0;
    pgno_t last = 0;
    while (r < dl->length && (spilled_entries < need_spill_entries ||
                              spilled_npages < need_spill_npages)) {
      dl->items[++w] = dl->items[++r];
      unsigned prio = spill_prio(txn, w, reciprocal);
      if (prio > prio2spill &&
          (prio >= prio2adjacent || last != dl->items[w].pgno))
        continue;

      const size_t e = w;
      last = dpl_endpgno(dl, w);
      while (--w && dpl_endpgno(dl, w) == dl->items[w + 1].pgno &&
             spill_prio(txn, w, reciprocal) < prio2adjacent)
        ;

      for (size_t i = w; ++i <= e;) {
        const unsigned npages = dpl_npages(dl, i);
        prio = spill_prio(txn, i, reciprocal);
        DEBUG("%sspill[%zu] %u page %" PRIaPGNO " (age %d, prio %u)",
              (prio > prio2spill) ? "co-" : "", i, npages, dl->items[i].pgno,
              dpl_age(txn, i), prio);
        tASSERT(txn, prio < 256);
        ++spilled_entries;
        spilled_npages += npages;
        rc = spill_page(txn, &ctx, dl->items[i].ptr, npages);
        if (unlikely(rc != MDBX_SUCCESS))
          goto failed;
      }
    }

    VERBOSE("spilled entries %u, spilled npages %u", spilled_entries,
            spilled_npages);
    tASSERT(txn, spillable_entries == 0 || spilled_entries > 0);
    tASSERT(txn, spilled_npages >= spilled_entries);

  failed:
    while (r < dl->length)
      dl->items[++w] = dl->items[++r];
    tASSERT(txn, r - w == spilled_entries || rc != MDBX_SUCCESS);

    dl->sorted = dpl_setlen(dl, w);
    txn->tw.dirtyroom += spilled_entries;
    txn->tw.dirtylist->pages_including_loose -= spilled_npages;
    tASSERT(txn, dpl_check(txn));

    if (!iov_empty(&ctx)) {
      tASSERT(txn, rc == MDBX_SUCCESS);
      rc = iov_write(&ctx);
    }
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    txn->env->lck->unsynced_pages.weak += spilled_npages;
    pnl_sort(txn->tw.spilled.list, (size_t)txn->geo.first_unallocated << 1);
    txn->flags |= MDBX_TXN_SPILLS;
    NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
           spilled_entries, spilled_npages, txn->tw.dirtyroom);
  } else {
    tASSERT(txn, rc == MDBX_SUCCESS);
    for (size_t i = 1; i <= dl->length; ++i) {
      page_t *dp = dl->items[i].ptr;
      VERBOSE(
          "unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u",
          i, dp->pgno, dpl_npages(dl, i), dp->flags, dpl_age(txn, i),
          spill_prio(txn, i, reciprocal));
    }
  }

#if xMDBX_DEBUG_SPILLING == 2
  if (txn->tw.loose_count + txn->tw.dirtyroom <= need / 2 + 1)
    ERROR("dirty-list length: before %zu, after %zu, parent %zi, loose %zu; "
          "needed %zu, spillable %zu; "
          "spilled %u dirty-entries, now have %zu dirty-room",
          dl->length + spilled_entries, dl->length,
          (txn->parent && txn->parent->tw.dirtylist)
              ? (intptr_t)txn->parent->tw.dirtylist->length
              : -1,
          txn->tw.loose_count, need, spillable_entries, spilled_entries,
          txn->tw.dirtyroom);
  ENSURE(txn->env, txn->tw.loose_count + txn->tw.dirtyroom > need / 2);
#endif /* xMDBX_DEBUG_SPILLING */

done:
  return likely(txn->tw.dirtyroom + txn->tw.loose_count >
                ((need > CURSOR_STACK_SIZE) ? CURSOR_STACK_SIZE : need))
             ? MDBX_SUCCESS
             : MDBX_TXN_FULL;
}