mdbx: refine pgno-lists.

Change-Id: Id38e4aff508437210d4c0e63962067dfe518eb31
This commit is contained in:
Leo Yuriev 2017-07-26 09:31:22 +03:00
parent 592c064873
commit 552b759878
3 changed files with 312 additions and 295 deletions

View File

@ -434,11 +434,11 @@ typedef struct MDBX_lockinfo {
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* Two kind lists of pages (aka IDL) */ /* Two kind lists of pages (aka IDL) */
/* An IDL is an ID List, a sorted array of IDs. The first /* A PNL is a Page Number List, a sorted array of IDs. The first
* element of the array is a counter for how many actual * element of the array is a counter for how many actual
* IDs are in the list. In the libmdbx IDLs are sorted in * IDs are in the list. In the libmdbx PNLs are sorted in
* descending order. */ * descending order. */
typedef pgno_t *MDBX_IDL; typedef pgno_t *MDBX_PNL;
/* List of txnid, only for MDBX_env.mt_lifo_reclaimed */ /* List of txnid, only for MDBX_env.mt_lifo_reclaimed */
typedef txnid_t *MDBX_TXL; typedef txnid_t *MDBX_TXL;
@ -455,23 +455,23 @@ typedef struct MDBX_ID2 {
* unused. The array is sorted in ascending order by mid. */ * unused. The array is sorted in ascending order by mid. */
typedef MDBX_ID2 *MDBX_ID2L; typedef MDBX_ID2 *MDBX_ID2L;
/* IDL sizes - likely should be even bigger /* PNL sizes - likely should be even bigger
* limiting factors: sizeof(pgno_t), thread stack size */ * limiting factors: sizeof(pgno_t), thread stack size */
#define MDBX_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ #define MDBX_PNL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
#define MDBX_IDL_DB_SIZE (1 << MDBX_IDL_LOGN) #define MDBX_PNL_DB_SIZE (1 << MDBX_PNL_LOGN)
#define MDBX_IDL_UM_SIZE (1 << (MDBX_IDL_LOGN + 1)) #define MDBX_PNL_UM_SIZE (1 << (MDBX_PNL_LOGN + 1))
#define MDBX_IDL_DB_MAX (MDBX_IDL_DB_SIZE - 1) #define MDBX_PNL_DB_MAX (MDBX_PNL_DB_SIZE - 1)
#define MDBX_IDL_UM_MAX (MDBX_IDL_UM_SIZE - 1) #define MDBX_PNL_UM_MAX (MDBX_PNL_UM_SIZE - 1)
#define MDBX_IDL_SIZEOF(ids) (((ids)[0] + 1) * sizeof(pgno_t)) #define MDBX_PNL_SIZEOF(pl) (((pl)[0] + 1) * sizeof(pgno_t))
#define MDBX_IDL_IS_ZERO(ids) ((ids)[0] == 0) #define MDBX_PNL_IS_ZERO(pl) ((pl)[0] == 0)
#define MDBX_IDL_CPY(dst, src) (memcpy(dst, src, MDBX_IDL_SIZEOF(src))) #define MDBX_PNL_CPY(dst, src) (memcpy(dst, src, MDBX_PNL_SIZEOF(src)))
#define MDBX_IDL_FIRST(ids) ((ids)[1]) #define MDBX_PNL_FIRST(pl) ((pl)[1])
#define MDBX_IDL_LAST(ids) ((ids)[(ids)[0]]) #define MDBX_PNL_LAST(pl) ((pl)[(pl)[0]])
/* Current max length of an mdbx_midl_alloc()ed IDL */ /* Current max length of an mdbx_pnl_alloc()ed PNL */
#define MDBX_IDL_ALLOCLEN(ids) ((ids)[-1]) #define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* Internal structures */ /* Internal structures */
@ -503,7 +503,7 @@ struct MDBX_txn {
/* The list of reclaimed txns from freeDB */ /* The list of reclaimed txns from freeDB */
MDBX_TXL mt_lifo_reclaimed; MDBX_TXL mt_lifo_reclaimed;
/* The list of pages that became unused during this transaction. */ /* The list of pages that became unused during this transaction. */
MDBX_IDL mt_befree_pages; MDBX_PNL mt_befree_pages;
/* The list of loose pages that became unused and may be reused /* The list of loose pages that became unused and may be reused
* in this transaction, linked through NEXT_LOOSE_PAGE(page). */ * in this transaction, linked through NEXT_LOOSE_PAGE(page). */
MDBX_page *mt_loose_pages; MDBX_page *mt_loose_pages;
@ -512,7 +512,7 @@ struct MDBX_txn {
/* The sorted list of dirty pages we temporarily wrote to disk /* The sorted list of dirty pages we temporarily wrote to disk
* because the dirty list was full. page numbers in here are * because the dirty list was full. page numbers in here are
* shifted left by 1, deleted slots have the LSB set. */ * shifted left by 1, deleted slots have the LSB set. */
MDBX_IDL mt_spill_pages; MDBX_PNL mt_spill_pages;
union { union {
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */ /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
MDBX_ID2L mt_rw_dirtylist; MDBX_ID2L mt_rw_dirtylist;
@ -699,9 +699,9 @@ struct MDBX_env {
#define me_last_reclaimed me_pgstate.mf_last_reclaimed #define me_last_reclaimed me_pgstate.mf_last_reclaimed
#define me_reclaimed_pglist me_pgstate.mf_reclaimed_pglist #define me_reclaimed_pglist me_pgstate.mf_reclaimed_pglist
MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */ MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */
/* IDL of pages that became unused in a write txn */ /* PNL of pages that became unused in a write txn */
MDBX_IDL me_free_pgs; MDBX_PNL me_free_pgs;
/* ID2L of pages written during a write txn. Length MDBX_IDL_UM_SIZE. */ /* ID2L of pages written during a write txn. Length MDBX_PNL_UM_SIZE. */
MDBX_ID2L me_dirtylist; MDBX_ID2L me_dirtylist;
/* Max number of freelist items that can fit in a single overflow page */ /* Max number of freelist items that can fit in a single overflow page */
unsigned me_maxfree_1pg; unsigned me_maxfree_1pg;

View File

@ -156,16 +156,16 @@ __cold void mdbx_rthc_remove(mdbx_thread_key_t key) {
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* Allocate an IDL. /* Allocate an PNL.
* Allocates memory for an IDL of the given size. * Allocates memory for an PNL of the given size.
* Returns IDL on success, NULL on failure. */ * Returns PNL on success, NULL on failure. */
static MDBX_IDL mdbx_midl_alloc(size_t size) { static MDBX_PNL mdbx_pnl_alloc(size_t size) {
MDBX_IDL ids = malloc((size + 2) * sizeof(pgno_t)); MDBX_PNL pl = malloc((size + 2) * sizeof(pgno_t));
if (likely(ids)) { if (likely(pl)) {
*ids++ = (pgno_t)size; *pl++ = (pgno_t)size;
*ids = 0; *pl = 0;
} }
return ids; return pl;
} }
static MDBX_TXL mdbx_txl_alloc(void) { static MDBX_TXL mdbx_txl_alloc(void) {
@ -181,11 +181,11 @@ static MDBX_TXL mdbx_txl_alloc(void) {
return ptr; return ptr;
} }
/* Free an IDL. /* Free an PNL.
* [in] ids The IDL to free. */ * [in] pl The PNL to free. */
static void mdbx_midl_free(MDBX_IDL ids) { static void mdbx_pnl_free(MDBX_PNL pl) {
if (likely(ids)) if (likely(pl))
free(ids - 1); free(pl - 1);
} }
static void mdbx_txl_free(MDBX_TXL list) { static void mdbx_txl_free(MDBX_TXL list) {
@ -193,29 +193,122 @@ static void mdbx_txl_free(MDBX_TXL list) {
free(list - 1); free(list - 1);
} }
/* Append ID to IDL. The IDL must be big enough. */ /* Append ID to PNL. The PNL must be big enough. */
static __inline void mdbx_midl_xappend(MDBX_IDL idl, pgno_t id) { static __inline void mdbx_pnl_xappend(MDBX_PNL pl, pgno_t id) {
assert(idl[0] + (size_t)1 < MDBX_IDL_ALLOCLEN(idl)); assert(pl[0] + (size_t)1 < MDBX_PNL_ALLOCLEN(pl));
idl[idl[0] += 1] = id; pl[pl[0] += 1] = id;
} }
/* Search for an ID in an IDL. static bool mdbx_pnl_check(MDBX_PNL pl) {
* [in] ids The IDL to search. if (pl) {
for (const pgno_t *ptr = pl + pl[0]; --ptr > pl;) {
assert(ptr[0] > ptr[1]);
if (unlikely(ptr[0] <= ptr[1]))
return false;
}
}
return true;
}
/* Sort a PNL in place into descending order.
*
* pnl[0] holds the number of entries; the entries themselves occupy
* pnl[1] .. pnl[pnl[0]]. The routine is a non-recursive quicksort with
* median-of-three pivot selection that hands ranges of at most
* PNL_SMALL + 1 entries over to insertion sort.
*
* The final assertion uses mdbx_pnl_check(), which requires a strictly
* descending order, so a list containing duplicate entries will trip the
* assertion in builds where assert() is active.
*
* [in,out] pnl The PNL to sort. */
static void __hot mdbx_pnl_sort(MDBX_PNL pnl) {
/* Max possible depth of int-indexed tree * 2 items/level.
* Each pending range is stored as a pair (l, ir). Slot 0 is never used:
* jstack is advanced by 2 before a pair is written to istack[jstack - 1]
* and istack[jstack]. */
int istack[sizeof(int) * CHAR_BIT * 2];
int i, j, k, l, ir, jstack;
pgno_t a;

/* Quicksort + Insertion sort for small arrays */
#define PNL_SMALL 8
#define PNL_SWAP(a, b) \
do { \
pgno_t tmp_pgno = (a); \
(a) = (b); \
(b) = tmp_pgno; \
} while (0)

/* Current range is [l, ir], both inclusive and 1-based. */
ir = (int)pnl[0];
l = 1;
jstack = 0;
while (1) {
if (ir - l < PNL_SMALL) { /* Insertion sort */
for (j = l + 1; j <= ir; j++) {
a = pnl[j];
/* Shift entries smaller than 'a' one slot to the right; stop at the
* first entry that is >= a, which keeps the order descending. */
for (i = j - 1; i >= 1; i--) {
if (pnl[i] >= a)
break;
pnl[i + 1] = pnl[i];
}
pnl[i + 1] = a;
}
/* Nothing left on the stack: the whole list is sorted. */
if (jstack == 0)
break;
/* Pop the next pending range (pushed as l first, then ir). */
ir = istack[jstack--];
l = istack[jstack--];
} else {
k = (l + ir) >> 1; /* Choose median of left, center, right */
PNL_SWAP(pnl[k], pnl[l + 1]);
/* After these three conditional swaps:
* pnl[l] >= pnl[l + 1] >= pnl[ir].
* pnl[l + 1] becomes the pivot, and the two outer entries act as
* sentinels that stop the unguarded scans below. */
if (pnl[l] < pnl[ir])
PNL_SWAP(pnl[l], pnl[ir]);
if (pnl[l + 1] < pnl[ir])
PNL_SWAP(pnl[l + 1], pnl[ir]);
if (pnl[l] < pnl[l + 1])
PNL_SWAP(pnl[l], pnl[l + 1]);
i = l + 1;
j = ir;
a = pnl[l + 1];
while (1) {
/* Scan forward past entries greater than the pivot ... */
do
i++;
while (pnl[i] > a);
/* ... and backward past entries smaller than the pivot. */
do
j--;
while (pnl[j] < a);
/* Scans crossed: partitioning of this range is complete. */
if (j < i)
break;
PNL_SWAP(pnl[i], pnl[j]);
}
/* Move the pivot into its final position j. */
pnl[l + 1] = pnl[j];
pnl[j] = a;
jstack += 2;
/* Push the larger of the two sub-ranges and continue with the smaller
* one. */
if (ir - i + 1 >= j - l) {
istack[jstack] = ir;
istack[jstack - 1] = i;
ir = j - 1;
} else {
istack[jstack] = j - 1;
istack[jstack - 1] = l;
l = i;
}
}
}
#undef PNL_SMALL
#undef PNL_SWAP
/* Debug-only verification that the result is strictly descending. */
assert(mdbx_pnl_check(pnl));
}
/* Search for an ID in an PNL.
* [in] pl The PNL to search.
* [in] id The ID to search for. * [in] id The ID to search for.
* Returns The index of the first ID greater than or equal to id. */ * Returns The index of the first ID greater than or equal to id. */
static unsigned __hot mdbx_midl_search(MDBX_IDL ids, pgno_t id) { static unsigned __hot mdbx_pnl_search(MDBX_PNL pnl, pgno_t id) {
/* binary search of id in ids assert(mdbx_pnl_check(pnl));
/* binary search of id in pl
* if found, returns position of id * if found, returns position of id
* if not found, returns first position greater than id */ * if not found, returns first position greater than id */
unsigned base = 0; unsigned base = 0;
unsigned cursor = 1; unsigned cursor = 1;
int val = 0; int val = 0;
unsigned n = ids[0]; unsigned n = pnl[0];
while (n > 0) { while (n > 0) {
unsigned pivot = n >> 1; unsigned pivot = n >> 1;
cursor = base + pivot + 1; cursor = base + pivot + 1;
val = mdbx_cmp2int(ids[cursor], id); val = mdbx_cmp2int(pnl[cursor], id);
if (val < 0) { if (val < 0) {
n = pivot; n = pivot;
@ -233,32 +326,32 @@ static unsigned __hot mdbx_midl_search(MDBX_IDL ids, pgno_t id) {
return cursor; return cursor;
} }
/* Shrink an IDL. /* Shrink an PNL.
* Return the IDL to the default size if it has grown larger. * Return the PNL to the default size if it has grown larger.
* [in,out] idp Address of the IDL to shrink. */ * [in,out] ppl Address of the PNL to shrink. */
static void mdbx_midl_shrink(MDBX_IDL *idp) { static void mdbx_pnl_shrink(MDBX_PNL *ppl) {
MDBX_IDL ids = *idp - 1; MDBX_PNL pl = *ppl - 1;
if (unlikely(*ids > MDBX_IDL_UM_MAX)) { if (unlikely(*pl > MDBX_PNL_UM_MAX)) {
/* shrink to MDBX_IDL_UM_MAX */ /* shrink to MDBX_PNL_UM_MAX */
ids = realloc(ids, (MDBX_IDL_UM_MAX + 2) * sizeof(pgno_t)); pl = realloc(pl, (MDBX_PNL_UM_MAX + 2) * sizeof(pgno_t));
if (likely(ids)) { if (likely(pl)) {
*ids++ = MDBX_IDL_UM_MAX; *pl++ = MDBX_PNL_UM_MAX;
*idp = ids; *ppl = pl;
} }
} }
} }
/* Grow an IDL. /* Grow a PNL.
* Return the IDL to the size growed by given number. * Enlarge the PNL's allocated length by the given number of elements.
* [in,out] idp Address of the IDL to grow. */ * [in,out] ppl Address of the PNL to grow. */
static int mdbx_midl_grow(MDBX_IDL *idp, size_t num) { static int mdbx_pnl_grow(MDBX_PNL *ppl, size_t num) {
MDBX_IDL idn = *idp - 1; MDBX_PNL idn = *ppl - 1;
/* grow it */ /* grow it */
idn = realloc(idn, (*idn + num + 2) * sizeof(pgno_t)); idn = realloc(idn, (*idn + num + 2) * sizeof(pgno_t));
if (unlikely(!idn)) if (unlikely(!idn))
return MDBX_ENOMEM; return MDBX_ENOMEM;
*idn++ += (pgno_t)num; *idn++ += (pgno_t)num;
*idp = idn; *ppl = idn;
return 0; return 0;
} }
@ -273,38 +366,38 @@ static int mdbx_txl_grow(MDBX_TXL *ptr, size_t num) {
return 0; return 0;
} }
/* Make room for num additional elements in an IDL. /* Make room for num additional elements in an PNL.
* [in,out] idp Address of the IDL. * [in,out] ppl Address of the PNL.
* [in] num Number of elements to make room for. * [in] num Number of elements to make room for.
* Returns 0 on success, MDBX_ENOMEM on failure. */ * Returns 0 on success, MDBX_ENOMEM on failure. */
static int mdbx_midl_need(MDBX_IDL *idp, size_t num) { static int mdbx_pnl_need(MDBX_PNL *ppl, size_t num) {
MDBX_IDL ids = *idp; MDBX_PNL pl = *ppl;
num += ids[0]; num += pl[0];
if (unlikely(num > ids[-1])) { if (unlikely(num > pl[-1])) {
num = (num + num / 4 + (256 + 2)) & -256; num = (num + num / 4 + (256 + 2)) & -256;
ids = realloc(ids - 1, num * sizeof(pgno_t)); pl = realloc(pl - 1, num * sizeof(pgno_t));
if (unlikely(!ids)) if (unlikely(!pl))
return MDBX_ENOMEM; return MDBX_ENOMEM;
*ids++ = (pgno_t)num - 2; *pl++ = (pgno_t)num - 2;
*idp = ids; *ppl = pl;
} }
return 0; return 0;
} }
/* Append an ID onto an IDL. /* Append an ID onto an PNL.
* [in,out] idp Address of the IDL to append to. * [in,out] ppl Address of the PNL to append to.
* [in] id The ID to append. * [in] id The ID to append.
* Returns 0 on success, MDBX_ENOMEM if the IDL is too large. */ * Returns 0 on success, MDBX_ENOMEM if the PNL is too large. */
static int mdbx_midl_append(MDBX_IDL *idp, pgno_t id) { static int mdbx_pnl_append(MDBX_PNL *ppl, pgno_t id) {
MDBX_IDL ids = *idp; MDBX_PNL pl = *ppl;
/* Too big? */ /* Too big? */
if (unlikely(ids[0] >= ids[-1])) { if (unlikely(pl[0] >= pl[-1])) {
if (mdbx_midl_grow(idp, MDBX_IDL_UM_MAX)) if (mdbx_pnl_grow(ppl, MDBX_PNL_UM_MAX))
return MDBX_ENOMEM; return MDBX_ENOMEM;
ids = *idp; pl = *ppl;
} }
ids[0]++; pl[0]++;
ids[ids[0]] = id; pl[pl[0]] = id;
return 0; return 0;
} }
@ -321,20 +414,20 @@ static int mdbx_txl_append(MDBX_TXL *ptr, txnid_t id) {
return 0; return 0;
} }
/* Append an IDL onto an IDL. /* Append an PNL onto an PNL.
* [in,out] idp Address of the IDL to append to. * [in,out] ppl Address of the PNL to append to.
* [in] app The IDL to append. * [in] app The PNL to append.
* Returns 0 on success, MDBX_ENOMEM if the IDL is too large. */ * Returns 0 on success, MDBX_ENOMEM if the PNL is too large. */
static int mdbx_midl_append_list(MDBX_IDL *idp, MDBX_IDL app) { static int mdbx_pnl_append_list(MDBX_PNL *ppl, MDBX_PNL app) {
MDBX_IDL ids = *idp; MDBX_PNL pnl = *ppl;
/* Too big? */ /* Too big? */
if (unlikely(ids[0] + app[0] >= ids[-1])) { if (unlikely(pnl[0] + app[0] >= pnl[-1])) {
if (mdbx_midl_grow(idp, app[0])) if (mdbx_pnl_grow(ppl, app[0]))
return MDBX_ENOMEM; return MDBX_ENOMEM;
ids = *idp; pnl = *ppl;
} }
memcpy(&ids[ids[0] + 1], &app[1], app[0] * sizeof(pgno_t)); memcpy(&pnl[pnl[0] + 1], &app[1], app[0] * sizeof(pgno_t));
ids[0] += app[0]; pnl[0] += app[0];
return 0; return 0;
} }
@ -351,139 +444,63 @@ static int mdbx_txl_append_list(MDBX_TXL *ptr, MDBX_TXL append) {
return 0; return 0;
} }
/* Append an ID range onto an IDL. /* Append an ID range onto an PNL.
* [in,out] idp Address of the IDL to append to. * [in,out] ppl Address of the PNL to append to.
* [in] id The lowest ID to append. * [in] id The lowest ID to append.
* [in] n Number of IDs to append. * [in] n Number of IDs to append.
* Returns 0 on success, MDBX_ENOMEM if the IDL is too large. */ * Returns 0 on success, MDBX_ENOMEM if the PNL is too large. */
static int mdbx_midl_append_range(MDBX_IDL *idp, pgno_t id, size_t n) { static int mdbx_pnl_append_range(MDBX_PNL *ppl, pgno_t id, size_t n) {
pgno_t *ids = *idp, len = ids[0]; pgno_t *pnl = *ppl, len = pnl[0];
/* Too big? */ /* Too big? */
if (unlikely(len + n > ids[-1])) { if (unlikely(len + n > pnl[-1])) {
if (mdbx_midl_grow(idp, n | MDBX_IDL_UM_MAX)) if (mdbx_pnl_grow(ppl, n | MDBX_PNL_UM_MAX))
return MDBX_ENOMEM; return MDBX_ENOMEM;
ids = *idp; pnl = *ppl;
} }
ids[0] = len + (pgno_t)n; pnl[0] = len + (pgno_t)n;
ids += len; pnl += len;
while (n) while (n)
ids[n--] = id++; pnl[n--] = id++;
return 0; return 0;
} }
/* Merge an IDL onto an IDL. The destination IDL must be big enough. /* Merge an PNL onto an PNL. The destination PNL must be big enough.
* [in] idl The IDL to merge into. * [in] pl The PNL to merge into.
* [in] merge The IDL to merge. */ * [in] merge The PNL to merge. */
static void __hot mdbx_midl_xmerge(MDBX_IDL idl, MDBX_IDL merge) { static void __hot mdbx_pnl_xmerge(MDBX_PNL pnl, MDBX_PNL merge) {
pgno_t old_id, merge_id, i = merge[0], j = idl[0], k = i + j, total = k; assert(mdbx_pnl_check(pnl));
idl[0] = ~(pgno_t)0; /* delimiter for idl scan below */ assert(mdbx_pnl_check(merge));
old_id = idl[j]; pgno_t old_id, merge_id, i = merge[0], j = pnl[0], k = i + j, total = k;
pnl[0] = ~(pgno_t)0; /* delimiter for pl scan below */
old_id = pnl[j];
while (i) { while (i) {
merge_id = merge[i--]; merge_id = merge[i--];
for (; old_id < merge_id; old_id = idl[--j]) for (; old_id < merge_id; old_id = pnl[--j])
idl[k--] = old_id; pnl[k--] = old_id;
idl[k--] = merge_id; pnl[k--] = merge_id;
} }
idl[0] = total; pnl[0] = total;
} assert(mdbx_pnl_check(pnl));
/* Sort an IDL.
* [in,out] ids The IDL to sort. */
static void __hot mdbx_midl_sort(MDBX_IDL ids) {
/* Max possible depth of int-indexed tree * 2 items/level */
int istack[sizeof(int) * CHAR_BIT * 2];
int i, j, k, l, ir, jstack;
pgno_t a;
/* Quicksort + Insertion sort for small arrays */
#define MIDL_SMALL 8
#define MIDL_SWAP(a, b) \
do { \
pgno_t tmp_pgno = (a); \
(a) = (b); \
(b) = tmp_pgno; \
} while (0)
ir = (int)ids[0];
l = 1;
jstack = 0;
for (;;) {
if (ir - l < MIDL_SMALL) { /* Insertion sort */
for (j = l + 1; j <= ir; j++) {
a = ids[j];
for (i = j - 1; i >= 1; i--) {
if (ids[i] >= a)
break;
ids[i + 1] = ids[i];
}
ids[i + 1] = a;
}
if (jstack == 0)
break;
ir = istack[jstack--];
l = istack[jstack--];
} else {
k = (l + ir) >> 1; /* Choose median of left, center, right */
MIDL_SWAP(ids[k], ids[l + 1]);
if (ids[l] < ids[ir])
MIDL_SWAP(ids[l], ids[ir]);
if (ids[l + 1] < ids[ir])
MIDL_SWAP(ids[l + 1], ids[ir]);
if (ids[l] < ids[l + 1])
MIDL_SWAP(ids[l], ids[l + 1]);
i = l + 1;
j = ir;
a = ids[l + 1];
for (;;) {
do
i++;
while (ids[i] > a);
do
j--;
while (ids[j] < a);
if (j < i)
break;
MIDL_SWAP(ids[i], ids[j]);
}
ids[l + 1] = ids[j];
ids[j] = a;
jstack += 2;
if (ir - i + 1 >= j - l) {
istack[jstack] = ir;
istack[jstack - 1] = i;
ir = j - 1;
} else {
istack[jstack] = j - 1;
istack[jstack - 1] = l;
l = i;
}
}
}
#undef MIDL_SMALL
#undef MIDL_SWAP
} }
/* Search for an ID in an ID2L. /* Search for an ID in an ID2L.
* [in] ids The ID2L to search. * [in] pnl The ID2L to search.
* [in] id The ID to search for. * [in] id The ID to search for.
* Returns The index of the first ID2 whose mid member is greater than * Returns The index of the first ID2 whose mid member is greater than
* or equal to id. */ * or equal to id. */
static unsigned __hot mdbx_mid2l_search(MDBX_ID2L ids, pgno_t id) { static unsigned __hot mdbx_mid2l_search(MDBX_ID2L pnl, pgno_t id) {
/* binary search of id in ids /* binary search of id in pnl
* if found, returns position of id * if found, returns position of id
* if not found, returns first position greater than id */ * if not found, returns first position greater than id */
unsigned base = 0; unsigned base = 0;
unsigned cursor = 1; unsigned cursor = 1;
int val = 0; int val = 0;
unsigned n = (unsigned)ids[0].mid; unsigned n = (unsigned)pnl[0].mid;
while (n > 0) { while (n > 0) {
unsigned pivot = n >> 1; unsigned pivot = n >> 1;
cursor = base + pivot + 1; cursor = base + pivot + 1;
val = mdbx_cmp2int(id, ids[cursor].mid); val = mdbx_cmp2int(id, pnl[cursor].mid);
if (val < 0) { if (val < 0) {
n = pivot; n = pivot;
@ -502,39 +519,39 @@ static unsigned __hot mdbx_mid2l_search(MDBX_ID2L ids, pgno_t id) {
} }
/* Insert an ID2 into a ID2L. /* Insert an ID2 into a ID2L.
* [in,out] ids The ID2L to insert into. * [in,out] pnl The ID2L to insert into.
* [in] id The ID2 to insert. * [in] id The ID2 to insert.
* Returns 0 on success, -1 if the ID was already present in the ID2L. */ * Returns 0 on success, -1 if the ID was already present in the ID2L. */
static int mdbx_mid2l_insert(MDBX_ID2L ids, MDBX_ID2 *id) { static int mdbx_mid2l_insert(MDBX_ID2L pnl, MDBX_ID2 *id) {
unsigned x = mdbx_mid2l_search(ids, id->mid); unsigned x = mdbx_mid2l_search(pnl, id->mid);
if (unlikely(x < 1)) if (unlikely(x < 1))
return /* internal error */ -2; return /* internal error */ -2;
if (x <= ids[0].mid && ids[x].mid == id->mid) if (x <= pnl[0].mid && pnl[x].mid == id->mid)
return /* duplicate */ -1; return /* duplicate */ -1;
if (unlikely(ids[0].mid >= MDBX_IDL_UM_MAX)) if (unlikely(pnl[0].mid >= MDBX_PNL_UM_MAX))
return /* too big */ -2; return /* too big */ -2;
/* insert id */ /* insert id */
ids[0].mid++; pnl[0].mid++;
for (unsigned i = (unsigned)ids[0].mid; i > x; i--) for (unsigned i = (unsigned)pnl[0].mid; i > x; i--)
ids[i] = ids[i - 1]; pnl[i] = pnl[i - 1];
ids[x] = *id; pnl[x] = *id;
return 0; return 0;
} }
/* Append an ID2 into a ID2L. /* Append an ID2 into a ID2L.
* [in,out] ids The ID2L to append into. * [in,out] pnl The ID2L to append into.
* [in] id The ID2 to append. * [in] id The ID2 to append.
* Returns 0 on success, -2 if the ID2L is too big. */ * Returns 0 on success, -2 if the ID2L is too big. */
static int mdbx_mid2l_append(MDBX_ID2L ids, MDBX_ID2 *id) { static int mdbx_mid2l_append(MDBX_ID2L pnl, MDBX_ID2 *id) {
/* Too big? */ /* Too big? */
if (unlikely(ids[0].mid >= MDBX_IDL_UM_MAX)) if (unlikely(pnl[0].mid >= MDBX_PNL_UM_MAX))
return -2; return -2;
ids[0].mid++; pnl[0].mid++;
ids[ids[0].mid] = *id; pnl[pnl[0].mid] = *id;
return 0; return 0;
} }
@ -1128,7 +1145,7 @@ static int mdbx_page_loose(MDBX_cursor *mc, MDBX_page *mp) {
txn->mt_loose_count++; txn->mt_loose_count++;
mp->mp_flags |= P_LOOSE; mp->mp_flags |= P_LOOSE;
} else { } else {
int rc = mdbx_midl_append(&txn->mt_befree_pages, pgno); int rc = mdbx_pnl_append(&txn->mt_befree_pages, pgno);
if (unlikely(rc)) if (unlikely(rc))
return rc; return rc;
} }
@ -1260,12 +1277,12 @@ static int mdbx_page_spill(MDBX_cursor *m0, MDBX_val *key, MDBX_val *data) {
return MDBX_SUCCESS; return MDBX_SUCCESS;
if (!txn->mt_spill_pages) { if (!txn->mt_spill_pages) {
txn->mt_spill_pages = mdbx_midl_alloc(MDBX_IDL_UM_MAX); txn->mt_spill_pages = mdbx_pnl_alloc(MDBX_PNL_UM_MAX);
if (unlikely(!txn->mt_spill_pages)) if (unlikely(!txn->mt_spill_pages))
return MDBX_ENOMEM; return MDBX_ENOMEM;
} else { } else {
/* purge deleted slots */ /* purge deleted slots */
MDBX_IDL sl = txn->mt_spill_pages; MDBX_PNL sl = txn->mt_spill_pages;
pgno_t num = sl[0], j = 0; pgno_t num = sl[0], j = 0;
for (i = 1; i <= num; i++) { for (i = 1; i <= num; i++) {
if (!(sl[i] & 1)) if (!(sl[i] & 1))
@ -1285,8 +1302,8 @@ static int mdbx_page_spill(MDBX_cursor *m0, MDBX_val *key, MDBX_val *data) {
* of those pages will need to be used again. So now we spill only 1/8th * of those pages will need to be used again. So now we spill only 1/8th
* of the dirty pages. Testing revealed this to be a good tradeoff, * of the dirty pages. Testing revealed this to be a good tradeoff,
* better than 1/2, 1/4, or 1/10. */ * better than 1/2, 1/4, or 1/10. */
if (need < MDBX_IDL_UM_MAX / 8) if (need < MDBX_PNL_UM_MAX / 8)
need = MDBX_IDL_UM_MAX / 8; need = MDBX_PNL_UM_MAX / 8;
/* Save the page IDs of all the pages we're flushing */ /* Save the page IDs of all the pages we're flushing */
/* flush from the tail forward, this saves a lot of shifting later on. */ /* flush from the tail forward, this saves a lot of shifting later on. */
@ -1301,7 +1318,7 @@ static int mdbx_page_spill(MDBX_cursor *m0, MDBX_val *key, MDBX_val *data) {
MDBX_txn *tx2; MDBX_txn *tx2;
for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) { for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) {
if (tx2->mt_spill_pages) { if (tx2->mt_spill_pages) {
unsigned j = mdbx_midl_search(tx2->mt_spill_pages, pn); unsigned j = mdbx_pnl_search(tx2->mt_spill_pages, pn);
if (j <= tx2->mt_spill_pages[0] && tx2->mt_spill_pages[j] == pn) { if (j <= tx2->mt_spill_pages[0] && tx2->mt_spill_pages[j] == pn) {
dp->mp_flags |= P_KEEP; dp->mp_flags |= P_KEEP;
break; break;
@ -1311,12 +1328,12 @@ static int mdbx_page_spill(MDBX_cursor *m0, MDBX_val *key, MDBX_val *data) {
if (tx2) if (tx2)
continue; continue;
} }
rc = mdbx_midl_append(&txn->mt_spill_pages, pn); rc = mdbx_pnl_append(&txn->mt_spill_pages, pn);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto bailout; goto bailout;
need--; need--;
} }
mdbx_midl_sort(txn->mt_spill_pages); mdbx_pnl_sort(txn->mt_spill_pages);
/* Flush the spilled part of dirty list */ /* Flush the spilled part of dirty list */
rc = mdbx_page_flush(txn, i); rc = mdbx_page_flush(txn, i);
@ -1781,20 +1798,21 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
} }
} }
/* Append IDL from FreeDB record to me_reclaimed_pglist */ /* Append PNL from FreeDB record to me_reclaimed_pglist */
pgno_t *re_idl = (pgno_t *)data.iov_base; pgno_t *re_pnl = (pgno_t *)data.iov_base;
mdbx_tassert(txn, re_idl[0] == 0 || mdbx_tassert(txn, re_pnl[0] == 0 ||
data.iov_len == (re_idl[0] + 1) * sizeof(pgno_t)); data.iov_len == (re_pnl[0] + 1) * sizeof(pgno_t));
repg_pos = re_idl[0]; mdbx_tassert(txn, mdbx_pnl_check(re_pnl));
repg_pos = re_pnl[0];
if (!repg_list) { if (!repg_list) {
if (unlikely(!(env->me_reclaimed_pglist = repg_list = if (unlikely(!(env->me_reclaimed_pglist = repg_list =
mdbx_midl_alloc(repg_pos)))) { mdbx_pnl_alloc(repg_pos)))) {
rc = MDBX_ENOMEM; rc = MDBX_ENOMEM;
goto fail; goto fail;
} }
} else { } else {
if (unlikely((rc = mdbx_midl_need(&env->me_reclaimed_pglist, if (unlikely(
repg_pos)) != 0)) (rc = mdbx_pnl_need(&env->me_reclaimed_pglist, repg_pos)) != 0))
goto fail; goto fail;
repg_list = env->me_reclaimed_pglist; repg_list = env->me_reclaimed_pglist;
} }
@ -1807,17 +1825,17 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
env->me_last_reclaimed = last; env->me_last_reclaimed = last;
if (mdbx_debug_enabled(MDBX_DBG_EXTRA)) { if (mdbx_debug_enabled(MDBX_DBG_EXTRA)) {
mdbx_debug_extra("IDL read txn %" PRIaTXN " root %" PRIaPGNO mdbx_debug_extra("PNL read txn %" PRIaTXN " root %" PRIaPGNO
" num %u, IDL", " num %u, PNL",
last, txn->mt_dbs[FREE_DBI].md_root, repg_pos); last, txn->mt_dbs[FREE_DBI].md_root, repg_pos);
unsigned i; unsigned i;
for (i = repg_pos; i; i--) for (i = repg_pos; i; i--)
mdbx_debug_extra_print(" %" PRIaPGNO "", re_idl[i]); mdbx_debug_extra_print(" %" PRIaPGNO "", re_pnl[i]);
mdbx_debug_extra_print("\n"); mdbx_debug_extra_print("\n");
} }
/* Merge in descending sorted order */ /* Merge in descending sorted order */
mdbx_midl_xmerge(repg_list, re_idl); mdbx_pnl_xmerge(repg_list, re_pnl);
repg_len = repg_list[0]; repg_len = repg_list[0];
if (unlikely((flags & MDBX_ALLOC_CACHE) == 0)) { if (unlikely((flags & MDBX_ALLOC_CACHE) == 0)) {
@ -1834,7 +1852,7 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
} }
/* Don't try to coalesce too much. */ /* Don't try to coalesce too much. */
if (repg_len > MDBX_IDL_UM_SIZE / 2) if (repg_len > MDBX_PNL_UM_SIZE / 2)
break; break;
if (flags & MDBX_COALESCE) { if (flags & MDBX_COALESCE) {
if (repg_len /* current size */ >= env->me_maxfree_1pg / 2 || if (repg_len /* current size */ >= env->me_maxfree_1pg / 2 ||
@ -2016,7 +2034,7 @@ static int mdbx_page_unspill(MDBX_txn *txn, MDBX_page *mp, MDBX_page **ret) {
for (tx2 = txn; tx2; tx2 = tx2->mt_parent) { for (tx2 = txn; tx2; tx2 = tx2->mt_parent) {
if (!tx2->mt_spill_pages) if (!tx2->mt_spill_pages)
continue; continue;
x = mdbx_midl_search(tx2->mt_spill_pages, pn); x = mdbx_pnl_search(tx2->mt_spill_pages, pn);
if (x <= tx2->mt_spill_pages[0] && tx2->mt_spill_pages[x] == pn) { if (x <= tx2->mt_spill_pages[0] && tx2->mt_spill_pages[x] == pn) {
MDBX_page *np; MDBX_page *np;
int num; int num;
@ -2079,14 +2097,14 @@ static int mdbx_page_touch(MDBX_cursor *mc) {
goto done; goto done;
} }
if (unlikely((rc = mdbx_midl_need(&txn->mt_befree_pages, 1)) || if (unlikely((rc = mdbx_pnl_need(&txn->mt_befree_pages, 1)) ||
(rc = mdbx_page_alloc(mc, 1, &np, MDBX_ALLOC_ALL)))) (rc = mdbx_page_alloc(mc, 1, &np, MDBX_ALLOC_ALL))))
goto fail; goto fail;
pgno = np->mp_pgno; pgno = np->mp_pgno;
mdbx_debug("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, DDBI(mc), mdbx_debug("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, DDBI(mc),
mp->mp_pgno, pgno); mp->mp_pgno, pgno);
mdbx_cassert(mc, mp->mp_pgno != pgno); mdbx_cassert(mc, mp->mp_pgno != pgno);
mdbx_midl_xappend(txn->mt_befree_pages, mp->mp_pgno); mdbx_pnl_xappend(txn->mt_befree_pages, mp->mp_pgno);
/* Update the parent page, if any, to point to the new page */ /* Update the parent page, if any, to point to the new page */
if (mc->mc_top) { if (mc->mc_top) {
MDBX_page *parent = mc->mc_pg[mc->mc_top - 1]; MDBX_page *parent = mc->mc_pg[mc->mc_top - 1];
@ -2115,7 +2133,7 @@ static int mdbx_page_touch(MDBX_cursor *mc) {
} }
mdbx_debug("clone db %d page %" PRIaPGNO, DDBI(mc), mp->mp_pgno); mdbx_debug("clone db %d page %" PRIaPGNO, DDBI(mc), mp->mp_pgno);
mdbx_cassert(mc, dl[0].mid < MDBX_IDL_UM_MAX); mdbx_cassert(mc, dl[0].mid < MDBX_PNL_UM_MAX);
/* No - copy it */ /* No - copy it */
np = mdbx_page_malloc(txn, 1); np = mdbx_page_malloc(txn, 1);
if (unlikely(!np)) if (unlikely(!np))
@ -2477,7 +2495,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
txn->mt_child = NULL; txn->mt_child = NULL;
txn->mt_loose_pages = NULL; txn->mt_loose_pages = NULL;
txn->mt_loose_count = 0; txn->mt_loose_count = 0;
txn->mt_dirtyroom = MDBX_IDL_UM_MAX; txn->mt_dirtyroom = MDBX_PNL_UM_MAX;
txn->mt_rw_dirtylist = env->me_dirtylist; txn->mt_rw_dirtylist = env->me_dirtylist;
txn->mt_rw_dirtylist[0].mid = 0; txn->mt_rw_dirtylist[0].mid = 0;
txn->mt_befree_pages = env->me_free_pgs; txn->mt_befree_pages = env->me_free_pgs;
@ -2620,9 +2638,9 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
unsigned i; unsigned i;
txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
txn->mt_dbiseqs = parent->mt_dbiseqs; txn->mt_dbiseqs = parent->mt_dbiseqs;
txn->mt_rw_dirtylist = malloc(sizeof(MDBX_ID2) * MDBX_IDL_UM_SIZE); txn->mt_rw_dirtylist = malloc(sizeof(MDBX_ID2) * MDBX_PNL_UM_SIZE);
if (!txn->mt_rw_dirtylist || if (!txn->mt_rw_dirtylist ||
!(txn->mt_befree_pages = mdbx_midl_alloc(MDBX_IDL_UM_MAX))) { !(txn->mt_befree_pages = mdbx_pnl_alloc(MDBX_PNL_UM_MAX))) {
free(txn->mt_rw_dirtylist); free(txn->mt_rw_dirtylist);
free(txn); free(txn);
return MDBX_ENOMEM; return MDBX_ENOMEM;
@ -2646,8 +2664,8 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
ntxn->mnt_pgstate = ntxn->mnt_pgstate =
env->me_pgstate; /* save parent me_reclaimed_pglist & co */ env->me_pgstate; /* save parent me_reclaimed_pglist & co */
if (env->me_reclaimed_pglist) { if (env->me_reclaimed_pglist) {
size = MDBX_IDL_SIZEOF(env->me_reclaimed_pglist); size = MDBX_PNL_SIZEOF(env->me_reclaimed_pglist);
env->me_reclaimed_pglist = mdbx_midl_alloc(env->me_reclaimed_pglist[0]); env->me_reclaimed_pglist = mdbx_pnl_alloc(env->me_reclaimed_pglist[0]);
if (likely(env->me_reclaimed_pglist)) if (likely(env->me_reclaimed_pglist))
memcpy(env->me_reclaimed_pglist, ntxn->mnt_pgstate.mf_reclaimed_pglist, memcpy(env->me_reclaimed_pglist, ntxn->mnt_pgstate.mf_reclaimed_pglist,
size); size);
@ -2778,7 +2796,7 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
txn->mt_flags = MDBX_TXN_FINISHED; txn->mt_flags = MDBX_TXN_FINISHED;
if (!txn->mt_parent) { if (!txn->mt_parent) {
mdbx_midl_shrink(&txn->mt_befree_pages); mdbx_pnl_shrink(&txn->mt_befree_pages);
env->me_free_pgs = txn->mt_befree_pages; env->me_free_pgs = txn->mt_befree_pages;
/* me_pgstate: */ /* me_pgstate: */
env->me_reclaimed_pglist = NULL; env->me_reclaimed_pglist = NULL;
@ -2795,12 +2813,12 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
txn->mt_parent->mt_child = NULL; txn->mt_parent->mt_child = NULL;
txn->mt_parent->mt_flags &= ~MDBX_TXN_HAS_CHILD; txn->mt_parent->mt_flags &= ~MDBX_TXN_HAS_CHILD;
env->me_pgstate = ((MDBX_ntxn *)txn)->mnt_pgstate; env->me_pgstate = ((MDBX_ntxn *)txn)->mnt_pgstate;
mdbx_midl_free(txn->mt_befree_pages); mdbx_pnl_free(txn->mt_befree_pages);
mdbx_midl_free(txn->mt_spill_pages); mdbx_pnl_free(txn->mt_spill_pages);
free(txn->mt_rw_dirtylist); free(txn->mt_rw_dirtylist);
} }
mdbx_midl_free(pghead); mdbx_pnl_free(pghead);
} }
if (mode & MDBX_END_FREE) { if (mode & MDBX_END_FREE) {
@ -2962,32 +2980,32 @@ again_on_freelist_change:
/* Put loose page numbers in mt_free_pages, /* Put loose page numbers in mt_free_pages,
* since unable to return them to me_reclaimed_pglist. */ * since unable to return them to me_reclaimed_pglist. */
MDBX_page *mp = txn->mt_loose_pages; MDBX_page *mp = txn->mt_loose_pages;
if (unlikely((rc = mdbx_midl_need(&txn->mt_befree_pages, if (unlikely((rc = mdbx_pnl_need(&txn->mt_befree_pages,
txn->mt_loose_count)) != 0)) txn->mt_loose_count)) != 0))
return rc; return rc;
for (; mp; mp = NEXT_LOOSE_PAGE(mp)) for (; mp; mp = NEXT_LOOSE_PAGE(mp))
mdbx_midl_xappend(txn->mt_befree_pages, mp->mp_pgno); mdbx_pnl_xappend(txn->mt_befree_pages, mp->mp_pgno);
} else { } else {
/* Room for loose pages + temp IDL with same */ /* Room for loose pages + temp PNL with same */
if ((rc = mdbx_midl_need(&env->me_reclaimed_pglist, if ((rc = mdbx_pnl_need(&env->me_reclaimed_pglist,
2 * txn->mt_loose_count + 1)) != 0) 2 * txn->mt_loose_count + 1)) != 0)
goto bailout; goto bailout;
MDBX_IDL loose = env->me_reclaimed_pglist + MDBX_PNL loose = env->me_reclaimed_pglist +
MDBX_IDL_ALLOCLEN(env->me_reclaimed_pglist) - MDBX_PNL_ALLOCLEN(env->me_reclaimed_pglist) -
txn->mt_loose_count; txn->mt_loose_count;
unsigned count = 0; unsigned count = 0;
for (MDBX_page *mp = txn->mt_loose_pages; mp; mp = NEXT_LOOSE_PAGE(mp)) for (MDBX_page *mp = txn->mt_loose_pages; mp; mp = NEXT_LOOSE_PAGE(mp))
loose[++count] = mp->mp_pgno; loose[++count] = mp->mp_pgno;
loose[0] = count; loose[0] = count;
mdbx_midl_sort(loose); mdbx_pnl_sort(loose);
mdbx_midl_xmerge(env->me_reclaimed_pglist, loose); mdbx_pnl_xmerge(env->me_reclaimed_pglist, loose);
} }
txn->mt_loose_pages = NULL; txn->mt_loose_pages = NULL;
txn->mt_loose_count = 0; txn->mt_loose_count = 0;
} }
/* Save the IDL of pages freed by this txn, to a single record */ /* Save the PNL of pages freed by this txn, to a single record */
if (befree_count < txn->mt_befree_pages[0]) { if (befree_count < txn->mt_befree_pages[0]) {
if (unlikely(!befree_count)) { if (unlikely(!befree_count)) {
/* Make sure last page of freeDB is touched and on freelist */ /* Make sure last page of freeDB is touched and on freelist */
@ -3001,7 +3019,7 @@ again_on_freelist_change:
key.iov_base = &txn->mt_txnid; key.iov_base = &txn->mt_txnid;
do { do {
befree_count = befree_pages[0]; befree_count = befree_pages[0];
data.iov_len = MDBX_IDL_SIZEOF(befree_pages); data.iov_len = MDBX_PNL_SIZEOF(befree_pages);
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE); rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE);
if (unlikely(rc)) if (unlikely(rc))
goto bailout; goto bailout;
@ -3009,13 +3027,13 @@ again_on_freelist_change:
befree_pages = txn->mt_befree_pages; befree_pages = txn->mt_befree_pages;
} while (befree_count < befree_pages[0]); } while (befree_count < befree_pages[0]);
mdbx_midl_sort(befree_pages); mdbx_pnl_sort(befree_pages);
memcpy(data.iov_base, befree_pages, data.iov_len); memcpy(data.iov_base, befree_pages, data.iov_len);
if (mdbx_debug_enabled(MDBX_DBG_EXTRA)) { if (mdbx_debug_enabled(MDBX_DBG_EXTRA)) {
unsigned i = (unsigned)befree_pages[0]; unsigned i = (unsigned)befree_pages[0];
mdbx_debug_extra("IDL write txn %" PRIaTXN " root %" PRIaPGNO mdbx_debug_extra("PNL write txn %" PRIaTXN " root %" PRIaPGNO
" num %u, IDL", " num %u, PNL",
txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i); txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i);
for (; i; i--) for (; i; i--)
mdbx_debug_extra_print(" %" PRIaPGNO "", befree_pages[i]); mdbx_debug_extra_print(" %" PRIaPGNO "", befree_pages[i]);
@ -3084,7 +3102,7 @@ again_on_freelist_change:
mdbx_tassert(txn, txn->mt_lifo_reclaimed == NULL); mdbx_tassert(txn, txn->mt_lifo_reclaimed == NULL);
} }
/* (Re)write {key = head_id, IDL length = head_room} */ /* (Re)write {key = head_id, PNL length = head_room} */
total_room -= head_room; total_room -= head_room;
head_room = rpl_len - total_room; head_room = rpl_len - total_room;
if (head_room > (intptr_t)env->me_maxfree_1pg && head_id > 1) { if (head_room > (intptr_t)env->me_maxfree_1pg && head_id > 1) {
@ -3104,7 +3122,7 @@ again_on_freelist_change:
if (unlikely(rc)) if (unlikely(rc))
goto bailout; goto bailout;
/* IDL is initially empty, zero out at least the length */ /* PNL is initially empty, zero out at least the length */
pgno_t *pgs = (pgno_t *)data.iov_base; pgno_t *pgs = (pgno_t *)data.iov_base;
intptr_t i = head_room > clean_limit ? head_room : 0; intptr_t i = head_room > clean_limit ? head_room : 0;
do { do {
@ -3170,6 +3188,7 @@ again_on_freelist_change:
data.iov_base = rpl_end; data.iov_base = rpl_end;
pgno_t save = rpl_end[0]; pgno_t save = rpl_end[0];
rpl_end[0] = (pgno_t)chunk_len; rpl_end[0] = (pgno_t)chunk_len;
mdbx_tassert(txn, mdbx_pnl_check(rpl_end));
mc.mc_flags |= C_RECLAIMING; mc.mc_flags |= C_RECLAIMING;
rc = mdbx_cursor_put(&mc, &key, &data, MDBX_CURRENT); rc = mdbx_cursor_put(&mc, &key, &data, MDBX_CURRENT);
mc.mc_flags ^= C_RECLAIMING; mc.mc_flags ^= C_RECLAIMING;
@ -3358,7 +3377,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
MDBX_txn *parent = txn->mt_parent; MDBX_txn *parent = txn->mt_parent;
MDBX_page **lp; MDBX_page **lp;
MDBX_ID2L dst, src; MDBX_ID2L dst, src;
MDBX_IDL pspill; MDBX_PNL pspill;
unsigned i, x, y, len, ps_len; unsigned i, x, y, len, ps_len;
/* Append our reclaim list to parent's */ /* Append our reclaim list to parent's */
@ -3375,10 +3394,10 @@ int mdbx_txn_commit(MDBX_txn *txn) {
} }
/* Append our free list to parent's */ /* Append our free list to parent's */
rc = mdbx_midl_append_list(&parent->mt_befree_pages, txn->mt_befree_pages); rc = mdbx_pnl_append_list(&parent->mt_befree_pages, txn->mt_befree_pages);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
mdbx_midl_free(txn->mt_befree_pages); mdbx_pnl_free(txn->mt_befree_pages);
/* Failures after this must either undo the changes /* Failures after this must either undo the changes
* to the parent or set MDBX_TXN_ERROR in the parent. */ * to the parent or set MDBX_TXN_ERROR in the parent. */
@ -3458,7 +3477,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
} }
} }
} else { /* Simplify the above for single-ancestor case */ } else { /* Simplify the above for single-ancestor case */
len = MDBX_IDL_UM_MAX - txn->mt_dirtyroom; len = MDBX_PNL_UM_MAX - txn->mt_dirtyroom;
} }
/* Merge our dirty list with parent's */ /* Merge our dirty list with parent's */
y = src[0].mid; y = src[0].mid;
@ -3476,12 +3495,11 @@ int mdbx_txn_commit(MDBX_txn *txn) {
if (txn->mt_spill_pages) { if (txn->mt_spill_pages) {
if (parent->mt_spill_pages) { if (parent->mt_spill_pages) {
/* TODO: Prevent failure here, so parent does not fail */ /* TODO: Prevent failure here, so parent does not fail */
rc = rc = mdbx_pnl_append_list(&parent->mt_spill_pages, txn->mt_spill_pages);
mdbx_midl_append_list(&parent->mt_spill_pages, txn->mt_spill_pages);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
parent->mt_flags |= MDBX_TXN_ERROR; parent->mt_flags |= MDBX_TXN_ERROR;
mdbx_midl_free(txn->mt_spill_pages); mdbx_pnl_free(txn->mt_spill_pages);
mdbx_midl_sort(parent->mt_spill_pages); mdbx_pnl_sort(parent->mt_spill_pages);
} else { } else {
parent->mt_spill_pages = txn->mt_spill_pages; parent->mt_spill_pages = txn->mt_spill_pages;
} }
@ -3494,7 +3512,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
parent->mt_loose_count += txn->mt_loose_count; parent->mt_loose_count += txn->mt_loose_count;
parent->mt_child = NULL; parent->mt_child = NULL;
mdbx_midl_free(((MDBX_ntxn *)txn)->mnt_pgstate.mf_reclaimed_pglist); mdbx_pnl_free(((MDBX_ntxn *)txn)->mnt_pgstate.mf_reclaimed_pglist);
txn->mt_signature = 0; txn->mt_signature = 0;
free(txn); free(txn);
return rc; return rc;
@ -3544,9 +3562,9 @@ int mdbx_txn_commit(MDBX_txn *txn) {
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
mdbx_midl_free(env->me_reclaimed_pglist); mdbx_pnl_free(env->me_reclaimed_pglist);
env->me_reclaimed_pglist = NULL; env->me_reclaimed_pglist = NULL;
mdbx_midl_shrink(&txn->mt_befree_pages); mdbx_pnl_shrink(&txn->mt_befree_pages);
if (mdbx_audit_enabled()) if (mdbx_audit_enabled())
mdbx_audit(txn); mdbx_audit(txn);
@ -4136,13 +4154,12 @@ static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) {
mdbx_ensure(env, mdbx_is_power2(pagesize)); mdbx_ensure(env, mdbx_is_power2(pagesize));
mdbx_ensure(env, pagesize >= MIN_PAGESIZE); mdbx_ensure(env, pagesize >= MIN_PAGESIZE);
mdbx_ensure(env, pagesize <= MAX_PAGESIZE); mdbx_ensure(env, pagesize <= MAX_PAGESIZE);
env->me_psize = (unsigned)pagesize; env->me_psize = (unsigned)pagesize;
STATIC_ASSERT(mdbx_maxfree1pg(MIN_PAGESIZE) > 42); STATIC_ASSERT(mdbx_maxfree1pg(MIN_PAGESIZE) > 42);
STATIC_ASSERT(mdbx_maxfree1pg(MAX_PAGESIZE) < MDBX_IDL_DB_MAX); STATIC_ASSERT(mdbx_maxfree1pg(MAX_PAGESIZE) < MDBX_PNL_DB_MAX);
const intptr_t maxfree_1pg = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1; const intptr_t maxfree_1pg = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
mdbx_ensure(env, maxfree_1pg > 42 && maxfree_1pg < MDBX_IDL_DB_MAX); mdbx_ensure(env, maxfree_1pg > 42 && maxfree_1pg < MDBX_PNL_DB_MAX);
env->me_maxfree_1pg = (unsigned)maxfree_1pg; env->me_maxfree_1pg = (unsigned)maxfree_1pg;
STATIC_ASSERT(mdbx_nodemax(MIN_PAGESIZE) > 42); STATIC_ASSERT(mdbx_nodemax(MIN_PAGESIZE) > 42);
@ -4985,8 +5002,8 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
flags &= ~(MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NOSYNC | MDBX_NOMETASYNC | flags &= ~(MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NOSYNC | MDBX_NOMETASYNC |
MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_NOMEMINIT); MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_NOMEMINIT);
} else { } else {
if (!((env->me_free_pgs = mdbx_midl_alloc(MDBX_IDL_UM_MAX)) && if (!((env->me_free_pgs = mdbx_pnl_alloc(MDBX_PNL_UM_MAX)) &&
(env->me_dirtylist = calloc(MDBX_IDL_UM_SIZE, sizeof(MDBX_ID2))))) (env->me_dirtylist = calloc(MDBX_PNL_UM_SIZE, sizeof(MDBX_ID2)))))
rc = MDBX_ENOMEM; rc = MDBX_ENOMEM;
} }
env->me_flags = flags |= MDBX_ENV_ACTIVE; env->me_flags = flags |= MDBX_ENV_ACTIVE;
@ -5143,7 +5160,7 @@ static void __cold mdbx_env_close0(MDBX_env *env) {
mdbx_txl_free(env->me_txn0->mt_lifo_reclaimed); mdbx_txl_free(env->me_txn0->mt_lifo_reclaimed);
free(env->me_txn0); free(env->me_txn0);
} }
mdbx_midl_free(env->me_free_pgs); mdbx_pnl_free(env->me_free_pgs);
if (env->me_flags & MDBX_ENV_TXKEY) { if (env->me_flags & MDBX_ENV_TXKEY) {
mdbx_rthc_remove(env->me_txkey); mdbx_rthc_remove(env->me_txkey);
@ -5503,7 +5520,7 @@ static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
* leave that unless page_touch happens again). */ * leave that unless page_touch happens again). */
if (tx2->mt_spill_pages) { if (tx2->mt_spill_pages) {
pgno_t pn = pgno << 1; pgno_t pn = pgno << 1;
x = mdbx_midl_search(tx2->mt_spill_pages, pn); x = mdbx_pnl_search(tx2->mt_spill_pages, pn);
if (x <= tx2->mt_spill_pages[0] && tx2->mt_spill_pages[x] == pn) if (x <= tx2->mt_spill_pages[0] && tx2->mt_spill_pages[x] == pn)
goto mapped; goto mapped;
} }
@ -5728,7 +5745,7 @@ static int mdbx_ovpage_free(MDBX_cursor *mc, MDBX_page *mp) {
pgno_t pg = mp->mp_pgno; pgno_t pg = mp->mp_pgno;
unsigned x = 0, ovpages = mp->mp_pages; unsigned x = 0, ovpages = mp->mp_pages;
MDBX_env *env = txn->mt_env; MDBX_env *env = txn->mt_env;
MDBX_IDL sl = txn->mt_spill_pages; MDBX_PNL sl = txn->mt_spill_pages;
pgno_t pn = pg << 1; pgno_t pn = pg << 1;
int rc; int rc;
@ -5743,11 +5760,11 @@ static int mdbx_ovpage_free(MDBX_cursor *mc, MDBX_page *mp) {
* range in ancestor txns' dirty and spilled lists. */ * range in ancestor txns' dirty and spilled lists. */
if (env->me_reclaimed_pglist && !txn->mt_parent && if (env->me_reclaimed_pglist && !txn->mt_parent &&
((mp->mp_flags & P_DIRTY) || ((mp->mp_flags & P_DIRTY) ||
(sl && (x = mdbx_midl_search(sl, pn)) <= sl[0] && sl[x] == pn))) { (sl && (x = mdbx_pnl_search(sl, pn)) <= sl[0] && sl[x] == pn))) {
unsigned i, j; unsigned i, j;
pgno_t *mop; pgno_t *mop;
MDBX_ID2 *dl, ix, iy; MDBX_ID2 *dl, ix, iy;
rc = mdbx_midl_need(&env->me_reclaimed_pglist, ovpages); rc = mdbx_pnl_need(&env->me_reclaimed_pglist, ovpages);
if (unlikely(rc)) if (unlikely(rc))
return rc; return rc;
if (!(mp->mp_flags & P_DIRTY)) { if (!(mp->mp_flags & P_DIRTY)) {
@ -5789,7 +5806,7 @@ static int mdbx_ovpage_free(MDBX_cursor *mc, MDBX_page *mp) {
mop[j--] = pg++; mop[j--] = pg++;
mop[0] += ovpages; mop[0] += ovpages;
} else { } else {
rc = mdbx_midl_append_range(&txn->mt_befree_pages, pg, ovpages); rc = mdbx_pnl_append_range(&txn->mt_befree_pages, pg, ovpages);
if (unlikely(rc)) if (unlikely(rc))
return rc; return rc;
} }
@ -8422,7 +8439,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
mc->mc_db->md_root = P_INVALID; mc->mc_db->md_root = P_INVALID;
mc->mc_db->md_depth = 0; mc->mc_db->md_depth = 0;
mc->mc_db->md_leaf_pages = 0; mc->mc_db->md_leaf_pages = 0;
rc = mdbx_midl_append(&mc->mc_txn->mt_befree_pages, mp->mp_pgno); rc = mdbx_pnl_append(&mc->mc_txn->mt_befree_pages, mp->mp_pgno);
if (unlikely(rc)) if (unlikely(rc))
return rc; return rc;
/* Adjust cursors pointing to mp */ /* Adjust cursors pointing to mp */
@ -8450,7 +8467,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
} else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) { } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) {
int i; int i;
mdbx_debug("collapsing root page!"); mdbx_debug("collapsing root page!");
rc = mdbx_midl_append(&mc->mc_txn->mt_befree_pages, mp->mp_pgno); rc = mdbx_pnl_append(&mc->mc_txn->mt_befree_pages, mp->mp_pgno);
if (unlikely(rc)) if (unlikely(rc))
return rc; return rc;
mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0));
@ -10122,8 +10139,8 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
if (unlikely(rc)) if (unlikely(rc))
goto done; goto done;
mdbx_cassert(mc, IS_OVERFLOW(omp)); mdbx_cassert(mc, IS_OVERFLOW(omp));
rc = mdbx_midl_append_range(&txn->mt_befree_pages, pg, rc =
omp->mp_pages); mdbx_pnl_append_range(&txn->mt_befree_pages, pg, omp->mp_pages);
if (unlikely(rc)) if (unlikely(rc))
goto done; goto done;
mc->mc_db->md_overflow_pages -= omp->mp_pages; mc->mc_db->md_overflow_pages -= omp->mp_pages;
@ -10139,14 +10156,14 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
if (!subs && !mc->mc_db->md_overflow_pages) if (!subs && !mc->mc_db->md_overflow_pages)
goto pop; goto pop;
} else { } else {
if (unlikely((rc = mdbx_midl_need(&txn->mt_befree_pages, n)) != 0)) if (unlikely((rc = mdbx_pnl_need(&txn->mt_befree_pages, n)) != 0))
goto done; goto done;
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
pgno_t pg; pgno_t pg;
ni = NODEPTR(mp, i); ni = NODEPTR(mp, i);
pg = NODEPGNO(ni); pg = NODEPGNO(ni);
/* free it */ /* free it */
mdbx_midl_xappend(txn->mt_befree_pages, pg); mdbx_pnl_xappend(txn->mt_befree_pages, pg);
} }
} }
if (!mc->mc_top) if (!mc->mc_top)
@ -10169,7 +10186,7 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
} }
} }
/* free it */ /* free it */
rc = mdbx_midl_append(&txn->mt_befree_pages, mc->mc_db->md_root); rc = mdbx_pnl_append(&txn->mt_befree_pages, mc->mc_db->md_root);
done: done:
if (unlikely(rc)) if (unlikely(rc))
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;

View File

@ -1,4 +1,4 @@
/* mdbx_chk.c - memory-mapped database check tool */ /* mdbx_chk.c - memory-mapped database check tool */
/* /*
* Copyright 2015-2017 Leonid Yuriev <leo@yuriev.ru> * Copyright 2015-2017 Leonid Yuriev <leo@yuriev.ru>
@ -343,7 +343,7 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
data->iov_len); data->iov_len);
else { else {
number = *iptr++; number = *iptr++;
if (number >= MDBX_IDL_UM_MAX) if (number >= MDBX_PNL_UM_MAX)
problem_add("entry", record_number, "wrong idl length", "%" PRIiPTR "", problem_add("entry", record_number, "wrong idl length", "%" PRIiPTR "",
number); number);
else if ((number + 1) * sizeof(pgno_t) != data->iov_len) else if ((number + 1) * sizeof(pgno_t) != data->iov_len)