mdbx: radix sort for large chunks of PNL and DPL.

More for https://github.com/erthink/libmdbx/issues/132

Change-Id: I19b253f78069d4ecd4ec360a12121c78f182fc09
This commit is contained in:
Leonid Yuriev 2021-02-01 01:50:27 +03:00
parent 88bdf4b96f
commit 25c4df0d3e
3 changed files with 110 additions and 31 deletions

View File

@ -1304,6 +1304,7 @@ quanah
quicksort quicksort
Quinteiro Quinteiro
qwest qwest
radixsort
radvisory radvisory
RAII RAII
ramdev ramdev

View File

@ -2686,6 +2686,68 @@ static int lcklist_detach_locked(MDBX_env *env) {
} \ } \
} }
/*------------------------------------------------------------------------------
* LY: radix sort for large chunks */
/* RADIXSORT_IMPL(NAME, TYPE, EXTRACT_KEY) defines
 *
 *   static bool NAME##_radixsort(TYPE *const begin, const unsigned length)
 *
 * which sorts the `length` items starting at `begin` into ascending order of
 * the unsigned key EXTRACT_KEY(item_pointer). It is a stable LSD radix sort
 * that consumes 16 key bits (two 8-bit digits) per round and bounces the
 * items between the caller's array and a temporary heap buffer.
 *
 * Returns false, leaving the items untouched, if the temporary buffer cannot
 * be allocated, so the caller can fall back to another sort. Returns true
 * once the items are in order. */
#define RADIXSORT_IMPL(NAME, TYPE, EXTRACT_KEY) \
  \
  __hot static bool NAME##_radixsort(TYPE *const begin, \
                                     const unsigned length) { \
    /* Fewer than two items are already in order. Returning here also keeps \
     * the do-while loops below, which always touch begin[0], away from an \
     * empty array and avoids a zero-sized allocation. */ \
    if (unlikely(length < 2)) \
      return true; \
    \
    TYPE *tmp = mdbx_malloc(sizeof(TYPE) * length); \
    if (unlikely(!tmp)) \
      return false; \
    \
    unsigned key_shift = 0, key_diff_mask; \
    do { \
      /* Histograms of the low (a) and high (b) byte of the current \
       * 16-bit slice of the key. */ \
      struct { \
        unsigned a[256], b[256]; \
      } counters; \
      memset(&counters, 0, sizeof(counters)); \
      \
      /* Count digits and collect every key bit (at or above key_shift) \
       * in which two neighbouring items differ. */ \
      key_diff_mask = 0; \
      unsigned prev_key = EXTRACT_KEY(begin) >> key_shift; \
      TYPE *r = begin, *end = begin + length; \
      do { \
        const unsigned key = EXTRACT_KEY(r) >> key_shift; \
        counters.a[key & 255]++; \
        counters.b[(key >> 8) & 255]++; \
        key_diff_mask |= prev_key ^ key; \
        prev_key = key; \
      } while (++r != end); \
      \
      /* Turn both histograms into exclusive prefix sums, i.e. the first \
       * output slot of each digit value. */ \
      unsigned ta = 0, tb = 0; \
      for (unsigned i = 0; i < 256; ++i) { \
        const unsigned ia = counters.a[i]; \
        counters.a[i] = ta; \
        ta += ia; \
        const unsigned ib = counters.b[i]; \
        counters.b[i] = tb; \
        tb += ib; \
      } \
      \
      /* Pass 1: scatter by the low byte from the array into tmp. */ \
      r = begin; \
      do { \
        const unsigned key = EXTRACT_KEY(r) >> key_shift; \
        tmp[counters.a[key & 255]++] = *r; \
      } while (++r != end); \
      \
      /* All keys agree above the low byte: tmp is fully sorted, so copy \
       * it back and stop. */ \
      if (unlikely(key_diff_mask < 256)) { \
        memcpy(begin, tmp, (char *)end - (char *)begin); \
        break; \
      } \
      /* Pass 2: scatter by the high byte from tmp back into the array. */ \
      end = (r = tmp) + length; \
      do { \
        const unsigned key = EXTRACT_KEY(r) >> key_shift; \
        begin[counters.b[(key >> 8) & 255]++] = *r; \
      } while (++r != end); \
      \
      key_shift += 16; \
      /* Another round is needed only if keys differ above these 16 bits. */ \
    } while (key_diff_mask >> 16); \
    \
    mdbx_free(tmp); \
    return true; \
  }
/*------------------------------------------------------------------------------ /*------------------------------------------------------------------------------
* LY: Binary search */ * LY: Binary search */
@ -2995,9 +3057,18 @@ static MDBX_PNL mdbx_spill_purge(MDBX_txn *txn) {
return sl; return sl;
} }
/* Key extractor handed to RADIXSORT_IMPL for page-number lists.
 * With MDBX_PNL_ASCENDING the page number itself is the key; otherwise the
 * key is (P_INVALID - pgno), which reverses the resulting order
 * (NOTE(review): this presumes P_INVALID is not less than any stored page
 * number - its definition is not visible here, confirm). */
#if MDBX_PNL_ASCENDING
#define MDBX_PNL_EXTRACT_KEY(ptr) (*(ptr))
#else
#define MDBX_PNL_EXTRACT_KEY(ptr) (P_INVALID - *(ptr))
#endif
/* Instantiates pgno_radixsort() for arrays of pgno_t. */
RADIXSORT_IMPL(pgno, pgno_t, MDBX_PNL_EXTRACT_KEY)
SORT_IMPL(pgno_sort, false, pgno_t, MDBX_PNL_ORDERED) SORT_IMPL(pgno_sort, false, pgno_t, MDBX_PNL_ORDERED)
static __hot void mdbx_pnl_sort(MDBX_PNL pnl) { static __hot void mdbx_pnl_sort(MDBX_PNL pnl) {
pgno_sort(MDBX_PNL_BEGIN(pnl), MDBX_PNL_END(pnl)); if (likely(MDBX_PNL_SIZE(pnl) < MDBX_PNL_RADIXSORT_THRESHOLD) ||
!pgno_radixsort(&MDBX_PNL_FIRST(pnl), MDBX_PNL_SIZE(pnl)))
pgno_sort(MDBX_PNL_BEGIN(pnl), MDBX_PNL_END(pnl));
assert(mdbx_pnl_check(pnl, MAX_PAGENO + 1)); assert(mdbx_pnl_check(pnl, MAX_PAGENO + 1));
} }
@ -3229,41 +3300,47 @@ static int mdbx_dpl_alloc(MDBX_txn *txn) {
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
/* Key extractor handed to RADIXSORT_IMPL: an MDBX_dp item is keyed by its
 * pgno field. */
#define MDBX_DPL_EXTRACT_KEY(ptr) ((ptr)->pgno)
/* Instantiates dpl_radixsort() for arrays of MDBX_dp. */
RADIXSORT_IMPL(dpl, MDBX_dp, MDBX_DPL_EXTRACT_KEY)
#define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno) #define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno)
SORT_IMPL(dp_sort, false, MDBX_dp, DP_SORT_CMP) SORT_IMPL(dp_sort, false, MDBX_dp, DP_SORT_CMP)
__hot static MDBX_dpl *mdbx_dpl_sort_slowpath(MDBX_dpl *dl) { __hot static MDBX_dpl *mdbx_dpl_sort_slowpath(MDBX_dpl *dl) {
const unsigned unsorted = dl->length - dl->sorted; const unsigned unsorted = dl->length - dl->sorted;
if (dl->sorted > unsorted / 4 + 4 && if (likely(unsorted < MDBX_PNL_RADIXSORT_THRESHOLD) ||
dl->length + unsorted < dl->detent + MDBX_DPL_GAP_FOR_MERGESORT) { !dpl_radixsort(dl->items + 1, dl->length)) {
MDBX_dp *const sorted_begin = dl->items + 1; if (dl->sorted > unsorted / 4 + 4 &&
MDBX_dp *const sorted_end = sorted_begin + dl->sorted; dl->length + unsorted < dl->detent + MDBX_DPL_GAP_FOR_MERGESORT) {
MDBX_dp *const end = dl->items + dl->detent + MDBX_DPL_RESERVE_GAP; MDBX_dp *const sorted_begin = dl->items + 1;
MDBX_dp *const tmp = end - unsorted; MDBX_dp *const sorted_end = sorted_begin + dl->sorted;
assert(dl->items + dl->length + 1 < tmp); MDBX_dp *const end = dl->items + dl->detent + MDBX_DPL_RESERVE_GAP;
/* copy unsorted to the end of allocated space and sort it */ MDBX_dp *const tmp = end - unsorted;
memcpy(tmp, sorted_end, unsorted * sizeof(MDBX_dp)); assert(dl->items + dl->length + 1 < tmp);
dp_sort(tmp, tmp + unsorted); /* copy unsorted to the end of allocated space and sort it */
/* merge two parts from end to begin */ memcpy(tmp, sorted_end, unsorted * sizeof(MDBX_dp));
MDBX_dp *w = dl->items + dl->length; dp_sort(tmp, tmp + unsorted);
MDBX_dp *l = dl->items + dl->sorted; /* merge two parts from end to begin */
MDBX_dp *r = end - 1; MDBX_dp *w = dl->items + dl->length;
do { MDBX_dp *l = dl->items + dl->sorted;
const bool cmp = l->pgno > r->pgno; MDBX_dp *r = end - 1;
*w = cmp ? *l : *r; do {
l -= cmp; const bool cmp = l->pgno > r->pgno;
r -= !cmp; *w = cmp ? *l : *r;
} while (likely(--w > l)); l -= cmp;
assert(r == tmp - 1); r -= !cmp;
assert(dl->items[0].pgno == 0 && } while (likely(--w > l));
dl->items[dl->length + 1].pgno == P_INVALID); assert(r == tmp - 1);
if (mdbx_assert_enabled()) assert(dl->items[0].pgno == 0 &&
for (unsigned i = 0; i <= dl->length; ++i) dl->items[dl->length + 1].pgno == P_INVALID);
assert(dl->items[i].pgno < dl->items[i + 1].pgno); if (mdbx_assert_enabled())
} else { for (unsigned i = 0; i <= dl->length; ++i)
dp_sort(dl->items + 1, dl->items + dl->length + 1); assert(dl->items[i].pgno < dl->items[i + 1].pgno);
assert(dl->items[0].pgno == 0 && } else {
dl->items[dl->length + 1].pgno == P_INVALID); dp_sort(dl->items + 1, dl->items + dl->length + 1);
assert(dl->items[0].pgno == 0 &&
dl->items[dl->length + 1].pgno == P_INVALID);
}
} }
dl->sorted = dl->length; dl->sorted = dl->length;
return dl; return dl;

View File

@ -680,6 +680,7 @@ typedef struct MDBX_dpl {
/* PNL sizes */ /* PNL sizes */
#define MDBX_PNL_GRANULATE 1024 #define MDBX_PNL_GRANULATE 1024
/* Item count below which the PNL and DPL sort routines skip the radix sort
 * and use their comparison-based sorts instead. */
#define MDBX_PNL_RADIXSORT_THRESHOLD 1024
#define MDBX_PNL_INITIAL \ #define MDBX_PNL_INITIAL \
(MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))