mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-30 16:12:05 +08:00
mdbx: support for huge transactions (MDBX_HUGE_TRANSACTIONS option).
Change-Id: I5d6cce6a7fb816add8cb4c066cc50f31cdebf9d5
This commit is contained in:
parent
fdc92b136f
commit
e008f3132d
@ -378,6 +378,7 @@ add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL)
|
||||
add_mdbx_option(MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy)" ${MDBX_BUILD_TOOLS_DEFAULT})
|
||||
add_mdbx_option(MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON)
|
||||
add_mdbx_option(MDBX_TXN_CHECKPID "Paranoid checking PID inside libmdbx's API" AUTO)
|
||||
add_mdbx_option(MDBX_HUGE_TRANSACTIONS "Support for huge write-transactions" OFF)
|
||||
mark_as_advanced(MDBX_TXN_CHECKPID)
|
||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF)
|
||||
|
@ -12,6 +12,7 @@
|
||||
#cmakedefine MDBX_FORCE_ASSERTIONS
|
||||
|
||||
/* Common */
|
||||
#cmakedefine01 MDBX_HUGE_TRANSACTIONS
|
||||
#cmakedefine01 MDBX_TXN_CHECKOWNER
|
||||
#cmakedefine MDBX_TXN_CHECKPID_AUTO
|
||||
#ifndef MDBX_TXN_CHECKPID_AUTO
|
||||
|
29
src/core.c
29
src/core.c
@ -394,9 +394,9 @@ __cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, unsigned flags) {
|
||||
const unsigned page_ln2 = log2n(pagesize);
|
||||
const size_t hard = 0x7FF00000ul;
|
||||
const size_t hard_pages = hard >> page_ln2;
|
||||
const size_t limit = (hard_pages < MDBX_DPL_TXNFULL)
|
||||
const size_t limit = (hard_pages < MDBX_DPL_TXNFULL / 3)
|
||||
? hard
|
||||
: ((size_t)MDBX_DPL_TXNFULL << page_ln2);
|
||||
: ((size_t)MDBX_DPL_TXNFULL / 3 << page_ln2);
|
||||
return (limit < MAX_MAPSIZE) ? limit / 2 : MAX_MAPSIZE / 2;
|
||||
}
|
||||
|
||||
@ -4201,8 +4201,17 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
|
||||
if (txn->tw.dirtyroom > i)
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
/* Less aggressive spill - we originally spilled the entire dirty list,
|
||||
* with a few exceptions for cursor pages and DB root pages. But this
|
||||
* turns out to be a lot of wasted effort because in a large txn many
|
||||
* of those pages will need to be used again. So now we spill only 1/8th
|
||||
* of the dirty pages. Testing revealed this to be a good tradeoff,
|
||||
* better than 1/2, 1/4, or 1/10. */
|
||||
if (need < MDBX_DPL_TXNFULL / 8)
|
||||
need = MDBX_DPL_TXNFULL / 8;
|
||||
|
||||
if (!txn->tw.spill_pages) {
|
||||
txn->tw.spill_pages = mdbx_pnl_alloc(MDBX_DPL_TXNFULL / 8);
|
||||
txn->tw.spill_pages = mdbx_pnl_alloc(need);
|
||||
if (unlikely(!txn->tw.spill_pages))
|
||||
return MDBX_ENOMEM;
|
||||
} else {
|
||||
@ -4221,15 +4230,6 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
/* Less aggressive spill - we originally spilled the entire dirty list,
|
||||
* with a few exceptions for cursor pages and DB root pages. But this
|
||||
* turns out to be a lot of wasted effort because in a large txn many
|
||||
* of those pages will need to be used again. So now we spill only 1/8th
|
||||
* of the dirty pages. Testing revealed this to be a good tradeoff,
|
||||
* better than 1/2, 1/4, or 1/10. */
|
||||
if (need < MDBX_DPL_TXNFULL / 8)
|
||||
need = MDBX_DPL_TXNFULL / 8;
|
||||
|
||||
/* Save the page IDs of all the pages we're flushing */
|
||||
/* Flush from the tail forward; this saves a lot of shifting later on. */
|
||||
for (i = dl->length; i && need; i--) {
|
||||
@ -5173,7 +5173,7 @@ skip_cache:
|
||||
}
|
||||
|
||||
/* Don't try to coalesce too much. */
|
||||
if (unlikely(re_len > MDBX_DPL_TXNFULL / 4))
|
||||
if (unlikely(re_len > MDBX_DPL_TXNFULL / 42))
|
||||
break;
|
||||
if (re_len /* current size */ >= env->me_maxgc_ov1page ||
|
||||
(re_len > prev_re_len && re_len - prev_re_len /* delta from prev */ >=
|
||||
@ -18813,6 +18813,9 @@ __dll_export
|
||||
#else
|
||||
#error "FIXME: Unsupported byte order"
|
||||
#endif /* __BYTE_ORDER__ */
|
||||
#if MDBX_HUGE_TRANSACTIONS
|
||||
" MDBX_HUGE_TRANSACTIONS=YES"
|
||||
#endif /* MDBX_HUGE_TRANSACTIONS */
|
||||
" MDBX_TXN_CHECKPID=" MDBX_TXN_CHECKPID_CONFIG
|
||||
" MDBX_TXN_CHECKOWNER=" MDBX_TXN_CHECKOWNER_CONFIG
|
||||
" MDBX_64BIT_ATOMIC=" MDBX_64BIT_ATOMIC_CONFIG
|
||||
|
@ -647,9 +647,16 @@ typedef MDBX_DP *MDBX_DPL;
|
||||
#define MDBX_PNL_GRANULATE 1024
|
||||
#define MDBX_PNL_INITIAL \
|
||||
(MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
||||
|
||||
#if MDBX_HUGE_TRANSACTIONS
|
||||
#define MDBX_PNL_MAX \
|
||||
((1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
||||
#define MDBX_DPL_TXNFULL (MDBX_PNL_MAX / 2)
|
||||
#else
|
||||
#define MDBX_PNL_MAX \
|
||||
((1u << 24) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
||||
#define MDBX_DPL_TXNFULL (MDBX_PNL_MAX / 4)
|
||||
#endif /* MDBX_HUGE_TRANSACTIONS */
|
||||
|
||||
#define MDBX_TXL_GRANULATE 32
|
||||
#define MDBX_TXL_INITIAL \
|
||||
@ -925,7 +932,7 @@ struct MDBX_env {
|
||||
MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
/* MDBX_DP of pages written during a write txn. Length MDBX_DPL_TXNFULL. */
|
||||
/* MDBX_DP of pages written during a write txn. */
|
||||
MDBX_DPL me_dirtylist;
|
||||
/* Number of freelist items that can fit in a single overflow page */
|
||||
unsigned me_maxgc_ov1page;
|
||||
|
@ -13,6 +13,11 @@
|
||||
*
|
||||
*/
|
||||
|
||||
/* Support for huge write-transactions */
|
||||
#ifndef MDBX_HUGE_TRANSACTIONS
|
||||
#define MDBX_HUGE_TRANSACTIONS 0
|
||||
#endif /* MDBX_HUGE_TRANSACTIONS */
|
||||
|
||||
/* using fcntl(F_FULLFSYNC) with 5-10 times slowdown */
|
||||
#define MDBX_OSX_WANNA_DURABILITY 0
|
||||
/* using fsync() with a chance of data loss on power failure */
|
||||
|
Loading…
x
Reference in New Issue
Block a user