mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-30 22:47:16 +08:00
mdbx: support for huge transactions (MDBX_HUGE_TRANSACTIONS option).
Change-Id: I5d6cce6a7fb816add8cb4c066cc50f31cdebf9d5
This commit is contained in:
parent
fdc92b136f
commit
e008f3132d
@ -378,6 +378,7 @@ add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL)
|
|||||||
add_mdbx_option(MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy)" ${MDBX_BUILD_TOOLS_DEFAULT})
|
add_mdbx_option(MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy)" ${MDBX_BUILD_TOOLS_DEFAULT})
|
||||||
add_mdbx_option(MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON)
|
add_mdbx_option(MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON)
|
||||||
add_mdbx_option(MDBX_TXN_CHECKPID "Paranoid checking PID inside libmdbx's API" AUTO)
|
add_mdbx_option(MDBX_TXN_CHECKPID "Paranoid checking PID inside libmdbx's API" AUTO)
|
||||||
|
add_mdbx_option(MDBX_HUGE_TRANSACTIONS "Support for huge write-transactions" OFF)
|
||||||
mark_as_advanced(MDBX_TXN_CHECKPID)
|
mark_as_advanced(MDBX_TXN_CHECKPID)
|
||||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||||
add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF)
|
add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF)
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#cmakedefine MDBX_FORCE_ASSERTIONS
|
#cmakedefine MDBX_FORCE_ASSERTIONS
|
||||||
|
|
||||||
/* Common */
|
/* Common */
|
||||||
|
#cmakedefine01 MDBX_HUGE_TRANSACTIONS
|
||||||
#cmakedefine01 MDBX_TXN_CHECKOWNER
|
#cmakedefine01 MDBX_TXN_CHECKOWNER
|
||||||
#cmakedefine MDBX_TXN_CHECKPID_AUTO
|
#cmakedefine MDBX_TXN_CHECKPID_AUTO
|
||||||
#ifndef MDBX_TXN_CHECKPID_AUTO
|
#ifndef MDBX_TXN_CHECKPID_AUTO
|
||||||
|
29
src/core.c
29
src/core.c
@ -394,9 +394,9 @@ __cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, unsigned flags) {
|
|||||||
const unsigned page_ln2 = log2n(pagesize);
|
const unsigned page_ln2 = log2n(pagesize);
|
||||||
const size_t hard = 0x7FF00000ul;
|
const size_t hard = 0x7FF00000ul;
|
||||||
const size_t hard_pages = hard >> page_ln2;
|
const size_t hard_pages = hard >> page_ln2;
|
||||||
const size_t limit = (hard_pages < MDBX_DPL_TXNFULL)
|
const size_t limit = (hard_pages < MDBX_DPL_TXNFULL / 3)
|
||||||
? hard
|
? hard
|
||||||
: ((size_t)MDBX_DPL_TXNFULL << page_ln2);
|
: ((size_t)MDBX_DPL_TXNFULL / 3 << page_ln2);
|
||||||
return (limit < MAX_MAPSIZE) ? limit / 2 : MAX_MAPSIZE / 2;
|
return (limit < MAX_MAPSIZE) ? limit / 2 : MAX_MAPSIZE / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4201,8 +4201,17 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
|
|||||||
if (txn->tw.dirtyroom > i)
|
if (txn->tw.dirtyroom > i)
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
|
|
||||||
|
/* Less aggressive spill - we originally spilled the entire dirty list,
|
||||||
|
* with a few exceptions for cursor pages and DB root pages. But this
|
||||||
|
* turns out to be a lot of wasted effort because in a large txn many
|
||||||
|
* of those pages will need to be used again. So now we spill only 1/8th
|
||||||
|
* of the dirty pages. Testing revealed this to be a good tradeoff,
|
||||||
|
* better than 1/2, 1/4, or 1/10. */
|
||||||
|
if (need < MDBX_DPL_TXNFULL / 8)
|
||||||
|
need = MDBX_DPL_TXNFULL / 8;
|
||||||
|
|
||||||
if (!txn->tw.spill_pages) {
|
if (!txn->tw.spill_pages) {
|
||||||
txn->tw.spill_pages = mdbx_pnl_alloc(MDBX_DPL_TXNFULL / 8);
|
txn->tw.spill_pages = mdbx_pnl_alloc(need);
|
||||||
if (unlikely(!txn->tw.spill_pages))
|
if (unlikely(!txn->tw.spill_pages))
|
||||||
return MDBX_ENOMEM;
|
return MDBX_ENOMEM;
|
||||||
} else {
|
} else {
|
||||||
@ -4221,15 +4230,6 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
|
|||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
|
||||||
/* Less aggressive spill - we originally spilled the entire dirty list,
|
|
||||||
* with a few exceptions for cursor pages and DB root pages. But this
|
|
||||||
* turns out to be a lot of wasted effort because in a large txn many
|
|
||||||
* of those pages will need to be used again. So now we spill only 1/8th
|
|
||||||
* of the dirty pages. Testing revealed this to be a good tradeoff,
|
|
||||||
* better than 1/2, 1/4, or 1/10. */
|
|
||||||
if (need < MDBX_DPL_TXNFULL / 8)
|
|
||||||
need = MDBX_DPL_TXNFULL / 8;
|
|
||||||
|
|
||||||
/* Save the page IDs of all the pages we're flushing */
|
/* Save the page IDs of all the pages we're flushing */
|
||||||
/* flush from the tail forward, this saves a lot of shifting later on. */
|
/* flush from the tail forward, this saves a lot of shifting later on. */
|
||||||
for (i = dl->length; i && need; i--) {
|
for (i = dl->length; i && need; i--) {
|
||||||
@ -5173,7 +5173,7 @@ skip_cache:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Don't try to coalesce too much. */
|
/* Don't try to coalesce too much. */
|
||||||
if (unlikely(re_len > MDBX_DPL_TXNFULL / 4))
|
if (unlikely(re_len > MDBX_DPL_TXNFULL / 42))
|
||||||
break;
|
break;
|
||||||
if (re_len /* current size */ >= env->me_maxgc_ov1page ||
|
if (re_len /* current size */ >= env->me_maxgc_ov1page ||
|
||||||
(re_len > prev_re_len && re_len - prev_re_len /* delta from prev */ >=
|
(re_len > prev_re_len && re_len - prev_re_len /* delta from prev */ >=
|
||||||
@ -18813,6 +18813,9 @@ __dll_export
|
|||||||
#else
|
#else
|
||||||
#error "FIXME: Unsupported byte order"
|
#error "FIXME: Unsupported byte order"
|
||||||
#endif /* __BYTE_ORDER__ */
|
#endif /* __BYTE_ORDER__ */
|
||||||
|
#if MDBX_HUGE_TRANSACTIONS
|
||||||
|
" MDBX_HUGE_TRANSACTIONS=YES"
|
||||||
|
#endif /* MDBX_HUGE_TRANSACTIONS */
|
||||||
" MDBX_TXN_CHECKPID=" MDBX_TXN_CHECKPID_CONFIG
|
" MDBX_TXN_CHECKPID=" MDBX_TXN_CHECKPID_CONFIG
|
||||||
" MDBX_TXN_CHECKOWNER=" MDBX_TXN_CHECKOWNER_CONFIG
|
" MDBX_TXN_CHECKOWNER=" MDBX_TXN_CHECKOWNER_CONFIG
|
||||||
" MDBX_64BIT_ATOMIC=" MDBX_64BIT_ATOMIC_CONFIG
|
" MDBX_64BIT_ATOMIC=" MDBX_64BIT_ATOMIC_CONFIG
|
||||||
|
@ -647,9 +647,16 @@ typedef MDBX_DP *MDBX_DPL;
|
|||||||
#define MDBX_PNL_GRANULATE 1024
|
#define MDBX_PNL_GRANULATE 1024
|
||||||
#define MDBX_PNL_INITIAL \
|
#define MDBX_PNL_INITIAL \
|
||||||
(MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
(MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
||||||
|
|
||||||
|
#if MDBX_HUGE_TRANSACTIONS
|
||||||
|
#define MDBX_PNL_MAX \
|
||||||
|
((1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
||||||
|
#define MDBX_DPL_TXNFULL (MDBX_PNL_MAX / 2)
|
||||||
|
#else
|
||||||
#define MDBX_PNL_MAX \
|
#define MDBX_PNL_MAX \
|
||||||
((1u << 24) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
((1u << 24) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
||||||
#define MDBX_DPL_TXNFULL (MDBX_PNL_MAX / 4)
|
#define MDBX_DPL_TXNFULL (MDBX_PNL_MAX / 4)
|
||||||
|
#endif /* MDBX_HUGE_TRANSACTIONS */
|
||||||
|
|
||||||
#define MDBX_TXL_GRANULATE 32
|
#define MDBX_TXL_GRANULATE 32
|
||||||
#define MDBX_TXL_INITIAL \
|
#define MDBX_TXL_INITIAL \
|
||||||
@ -925,7 +932,7 @@ struct MDBX_env {
|
|||||||
MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */
|
MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */
|
||||||
/* PNL of pages that became unused in a write txn */
|
/* PNL of pages that became unused in a write txn */
|
||||||
MDBX_PNL me_retired_pages;
|
MDBX_PNL me_retired_pages;
|
||||||
/* MDBX_DP of pages written during a write txn. Length MDBX_DPL_TXNFULL. */
|
/* MDBX_DP of pages written during a write txn. */
|
||||||
MDBX_DPL me_dirtylist;
|
MDBX_DPL me_dirtylist;
|
||||||
/* Number of freelist items that can fit in a single overflow page */
|
/* Number of freelist items that can fit in a single overflow page */
|
||||||
unsigned me_maxgc_ov1page;
|
unsigned me_maxgc_ov1page;
|
||||||
|
@ -13,6 +13,11 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Support for huge write-transactions */
|
||||||
|
#ifndef MDBX_HUGE_TRANSACTIONS
|
||||||
|
#define MDBX_HUGE_TRANSACTIONS 0
|
||||||
|
#endif /* MDBX_HUGE_TRANSACTIONS */
|
||||||
|
|
||||||
/* using fcntl(F_FULLFSYNC) with 5-10 times slowdown */
|
/* using fcntl(F_FULLFSYNC) with 5-10 times slowdown */
|
||||||
#define MDBX_OSX_WANNA_DURABILITY 0
|
#define MDBX_OSX_WANNA_DURABILITY 0
|
||||||
/* using fsync() with a chance of data loss on power failure */
|
/* using fsync() with a chance of data loss on power failure */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user