mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 17:44:13 +08:00
mdbx: dynamically discarding unused tail pages of DB file.
Change-Id: I1a0eee50cd27de26521e65c9f7ea51a527a0424e
This commit is contained in:
parent
327e5feb97
commit
51e7159f36
@ -475,6 +475,9 @@ typedef struct MDBX_lockinfo {
|
||||
/* Number of un-synced-with-disk pages for the auto-sync feature. */
|
||||
volatile pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Page number from which the DB tail was discarded last time by madvise(). */
|
||||
volatile pgno_t mti_discarded_tail;
|
||||
|
||||
alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/
|
||||
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
@ -820,6 +823,7 @@ struct MDBX_env {
|
||||
volatile uint64_t *me_autosync_period;
|
||||
volatile pgno_t *me_unsynced_pages;
|
||||
volatile pgno_t *me_autosync_threshold;
|
||||
volatile pgno_t *me_discarded_tail;
|
||||
MDBX_oom_func *me_oom_func; /* Callback for kicking laggard readers */
|
||||
struct {
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
@ -830,6 +834,7 @@ struct MDBX_env {
|
||||
uint64_t autosync_period;
|
||||
pgno_t autosync_pending;
|
||||
pgno_t autosync_threshold;
|
||||
pgno_t discarded_tail;
|
||||
} me_lckless_stub;
|
||||
#if MDBX_DEBUG
|
||||
MDBX_assert_func *me_assert_func; /* Callback for assertion failures */
|
||||
|
@ -663,11 +663,21 @@ MDBX_srwlock_function mdbx_srwlock_Init, mdbx_srwlock_AcquireShared,
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
/* Substitute with the same signature as MDBX_DiscardVirtualMemory, installed
 * by mdbx_winnt_import() when the DiscardVirtualMemory import leaves
 * mdbx_DiscardVirtualMemory unset.
 *
 * Issues VirtualAlloc(MEM_RESET) over [VirtualAddress, VirtualAddress + Size).
 * Returns ERROR_SUCCESS when VirtualAlloc() yields a non-NULL result,
 * otherwise the calling thread's GetLastError() code. */
static DWORD WINAPI stub_DiscardVirtualMemory(PVOID VirtualAddress,
                                              SIZE_T Size) {
  if (VirtualAlloc(VirtualAddress, Size, MEM_RESET, PAGE_NOACCESS))
    return ERROR_SUCCESS;

  /* VirtualAlloc() returned NULL: report the reason to the caller. */
  return GetLastError();
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx;
|
||||
MDBX_GetVolumeInformationByHandleW mdbx_GetVolumeInformationByHandleW;
|
||||
MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW;
|
||||
MDBX_SetFileInformationByHandle mdbx_SetFileInformationByHandle;
|
||||
MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory;
|
||||
MDBX_DiscardVirtualMemory mdbx_DiscardVirtualMemory;
|
||||
MDBX_NtFsControlFile mdbx_NtFsControlFile;
|
||||
|
||||
static void mdbx_winnt_import(void) {
|
||||
@ -700,6 +710,9 @@ static void mdbx_winnt_import(void) {
|
||||
GET_KERNEL32_PROC(GetFinalPathNameByHandleW);
|
||||
GET_KERNEL32_PROC(SetFileInformationByHandle);
|
||||
GET_KERNEL32_PROC(PrefetchVirtualMemory);
|
||||
GET_KERNEL32_PROC(DiscardVirtualMemory);
|
||||
if (!mdbx_DiscardVirtualMemory)
|
||||
mdbx_DiscardVirtualMemory = stub_DiscardVirtualMemory;
|
||||
|
||||
const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll");
|
||||
mdbx_NtFsControlFile =
|
||||
|
84
src/mdbx.c
84
src/mdbx.c
@ -5400,31 +5400,51 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
||||
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
|
||||
}
|
||||
|
||||
/* LY: check conditions to shrink datafile */
|
||||
const pgno_t backlog_gap =
|
||||
pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env);
|
||||
pgno_t shrink = 0;
|
||||
if ((flags & MDBX_SHRINK_ALLOWED) && pending->mm_geo.shrink &&
|
||||
pending->mm_geo.now - pending->mm_geo.next >
|
||||
pending->mm_geo.shrink + backlog_gap) {
|
||||
const pgno_t largest = mdbx_find_largest(
|
||||
if (flags & MDBX_SHRINK_ALLOWED) {
|
||||
/* LY: check conditions to discard unused pages */
|
||||
const pgno_t largest_pgno = mdbx_find_largest(
|
||||
env, (head->mm_geo.next > pending->mm_geo.next) ? head->mm_geo.next
|
||||
: pending->mm_geo.next);
|
||||
if (pending->mm_geo.now > largest &&
|
||||
pending->mm_geo.now - largest > pending->mm_geo.shrink + backlog_gap) {
|
||||
const pgno_t aligner =
|
||||
pending->mm_geo.grow ? pending->mm_geo.grow : pending->mm_geo.shrink;
|
||||
const pgno_t with_backlog_gap = largest + backlog_gap;
|
||||
const pgno_t aligned = pgno_align2os_pgno(
|
||||
env, with_backlog_gap + aligner - with_backlog_gap % aligner);
|
||||
const pgno_t bottom =
|
||||
(aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower;
|
||||
if (pending->mm_geo.now > bottom) {
|
||||
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
|
||||
shrink = pending->mm_geo.now - bottom;
|
||||
pending->mm_geo.now = bottom;
|
||||
if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a)
|
||||
mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1);
|
||||
const size_t largest_aligned2os_bytes =
|
||||
pgno_align2os_bytes(env, largest_pgno);
|
||||
const pgno_t largest_aligned2os_pgno =
|
||||
bytes2pgno(env, largest_aligned2os_bytes);
|
||||
const pgno_t prev_discarded_pgno = *env->me_discarded_tail;
|
||||
*env->me_discarded_tail = largest_aligned2os_pgno;
|
||||
if (prev_discarded_pgno > largest_aligned2os_pgno) {
|
||||
const size_t prev_discarded_bytes =
|
||||
pgno_align2os_bytes(env, prev_discarded_pgno);
|
||||
mdbx_ensure(env, prev_discarded_bytes > largest_aligned2os_bytes);
|
||||
#if defined(MADV_REMOVE_OR_FREE_OR_DONTNEED)
|
||||
(void)madvise(env->me_map + largest_aligned2os_bytes,
|
||||
prev_discarded_bytes - largest_aligned2os_bytes,
|
||||
MADV_REMOVE_OR_FREE_OR_DONTNEED);
|
||||
#endif /* MADV_REMOVE_OR_FREE_OR_DONTNEED */
|
||||
}
|
||||
|
||||
/* LY: check conditions to shrink datafile */
|
||||
const pgno_t backlog_gap =
|
||||
pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env);
|
||||
if (pending->mm_geo.shrink && pending->mm_geo.now - pending->mm_geo.next >
|
||||
pending->mm_geo.shrink + backlog_gap) {
|
||||
if (pending->mm_geo.now > largest_pgno &&
|
||||
pending->mm_geo.now - largest_pgno >
|
||||
pending->mm_geo.shrink + backlog_gap) {
|
||||
const pgno_t aligner = pending->mm_geo.grow ? pending->mm_geo.grow
|
||||
: pending->mm_geo.shrink;
|
||||
const pgno_t with_backlog_gap = largest_pgno + backlog_gap;
|
||||
const pgno_t aligned = pgno_align2os_pgno(
|
||||
env, with_backlog_gap + aligner - with_backlog_gap % aligner);
|
||||
const pgno_t bottom =
|
||||
(aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower;
|
||||
if (pending->mm_geo.now > bottom) {
|
||||
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
|
||||
shrink = pending->mm_geo.now - bottom;
|
||||
pending->mm_geo.now = bottom;
|
||||
if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a)
|
||||
mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -5755,14 +5775,16 @@ static int __cold mdbx_env_map(MDBX_env *env, const int is_exclusive,
|
||||
#endif
|
||||
|
||||
if (is_exclusive && (env->me_flags & MDBX_WRITEMAP) != 0) {
|
||||
#ifdef MADV_REMOVE_OR_FREE
|
||||
const size_t used_alined2os = mdbx_roundup2(usedsize, env->me_os_psize);
|
||||
if (used_alined2os < env->me_mapsize)
|
||||
(void)madvise(env->me_map + used_alined2os,
|
||||
env->me_mapsize - used_alined2os, MADV_REMOVE_OR_FREE);
|
||||
#else
|
||||
(void)usedsize;
|
||||
#endif
|
||||
const size_t used_aligned2os_bytes =
|
||||
mdbx_roundup2(usedsize, env->me_os_psize);
|
||||
*env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes);
|
||||
if (used_aligned2os_bytes < env->me_mapsize) {
|
||||
#if defined(MADV_REMOVE_OR_FREE_OR_DONTNEED)
|
||||
(void)madvise(env->me_map + used_aligned2os_bytes,
|
||||
env->me_mapsize - used_aligned2os_bytes,
|
||||
MADV_REMOVE_OR_FREE_OR_DONTNEED);
|
||||
#endif /* MADV_REMOVE_OR_FREE_OR_DONTNEED */
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef POSIX_FADV_RANDOM
|
||||
@ -6511,6 +6533,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
||||
env->me_autosync_period = &env->me_lckless_stub.autosync_period;
|
||||
env->me_unsynced_pages = &env->me_lckless_stub.autosync_pending;
|
||||
env->me_autosync_threshold = &env->me_lckless_stub.autosync_threshold;
|
||||
env->me_discarded_tail = &env->me_lckless_stub.discarded_tail;
|
||||
env->me_maxreaders = UINT_MAX;
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
env->me_wmutex = &env->me_lckless_stub.wmutex;
|
||||
@ -6623,6 +6646,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
||||
env->me_autosync_period = &env->me_lck->mti_autosync_period;
|
||||
env->me_unsynced_pages = &env->me_lck->mti_unsynced_pages;
|
||||
env->me_autosync_threshold = &env->me_lck->mti_autosync_threshold;
|
||||
env->me_discarded_tail = &env->me_lck->mti_discarded_tail;
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
env->me_wmutex = &env->me_lck->mti_wmutex;
|
||||
#endif
|
||||
|
14
src/osal.h
14
src/osal.h
@ -200,13 +200,15 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
|
||||
#define MADV_DONTDUMP MADV_NOCORE
|
||||
#endif /* MADV_NOCORE -> MADV_DONTDUMP */
|
||||
|
||||
#ifndef MADV_REMOVE_OR_FREE
|
||||
#ifndef MADV_REMOVE_OR_FREE_OR_DONTNEED
|
||||
#ifdef MADV_REMOVE
|
||||
#define MADV_REMOVE_OR_FREE MADV_REMOVE
|
||||
#define MADV_REMOVE_OR_FREE_OR_DONTNEED MADV_REMOVE
|
||||
#elif defined(MADV_FREE)
|
||||
#define MADV_REMOVE_OR_FREE MADV_FREE
|
||||
#define MADV_REMOVE_OR_FREE_OR_DONTNEED MADV_FREE
|
||||
#elif defined(MADV_DONTNEED)
|
||||
#define MADV_REMOVE_OR_FREE_OR_DONTNEED MADV_DONTNEED
|
||||
#endif
|
||||
#endif /* MADV_REMOVE_OR_FREE */
|
||||
#endif /* MADV_REMOVE_OR_FREE_OR_DONTNEED */
|
||||
|
||||
#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
|
||||
defined(i486) || defined(__i486) || defined(__i486__) || \
|
||||
@ -765,6 +767,10 @@ typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)(
|
||||
PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags);
|
||||
extern MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory;
|
||||
|
||||
typedef DWORD(WINAPI *MDBX_DiscardVirtualMemory)(PVOID VirtualAddress,
|
||||
SIZE_T Size);
|
||||
extern MDBX_DiscardVirtualMemory mdbx_DiscardVirtualMemory;
|
||||
|
||||
#endif /* Windows */
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user