diff --git a/src/bits.h b/src/bits.h index fd36cac9..27a769c5 100644 --- a/src/bits.h +++ b/src/bits.h @@ -475,6 +475,9 @@ typedef struct MDBX_lockinfo { /* Number un-synced-with-disk pages for auto-sync feature. */ volatile pgno_t mti_unsynced_pages; + /* Number of the first page of the tail last discarded via madvise(). */ + volatile pgno_t mti_discarded_tail; + alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/ #ifdef MDBX_OSAL_LOCK @@ -820,6 +823,7 @@ struct MDBX_env { volatile uint64_t *me_autosync_period; volatile pgno_t *me_unsynced_pages; volatile pgno_t *me_autosync_threshold; + volatile pgno_t *me_discarded_tail; MDBX_oom_func *me_oom_func; /* Callback for kicking laggard readers */ struct { #ifdef MDBX_OSAL_LOCK @@ -830,6 +834,7 @@ struct MDBX_env { uint64_t autosync_period; pgno_t autosync_pending; pgno_t autosync_threshold; + pgno_t discarded_tail; } me_lckless_stub; #if MDBX_DEBUG MDBX_assert_func *me_assert_func; /* Callback for assertion failures */ diff --git a/src/lck-windows.c b/src/lck-windows.c index 34a6e5a5..01fce3dd 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -663,11 +663,21 @@ MDBX_srwlock_function mdbx_srwlock_Init, mdbx_srwlock_AcquireShared, /*----------------------------------------------------------------------------*/ +static DWORD WINAPI stub_DiscardVirtualMemory(PVOID VirtualAddress, + SIZE_T Size) { + return VirtualAlloc(VirtualAddress, Size, MEM_RESET, PAGE_NOACCESS) + ? 
ERROR_SUCCESS + : GetLastError(); +} + +/*----------------------------------------------------------------------------*/ + MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx; MDBX_GetVolumeInformationByHandleW mdbx_GetVolumeInformationByHandleW; MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW; MDBX_SetFileInformationByHandle mdbx_SetFileInformationByHandle; MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory; +MDBX_DiscardVirtualMemory mdbx_DiscardVirtualMemory; MDBX_NtFsControlFile mdbx_NtFsControlFile; static void mdbx_winnt_import(void) { @@ -700,6 +710,9 @@ static void mdbx_winnt_import(void) { GET_KERNEL32_PROC(GetFinalPathNameByHandleW); GET_KERNEL32_PROC(SetFileInformationByHandle); GET_KERNEL32_PROC(PrefetchVirtualMemory); + GET_KERNEL32_PROC(DiscardVirtualMemory); + if (!mdbx_DiscardVirtualMemory) + mdbx_DiscardVirtualMemory = stub_DiscardVirtualMemory; const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll"); mdbx_NtFsControlFile = diff --git a/src/mdbx.c b/src/mdbx.c index a8edbc64..688e7279 100644 --- a/src/mdbx.c +++ b/src/mdbx.c @@ -5400,31 +5400,51 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ } - /* LY: check conditions to shrink datafile */ - const pgno_t backlog_gap = - pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env); pgno_t shrink = 0; - if ((flags & MDBX_SHRINK_ALLOWED) && pending->mm_geo.shrink && - pending->mm_geo.now - pending->mm_geo.next > - pending->mm_geo.shrink + backlog_gap) { - const pgno_t largest = mdbx_find_largest( + if (flags & MDBX_SHRINK_ALLOWED) { + /* LY: check conditions to discard unused pages */ + const pgno_t largest_pgno = mdbx_find_largest( env, (head->mm_geo.next > pending->mm_geo.next) ? head->mm_geo.next : pending->mm_geo.next); - if (pending->mm_geo.now > largest && - pending->mm_geo.now - largest > pending->mm_geo.shrink + backlog_gap) { - const pgno_t aligner = - pending->mm_geo.grow ? 
pending->mm_geo.grow : pending->mm_geo.shrink; - const pgno_t with_backlog_gap = largest + backlog_gap; - const pgno_t aligned = pgno_align2os_pgno( - env, with_backlog_gap + aligner - with_backlog_gap % aligner); - const pgno_t bottom = - (aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower; - if (pending->mm_geo.now > bottom) { - flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ - shrink = pending->mm_geo.now - bottom; - pending->mm_geo.now = bottom; - if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a) - mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1); + const size_t largest_aligned2os_bytes = + pgno_align2os_bytes(env, largest_pgno); + const pgno_t largest_aligned2os_pgno = + bytes2pgno(env, largest_aligned2os_bytes); + const pgno_t prev_discarded_pgno = *env->me_discarded_tail; + *env->me_discarded_tail = largest_aligned2os_pgno; + if (prev_discarded_pgno > largest_aligned2os_pgno) { + const size_t prev_discarded_bytes = + pgno_align2os_bytes(env, prev_discarded_pgno); + mdbx_ensure(env, prev_discarded_bytes > largest_aligned2os_bytes); +#if defined(MADV_REMOVE_OR_FREE_OR_DONTNEED) + (void)madvise(env->me_map + largest_aligned2os_bytes, + prev_discarded_bytes - largest_aligned2os_bytes, + MADV_REMOVE_OR_FREE_OR_DONTNEED); +#endif /* MADV_REMOVE_OR_FREE_OR_DONTNEED */ + } + + /* LY: check conditions to shrink datafile */ + const pgno_t backlog_gap = + pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env); + if (pending->mm_geo.shrink && pending->mm_geo.now - pending->mm_geo.next > + pending->mm_geo.shrink + backlog_gap) { + if (pending->mm_geo.now > largest_pgno && + pending->mm_geo.now - largest_pgno > + pending->mm_geo.shrink + backlog_gap) { + const pgno_t aligner = pending->mm_geo.grow ? 
pending->mm_geo.grow + : pending->mm_geo.shrink; + const pgno_t with_backlog_gap = largest_pgno + backlog_gap; + const pgno_t aligned = pgno_align2os_pgno( + env, with_backlog_gap + aligner - with_backlog_gap % aligner); + const pgno_t bottom = + (aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower; + if (pending->mm_geo.now > bottom) { + flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ + shrink = pending->mm_geo.now - bottom; + pending->mm_geo.now = bottom; + if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a) + mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1); + } } } } @@ -5755,14 +5775,16 @@ static int __cold mdbx_env_map(MDBX_env *env, const int is_exclusive, #endif if (is_exclusive && (env->me_flags & MDBX_WRITEMAP) != 0) { -#ifdef MADV_REMOVE_OR_FREE - const size_t used_alined2os = mdbx_roundup2(usedsize, env->me_os_psize); - if (used_alined2os < env->me_mapsize) - (void)madvise(env->me_map + used_alined2os, - env->me_mapsize - used_alined2os, MADV_REMOVE_OR_FREE); -#else - (void)usedsize; -#endif + const size_t used_aligned2os_bytes = + mdbx_roundup2(usedsize, env->me_os_psize); + *env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes); + if (used_aligned2os_bytes < env->me_mapsize) { +#if defined(MADV_REMOVE_OR_FREE_OR_DONTNEED) + (void)madvise(env->me_map + used_aligned2os_bytes, + env->me_mapsize - used_aligned2os_bytes, + MADV_REMOVE_OR_FREE_OR_DONTNEED); +#endif /* MADV_REMOVE_OR_FREE_OR_DONTNEED */ + } } #ifdef POSIX_FADV_RANDOM @@ -6511,6 +6533,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, env->me_autosync_period = &env->me_lckless_stub.autosync_period; env->me_unsynced_pages = &env->me_lckless_stub.autosync_pending; env->me_autosync_threshold = &env->me_lckless_stub.autosync_threshold; + env->me_discarded_tail = &env->me_lckless_stub.discarded_tail; env->me_maxreaders = UINT_MAX; #ifdef MDBX_OSAL_LOCK env->me_wmutex = &env->me_lckless_stub.wmutex; @@ -6623,6 
+6646,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, env->me_autosync_period = &env->me_lck->mti_autosync_period; env->me_unsynced_pages = &env->me_lck->mti_unsynced_pages; env->me_autosync_threshold = &env->me_lck->mti_autosync_threshold; + env->me_discarded_tail = &env->me_lck->mti_discarded_tail; #ifdef MDBX_OSAL_LOCK env->me_wmutex = &env->me_lck->mti_wmutex; #endif diff --git a/src/osal.h b/src/osal.h index be7e44d3..12fdd723 100644 --- a/src/osal.h +++ b/src/osal.h @@ -200,13 +200,15 @@ typedef pthread_mutex_t mdbx_fastmutex_t; #define MADV_DONTDUMP MADV_NOCORE #endif /* MADV_NOCORE -> MADV_DONTDUMP */ -#ifndef MADV_REMOVE_OR_FREE +#ifndef MADV_REMOVE_OR_FREE_OR_DONTNEED #ifdef MADV_REMOVE -#define MADV_REMOVE_OR_FREE MADV_REMOVE +#define MADV_REMOVE_OR_FREE_OR_DONTNEED MADV_REMOVE #elif defined(MADV_FREE) -#define MADV_REMOVE_OR_FREE MADV_FREE +#define MADV_REMOVE_OR_FREE_OR_DONTNEED MADV_FREE +#elif defined(MADV_DONTNEED) +#define MADV_REMOVE_OR_FREE_OR_DONTNEED MADV_DONTNEED #endif -#endif /* MADV_REMOVE_OR_FREE */ +#endif /* MADV_REMOVE_OR_FREE_OR_DONTNEED */ #if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \ defined(i486) || defined(__i486) || defined(__i486__) || \ @@ -765,6 +767,10 @@ typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)( PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags); extern MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory; +typedef DWORD(WINAPI *MDBX_DiscardVirtualMemory)(PVOID VirtualAddress, + SIZE_T Size); +extern MDBX_DiscardVirtualMemory mdbx_DiscardVirtualMemory; + #endif /* Windows */ /*----------------------------------------------------------------------------*/