diff --git a/mdbx.h b/mdbx.h index 539b85b1..c4712c73 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1601,6 +1601,32 @@ LIBMDBX_API int mdbx_env_close_ex(MDBX_env *env, int dont_sync); * Returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t bytes); +/* Sets relative period since the last unsteady commit to force flush the data + * buffers to disk, even if MDBX_NOSYNC, MDBX_NOMETASYNC and MDBX_MAPASYNC flags + * are set in the environment. The value affects all processes which operate + * with the given DB until the last process closes it or a new value is set. + * + * Data is always written to disk when mdbx_txn_commit() is called, + * but the operating system may keep it buffered. MDBX always flushes + * the OS buffers upon commit as well, unless the environment was + * opened with MDBX_NOSYNC, MDBX_MAPASYNC or in part MDBX_NOMETASYNC. + * + * The configured period is not checked asynchronously, but only inside the + * functions mdbx_txn_commit() and mdbx_env_sync(). Therefore, in cases where + * transactions are committed infrequently and/or irregularly, polling by + * mdbx_env_sync() may be a reasonable solution to timeout enforcement. + * + * The default is 0, which means no timeout is checked and no additional + * flush will be made. + * + * [in] env An environment handle returned by mdbx_env_create() + * [in] seconds_16dot16 The period in 1/65536 of a second when a synchronous + * flush would be made since the last unsteady commit. + * + * Returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API int mdbx_env_set_syncperiod(MDBX_env *env, + unsigned seconds_16dot16); + /* Returns a lag of the reading for the given transaction. 
* * Returns an information for estimate how much given read-only diff --git a/src/bits.h b/src/bits.h index 0fad3e5a..e0be895b 100644 --- a/src/bits.h +++ b/src/bits.h @@ -810,6 +810,8 @@ struct MDBX_env { unsigned me_maxkey_limit; /* max size of a key */ mdbx_pid_t me_live_reader; /* have liveness lock in reader table */ void *me_userctx; /* User-settable context */ + volatile uint64_t *me_unsynced_timeout; + volatile uint64_t *me_autosync_period; volatile pgno_t *me_unsynced_pages; volatile pgno_t *me_autosync_threshold; MDBX_oom_func *me_oom_func; /* Callback for kicking laggard readers */ @@ -818,6 +820,8 @@ struct MDBX_env { MDBX_OSAL_LOCK wmutex; #endif txnid_t oldest; + uint64_t unsynced_timeout; + uint64_t autosync_period; pgno_t autosync_pending; pgno_t autosync_threshold; } me_lckless_stub; diff --git a/src/mdbx.c b/src/mdbx.c index 7501cb12..92d0cfe7 100644 --- a/src/mdbx.c +++ b/src/mdbx.c @@ -3025,7 +3025,9 @@ __cold static int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) { pgno_t unsynced_pages = *env->me_unsynced_pages; if (!META_IS_STEADY(head) || unsynced_pages) { const pgno_t autosync_threshold = *env->me_autosync_threshold; - if (force || (autosync_threshold && unsynced_pages >= autosync_threshold)) + const uint64_t unsynced_timeout = *env->me_unsynced_timeout; + if (force || (autosync_threshold && unsynced_pages >= autosync_threshold) || + (unsynced_timeout && mdbx_osal_monotime() >= unsynced_timeout)) flags &= MDBX_WRITEMAP /* clear flags for full steady sync */; if (outside_txn) { @@ -5390,7 +5392,9 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, if (flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) { /* Check auto-sync conditions */ const pgno_t autosync_threshold = *env->me_autosync_threshold; - if (autosync_threshold && *env->me_unsynced_pages >= autosync_threshold) + const uint64_t unsynced_timeout = *env->me_unsynced_timeout; + if ((autosync_threshold && *env->me_unsynced_pages >= autosync_threshold) || + (unsynced_timeout 
&& mdbx_osal_monotime() >= unsynced_timeout)) flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ } @@ -5457,8 +5461,12 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, if (rc == MDBX_RESULT_FALSE /* carry steady */) { pending->mm_datasync_sign = mdbx_meta_sign(pending); *env->me_unsynced_pages = 0; + *env->me_unsynced_timeout = 0; } else { assert(rc == MDBX_RESULT_TRUE /* carry non-steady */); + const uint64_t autosync_period = *env->me_autosync_period; + if (autosync_period && *env->me_unsynced_timeout == 0) + *env->me_unsynced_timeout = mdbx_osal_monotime() + autosync_period; pending->mm_datasync_sign = (flags & MDBX_UTTERLY_NOSYNC) == MDBX_UTTERLY_NOSYNC ? MDBX_DATASIGN_NONE @@ -6430,6 +6438,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, return rc; env->me_oldest = &env->me_lckless_stub.oldest; + env->me_unsynced_timeout = &env->me_lckless_stub.unsynced_timeout; + env->me_autosync_period = &env->me_lckless_stub.autosync_period; env->me_unsynced_pages = &env->me_lckless_stub.autosync_pending; env->me_autosync_threshold = &env->me_lckless_stub.autosync_threshold; env->me_maxreaders = UINT_MAX; @@ -6540,6 +6550,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, mdbx_assert(env, !MDBX_IS_ERROR(rc)); env->me_oldest = &env->me_lck->mti_oldest_reader; + env->me_unsynced_timeout = &env->me_lck->mti_unsynced_timeout; + env->me_autosync_period = &env->me_lck->mti_autosync_period; env->me_unsynced_pages = &env->me_lck->mti_unsynced_pages; env->me_autosync_threshold = &env->me_lck->mti_autosync_threshold; #ifdef MDBX_OSAL_LOCK @@ -6788,6 +6800,8 @@ static void __cold mdbx_env_close0(MDBX_env *env) { if (env->me_lck) mdbx_munmap(&env->me_lck_mmap); env->me_oldest = nullptr; + env->me_unsynced_timeout = nullptr; + env->me_autosync_period = nullptr; env->me_unsynced_pages = nullptr; env->me_autosync_threshold = nullptr; @@ -13028,6 +13042,23 @@ int __cold mdbx_env_set_syncbytes(MDBX_env *env, size_t 
bytes) { return bytes ? mdbx_env_sync(env, false) : MDBX_SUCCESS; } +int __cold mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) { + if (unlikely(!env)) + return MDBX_EINVAL; + + if (unlikely(env->me_signature != MDBX_ME_SIGNATURE)) + return MDBX_EBADSIGN; + + if (unlikely(env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR))) + return MDBX_EACCESS; + + if (unlikely(!env->me_map)) + return MDBX_EPERM; + + *env->me_autosync_period = mdbx_osal_16dot16_to_monotime(seconds_16dot16); + return seconds_16dot16 ? mdbx_env_sync(env, false) : MDBX_SUCCESS; +} + int __cold mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oomfunc) { if (unlikely(!env)) return MDBX_EINVAL; diff --git a/src/osal.c b/src/osal.c index 301bb8a1..e0ae3f47 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1223,3 +1223,80 @@ __cold void mdbx_osal_jitter(bool tiny) { #endif } } + +#if defined(_WIN32) || defined(_WIN64) +#elif defined(__APPLE__) || defined(__MACH__) +#include <mach/mach_time.h> +#elif defined(__linux__) || defined(__gnu_linux__) +/* Picks the clock id used by mdbx_osal_monotime() on Linux: the first of + * CLOCK_BOOTTIME, CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_COARSE defined at build + * time, if clock_gettime() accepts it at runtime, else CLOCK_MONOTONIC. */ +static __cold clockid_t choise_monoclock(void) { + struct timespec probe; +#if defined(CLOCK_BOOTTIME) + if (clock_gettime(CLOCK_BOOTTIME, &probe) == 0) + return CLOCK_BOOTTIME; +#elif defined(CLOCK_MONOTONIC_RAW) + if (clock_gettime(CLOCK_MONOTONIC_RAW, &probe) == 0) + return CLOCK_MONOTONIC_RAW; +#elif defined(CLOCK_MONOTONIC_COARSE) + if (clock_gettime(CLOCK_MONOTONIC_COARSE, &probe) == 0) + return CLOCK_MONOTONIC_COARSE; +#endif + return CLOCK_MONOTONIC; +} +#endif + +/* Converts a period in 16.16 fixed-point seconds (units of 1/65536 second) + * into ticks of the clock read by mdbx_osal_monotime(), rounded to the + * nearest tick. */ +uint64_t mdbx_osal_16dot16_to_monotime(uint32_t seconds_16dot16) { +#if defined(_WIN32) || defined(_WIN64) + static LARGE_INTEGER performance_frequency; + if (performance_frequency.QuadPart == 0) + QueryPerformanceFrequency(&performance_frequency); + const uint64_t ratio = performance_frequency.QuadPart; +#elif defined(__APPLE__) || defined(__MACH__) + static uint64_t ratio; + if (!ratio) { + mach_timebase_info_data_t ti; + mach_timebase_info(&ti); + ratio = UINT64_C(1000000000) * ti.denom / ti.numer; + } 
+#else + const uint64_t ratio = UINT64_C(1000000000); +#endif + return (ratio * seconds_16dot16 + 32768) >> 16; +} + +/* Returns the current clock reading in platform ticks: performance-counter + * units on Windows, mach_absolute_time() units on Apple, nanoseconds + * elsewhere. Returns 0 if clock_gettime() fails. */ +uint64_t mdbx_osal_monotime(void) { +#if defined(_WIN32) || defined(_WIN64) + LARGE_INTEGER counter; + counter.QuadPart = 0; + QueryPerformanceCounter(&counter); + return counter.QuadPart; +#elif defined(__APPLE__) || defined(__MACH__) + return mach_absolute_time(); +#else + +#if defined(__linux__) || defined(__gnu_linux__) + static clockid_t posix_clockid = -1; + if (unlikely(posix_clockid < 0)) + posix_clockid = choise_monoclock(); +#elif defined(CLOCK_MONOTONIC) +#define posix_clockid CLOCK_MONOTONIC +#else +#define posix_clockid CLOCK_REALTIME +#endif + + struct timespec ts; + if (unlikely(clock_gettime(posix_clockid, &ts) != 0)) { + ts.tv_nsec = 0; + ts.tv_sec = 0; + } + return ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; +#endif +} diff --git a/src/osal.h b/src/osal.h index 5525c10a..90189762 100644 --- a/src/osal.h +++ b/src/osal.h @@ -599,6 +599,8 @@ static __inline mdbx_tid_t mdbx_thread_self(void) { } void mdbx_osal_jitter(bool tiny); +uint64_t mdbx_osal_monotime(void); +uint64_t mdbx_osal_16dot16_to_monotime(uint32_t seconds_16dot16); /*----------------------------------------------------------------------------*/ /* lck stuff */