mdbx: add timed auto-sync feature.

Change-Id: Ia9b8529fda321d5f78b306f270d157a78f708916
This commit is contained in:
Leonid Yuriev 2019-08-23 13:13:20 +03:00
parent 68e0076ca3
commit 4ceaf842fe
5 changed files with 133 additions and 2 deletions

26
mdbx.h
View File

@ -1601,6 +1601,32 @@ LIBMDBX_API int mdbx_env_close_ex(MDBX_env *env, int dont_sync);
* Returns A non-zero error value on failure and 0 on success. */ * Returns A non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t bytes); LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t bytes);
/* Sets the relative period since the last unsteady commit after which the data
* buffers are forcibly flushed to disk, even if the MDBX_NOSYNC, MDBX_NOMETASYNC
* and MDBX_MAPASYNC flags are set in the environment. The value affects all
* processes which operate on the given DB until the last process closes the DB
* or a new value is set.
*
* Data is always written to disk when mdbx_txn_commit() is called,
* but the operating system may keep it buffered. MDBX always flushes
* the OS buffers upon commit as well, unless the environment was
* opened with MDBX_NOSYNC, MDBX_MAPASYNC or in part MDBX_NOMETASYNC.
*
* The configured period is not checked asynchronously, but only inside the
* functions mdbx_txn_commit() and mdbx_env_sync(). Therefore, in cases where
* transactions are committed infrequently and/or irregularly, polling by
* mdbx_env_sync() may be a reasonable solution to timeout enforcement.
*
* The default is 0, which means that no timeout is checked and no additional
* flush will be made.
*
* [in] env An environment handle returned by mdbx_env_create()
* [in] seconds_16dot16 The period, in units of 1/65536 of a second, after
* which a synchronous flush would be made since the
* last unsteady commit.
*
* Returns A non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_env_set_syncperiod(MDBX_env *env,
unsigned seconds_16dot16);
/* Returns a lag of the reading for the given transaction. /* Returns a lag of the reading for the given transaction.
* *
* Returns an information for estimate how much given read-only * Returns an information for estimate how much given read-only

View File

@ -810,6 +810,8 @@ struct MDBX_env {
unsigned me_maxkey_limit; /* max size of a key */ unsigned me_maxkey_limit; /* max size of a key */
mdbx_pid_t me_live_reader; /* have liveness lock in reader table */ mdbx_pid_t me_live_reader; /* have liveness lock in reader table */
void *me_userctx; /* User-settable context */ void *me_userctx; /* User-settable context */
volatile uint64_t *me_unsynced_timeout;
volatile uint64_t *me_autosync_period;
volatile pgno_t *me_unsynced_pages; volatile pgno_t *me_unsynced_pages;
volatile pgno_t *me_autosync_threshold; volatile pgno_t *me_autosync_threshold;
MDBX_oom_func *me_oom_func; /* Callback for kicking laggard readers */ MDBX_oom_func *me_oom_func; /* Callback for kicking laggard readers */
@ -818,6 +820,8 @@ struct MDBX_env {
MDBX_OSAL_LOCK wmutex; MDBX_OSAL_LOCK wmutex;
#endif #endif
txnid_t oldest; txnid_t oldest;
uint64_t unsynced_timeout;
uint64_t autosync_period;
pgno_t autosync_pending; pgno_t autosync_pending;
pgno_t autosync_threshold; pgno_t autosync_threshold;
} me_lckless_stub; } me_lckless_stub;

View File

@ -3025,7 +3025,9 @@ __cold static int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
pgno_t unsynced_pages = *env->me_unsynced_pages; pgno_t unsynced_pages = *env->me_unsynced_pages;
if (!META_IS_STEADY(head) || unsynced_pages) { if (!META_IS_STEADY(head) || unsynced_pages) {
const pgno_t autosync_threshold = *env->me_autosync_threshold; const pgno_t autosync_threshold = *env->me_autosync_threshold;
if (force || (autosync_threshold && unsynced_pages >= autosync_threshold)) const uint64_t unsynced_timeout = *env->me_unsynced_timeout;
if (force || (autosync_threshold && unsynced_pages >= autosync_threshold) ||
(unsynced_timeout && mdbx_osal_monotime() >= unsynced_timeout))
flags &= MDBX_WRITEMAP /* clear flags for full steady sync */; flags &= MDBX_WRITEMAP /* clear flags for full steady sync */;
if (outside_txn) { if (outside_txn) {
@ -5390,7 +5392,9 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
if (flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) { if (flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) {
/* Check auto-sync conditions */ /* Check auto-sync conditions */
const pgno_t autosync_threshold = *env->me_autosync_threshold; const pgno_t autosync_threshold = *env->me_autosync_threshold;
if (autosync_threshold && *env->me_unsynced_pages >= autosync_threshold) const uint64_t unsynced_timeout = *env->me_unsynced_timeout;
if ((autosync_threshold && *env->me_unsynced_pages >= autosync_threshold) ||
(unsynced_timeout && mdbx_osal_monotime() >= unsynced_timeout))
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
} }
@ -5457,8 +5461,12 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
if (rc == MDBX_RESULT_FALSE /* carry steady */) { if (rc == MDBX_RESULT_FALSE /* carry steady */) {
pending->mm_datasync_sign = mdbx_meta_sign(pending); pending->mm_datasync_sign = mdbx_meta_sign(pending);
*env->me_unsynced_pages = 0; *env->me_unsynced_pages = 0;
*env->me_unsynced_timeout = 0;
} else { } else {
assert(rc == MDBX_RESULT_TRUE /* carry non-steady */); assert(rc == MDBX_RESULT_TRUE /* carry non-steady */);
const uint64_t autosync_period = *env->me_autosync_period;
if (autosync_period && *env->me_unsynced_timeout == 0)
*env->me_unsynced_timeout = mdbx_osal_monotime() + autosync_period;
pending->mm_datasync_sign = pending->mm_datasync_sign =
(flags & MDBX_UTTERLY_NOSYNC) == MDBX_UTTERLY_NOSYNC (flags & MDBX_UTTERLY_NOSYNC) == MDBX_UTTERLY_NOSYNC
? MDBX_DATASIGN_NONE ? MDBX_DATASIGN_NONE
@ -6430,6 +6438,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
return rc; return rc;
env->me_oldest = &env->me_lckless_stub.oldest; env->me_oldest = &env->me_lckless_stub.oldest;
env->me_unsynced_timeout = &env->me_lckless_stub.unsynced_timeout;
env->me_autosync_period = &env->me_lckless_stub.autosync_period;
env->me_unsynced_pages = &env->me_lckless_stub.autosync_pending; env->me_unsynced_pages = &env->me_lckless_stub.autosync_pending;
env->me_autosync_threshold = &env->me_lckless_stub.autosync_threshold; env->me_autosync_threshold = &env->me_lckless_stub.autosync_threshold;
env->me_maxreaders = UINT_MAX; env->me_maxreaders = UINT_MAX;
@ -6540,6 +6550,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
mdbx_assert(env, !MDBX_IS_ERROR(rc)); mdbx_assert(env, !MDBX_IS_ERROR(rc));
env->me_oldest = &env->me_lck->mti_oldest_reader; env->me_oldest = &env->me_lck->mti_oldest_reader;
env->me_unsynced_timeout = &env->me_lck->mti_unsynced_timeout;
env->me_autosync_period = &env->me_lck->mti_autosync_period;
env->me_unsynced_pages = &env->me_lck->mti_unsynced_pages; env->me_unsynced_pages = &env->me_lck->mti_unsynced_pages;
env->me_autosync_threshold = &env->me_lck->mti_autosync_threshold; env->me_autosync_threshold = &env->me_lck->mti_autosync_threshold;
#ifdef MDBX_OSAL_LOCK #ifdef MDBX_OSAL_LOCK
@ -6788,6 +6800,8 @@ static void __cold mdbx_env_close0(MDBX_env *env) {
if (env->me_lck) if (env->me_lck)
mdbx_munmap(&env->me_lck_mmap); mdbx_munmap(&env->me_lck_mmap);
env->me_oldest = nullptr; env->me_oldest = nullptr;
env->me_unsynced_timeout = nullptr;
env->me_autosync_period = nullptr;
env->me_unsynced_pages = nullptr; env->me_unsynced_pages = nullptr;
env->me_autosync_threshold = nullptr; env->me_autosync_threshold = nullptr;
@ -13028,6 +13042,23 @@ int __cold mdbx_env_set_syncbytes(MDBX_env *env, size_t bytes) {
return bytes ? mdbx_env_sync(env, false) : MDBX_SUCCESS; return bytes ? mdbx_env_sync(env, false) : MDBX_SUCCESS;
} }
/* Stores the auto-sync period for the environment.
 *
 * The 16.16 fixed-point seconds value is converted to monotonic-clock units
 * by mdbx_osal_16dot16_to_monotime() and written through
 * env->me_autosync_period.
 *
 * Returns MDBX_EINVAL for a null env, MDBX_EBADSIGN for a signature mismatch,
 * MDBX_EACCESS when MDBX_RDONLY or MDBX_FATAL_ERROR is set in me_flags,
 * MDBX_EPERM when me_map is not set. Otherwise MDBX_SUCCESS for a zero period,
 * or the result of mdbx_env_sync(env, false) for a non-zero one. */
int __cold mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) {
  if (unlikely(!env))
    return MDBX_EINVAL;

  if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
    return MDBX_EBADSIGN;

  const unsigned forbidden_flags = MDBX_RDONLY | MDBX_FATAL_ERROR;
  if (unlikely((env->me_flags & forbidden_flags) != 0))
    return MDBX_EACCESS;

  if (unlikely(!env->me_map))
    return MDBX_EPERM;

  const uint64_t period_monotime =
      mdbx_osal_16dot16_to_monotime(seconds_16dot16);
  *env->me_autosync_period = period_monotime;

  /* A zero period needs no immediate sync attempt. */
  if (seconds_16dot16 == 0)
    return MDBX_SUCCESS;
  return mdbx_env_sync(env, false);
}
int __cold mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oomfunc) { int __cold mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oomfunc) {
if (unlikely(!env)) if (unlikely(!env))
return MDBX_EINVAL; return MDBX_EINVAL;

View File

@ -1223,3 +1223,71 @@ __cold void mdbx_osal_jitter(bool tiny) {
#endif #endif
} }
} }
#if defined(_WIN32) || defined(_WIN64)
#elif defined(__APPLE__) || defined(__MACH__)
#include <mach/mach_time.h>
#elif defined(__linux__) || defined(__gnu_linux__)
/* Picks the POSIX clock id to be used for monotonic time readings.
 *
 * Only the first candidate that the headers define (CLOCK_BOOTTIME, then
 * CLOCK_MONOTONIC_RAW, then CLOCK_MONOTONIC_COARSE) is compiled in, because
 * the candidates form a single #if/#elif chain. That candidate is probed with
 * clock_gettime() and returned when the probe succeeds.
 *
 * Returns the probed clock id on success, otherwise CLOCK_MONOTONIC. */
static __cold clockid_t choise_monoclock(void) {
  struct timespec probe;
#if defined(CLOCK_BOOTTIME)
  if (clock_gettime(CLOCK_BOOTTIME, &probe) == 0)
    return CLOCK_BOOTTIME;
#elif defined(CLOCK_MONOTONIC_RAW)
  if (clock_gettime(CLOCK_MONOTONIC_RAW, &probe) == 0)
    return CLOCK_MONOTONIC_RAW;
#elif defined(CLOCK_MONOTONIC_COARSE)
  if (clock_gettime(CLOCK_MONOTONIC_COARSE, &probe) == 0)
    return CLOCK_MONOTONIC_COARSE;
#endif
  /* Fallback when no candidate is defined or the probe failed. */
  return CLOCK_MONOTONIC;
}
#endif
/* Converts a duration given in 16.16 fixed-point seconds (units of 1/65536 of
 * a second) into ticks of the clock read by mdbx_osal_monotime().
 *
 * The number of ticks per second is taken from QueryPerformanceFrequency() on
 * Windows, derived from mach_timebase_info() on Apple/Mach, and is fixed at
 * 1000000000 elsewhere. The result is rounded to the nearest tick. */
uint64_t mdbx_osal_16dot16_to_monotime(uint32_t seconds_16dot16) {
#if defined(_WIN32) || defined(_WIN64)
  static LARGE_INTEGER qpc_frequency;
  if (qpc_frequency.QuadPart == 0)
    QueryPerformanceFrequency(&qpc_frequency);
  const uint64_t ticks_per_second = qpc_frequency.QuadPart;
#elif defined(__APPLE__) || defined(__MACH__)
  static uint64_t ticks_per_second;
  if (ticks_per_second == 0) {
    mach_timebase_info_data_t timebase;
    mach_timebase_info(&timebase);
    ticks_per_second = UINT64_C(1000000000) * timebase.denom / timebase.numer;
  }
#else
  const uint64_t ticks_per_second = UINT64_C(1000000000);
#endif
  /* Add half of the 16-bit fractional unit so the shift rounds to nearest. */
  const uint64_t half_unit = UINT64_C(1) << 15;
  const uint64_t scaled = ticks_per_second * seconds_16dot16 + half_unit;
  return scaled >> 16;
}
/* Returns the current reading of the platform clock used for auto-sync
 * deadlines: the QueryPerformanceCounter() value on Windows,
 * mach_absolute_time() on Apple/Mach, and otherwise a clock_gettime() reading
 * expressed in nanoseconds.
 *
 * On Linux the clock id is chosen once by choise_monoclock() and cached in a
 * function-local static. On other POSIX systems CLOCK_MONOTONIC is used when
 * defined, else CLOCK_REALTIME. If clock_gettime() fails the result is 0. */
uint64_t mdbx_osal_monotime(void) {
#if defined(_WIN32) || defined(_WIN64)
  LARGE_INTEGER ticks;
  ticks.QuadPart = 0;
  QueryPerformanceCounter(&ticks);
  return ticks.QuadPart;
#elif defined(__APPLE__) || defined(__MACH__)
  return mach_absolute_time();
#else

#if defined(__linux__) || defined(__gnu_linux__)
  /* Negative means "not chosen yet"; resolved on the first call. */
  static clockid_t posix_clockid = -1;
  if (unlikely(posix_clockid < 0))
    posix_clockid = choise_monoclock();
#elif defined(CLOCK_MONOTONIC)
#define posix_clockid CLOCK_MONOTONIC
#else
#define posix_clockid CLOCK_REALTIME
#endif

  struct timespec now;
  if (unlikely(clock_gettime(posix_clockid, &now) != 0))
    return 0;
  return now.tv_sec * UINT64_C(1000000000) + now.tv_nsec;
#endif
}

View File

@ -599,6 +599,8 @@ static __inline mdbx_tid_t mdbx_thread_self(void) {
} }
void mdbx_osal_jitter(bool tiny); void mdbx_osal_jitter(bool tiny);
uint64_t mdbx_osal_monotime(void);
uint64_t mdbx_osal_16dot16_to_monotime(uint32_t seconds_16dot16);
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* lck stuff */ /* lck stuff */