From 0dfa9cd09afd38ce56ce431fef85047120efb3a3 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Wed, 13 Jun 2018 17:02:31 +0300 Subject: [PATCH] mdbx: add MDBX_EXCLUSIVE instead of mdbx_env_open_ex(). Change-Id: I3c817d58d971385bcd07643df14beaf7122c7836 --- README-RU.md | 4 ++-- README.md | 2 +- mdbx.h | 7 ++----- src/lck-windows.c | 22 ++++++++++++++++++---- src/mdbx.c | 39 ++++++++++++++++++--------------------- src/osal.c | 10 ++++++---- src/osal.h | 2 +- src/tools/mdbx_chk.c | 29 +++++++++++++++++++++-------- 8 files changed, 69 insertions(+), 46 deletions(-) diff --git a/README-RU.md b/README-RU.md index 23354555..f4ae5e8f 100644 --- a/README-RU.md +++ b/README-RU.md @@ -591,8 +591,8 @@ _libmdbx_ при этом не ведет WAL, а передает весь ко 13. Исправленный вариант `mdbx_cursor_count()`, возвращающий корректное количество дубликатов для всех типов таблиц и любого положения курсора. -14. Возможность открыть БД в эксклюзивном режиме посредством -`mdbx_env_open_ex()`, например в целях её проверки. +14. Возможность открыть БД в эксклюзивном режиме посредством флага +`MDBX_EXCLUSIVE`, например в целях её проверки. 15. Возможность закрыть БД в "грязном" состоянии (без сброса данных и формирования сильной точки фиксации) посредством `mdbx_env_close_ex()`. diff --git a/README.md b/README.md index 48209756..92b6542f 100644 --- a/README.md +++ b/README.md @@ -405,7 +405,7 @@ Improvements over LMDB 13. Fixed `mdbx_cursor_count()`, which returns correct count of duplicated for all table types and any cursor position. -14. Ability to open DB in exclusive mode via `mdbx_env_open_ex()`, e.g. for integrity check. +14. Ability to open DB in exclusive mode with `MDBX_EXCLUSIVE` flag, e.g. for integrity check. 15. Ability to close DB in "dirty" state (without data flush and creation of steady synchronization point) via `mdbx_env_close_ex()`. 
diff --git a/mdbx.h b/mdbx.h index 3aa5cd87..3769814e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -288,9 +288,8 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b); #define MDBX_MAPASYNC 0x100000u /* tie reader locktable slots to MDBX_txn objects instead of to threads */ #define MDBX_NOTLS 0x200000u -/* don't do any locking, caller must manage their own locks - * WARNING: libmdbx don't support this mode. */ -#define MDBX_NOLOCK__UNSUPPORTED 0x400000u +/* open DB in exclusive/monopolistic mode. */ +#define MDBX_EXCLUSIVE 0x400000u /* don't do readahead */ #define MDBX_NORDAHEAD 0x800000u /* don't initialize malloc'd memory before writing to datafile */ @@ -670,8 +669,6 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv); * - MDBX_EAGAIN - the environment was locked by another process. */ LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, mode_t mode); -LIBMDBX_API int mdbx_env_open_ex(MDBX_env *env, const char *path, - unsigned flags, mode_t mode, int *exclusive); /* Copy an MDBX environment to the specified path, with options. * diff --git a/src/lck-windows.c b/src/lck-windows.c index 9167626a..7d94a1bf 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -132,7 +132,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { EnterCriticalSection(&env->me_windowsbug_lock); } - if (flock(env->me_fd, + if ((env->me_flags & MDBX_EXCLUSIVE) || + flock(env->me_fd, dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) : (LCK_EXCLUSIVE | LCK_WAITFOR), LCK_BODY)) @@ -143,7 +144,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } void mdbx_txn_unlock(MDBX_env *env) { - int rc = funlock(env->me_fd, LCK_BODY); + int rc = (env->me_flags & MDBX_EXCLUSIVE) ? 
TRUE + : funlock(env->me_fd, LCK_BODY); LeaveCriticalSection(&env->me_windowsbug_lock); if (!rc) mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError()); @@ -166,7 +168,8 @@ int mdbx_rdt_lock(MDBX_env *env) { return MDBX_SUCCESS; /* readonly database in readonly filesystem */ /* transite from S-? (used) to S-E (locked), e.g. exclusive lock upper-part */ - if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) + if ((env->me_flags & MDBX_EXCLUSIVE) || + flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) return MDBX_SUCCESS; int rc = GetLastError(); @@ -177,7 +180,8 @@ int mdbx_rdt_lock(MDBX_env *env) { void mdbx_rdt_unlock(MDBX_env *env) { if (env->me_lfd != INVALID_HANDLE_VALUE) { /* transite from S-E (locked) to S-? (used), e.g. unlock upper-part */ - if (!funlock(env->me_lfd, LCK_UPPER)) + if ((env->me_flags & MDBX_EXCLUSIVE) == 0 && + !funlock(env->me_lfd, LCK_UPPER)) mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError()); } mdbx_srwlock_ReleaseShared(&env->me_remap_guard); @@ -372,6 +376,9 @@ int mdbx_lck_seize(MDBX_env *env) { int rc; assert(env->me_fd != INVALID_HANDLE_VALUE); + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_RESULT_TRUE /* files must have been opened non-shareable */; + if (env->me_lfd == INVALID_HANDLE_VALUE) { /* LY: without-lck mode (e.g. on read-only filesystem) */ mdbx_jitter4testing(false); @@ -414,6 +421,9 @@ int mdbx_lck_downgrade(MDBX_env *env, bool complete) { assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd != INVALID_HANDLE_VALUE); + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_SUCCESS /* files must have been opened non-shareable */; + /* 1) must be at E-E (exclusive-write) */ if (!complete) { /* transite from E-E to E_? 
(exclusive-read) */ @@ -448,6 +458,10 @@ int mdbx_lck_upgrade(MDBX_env *env) { /* Transite from locked state (S-E) to exclusive-write (E-E) */ assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd != INVALID_HANDLE_VALUE); + assert((env->me_flags & MDBX_EXCLUSIVE) == 0); + + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_RESULT_TRUE /* files must have been opened non-shareable */; /* 1) must be at S-E (locked), transite to ?_E (middle) */ if (!funlock(env->me_lfd, LCK_LOWER)) diff --git a/src/mdbx.c b/src/mdbx.c index e2b3c694..9a4bf7a7 100644 --- a/src/mdbx.c +++ b/src/mdbx.c @@ -5583,7 +5583,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd == INVALID_HANDLE_VALUE); - int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd); + int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd, + (env->me_flags & MDBX_EXCLUSIVE) ? true : false); if (err != MDBX_SUCCESS) { if (err != MDBX_EROFS || (env->me_flags & MDBX_RDONLY) == 0) return err; @@ -5626,10 +5627,14 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, return err; size = wanna; } - } else if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) || - size < env->me_os_psize) { - mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size); - return MDBX_PROBLEM; + } else { + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_BUSY; + if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) || + size < env->me_os_psize) { + mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size); + return MDBX_PROBLEM; + } } const size_t maxreaders = @@ -5699,14 +5704,14 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, MDBX_COALESCE | MDBX_PAGEPERTURB) #define CHANGELESS \ (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \ - MDBX_LIFORECLAIM) + MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) #if VALID_FLAGS & PERSISTENT_FLAGS & 
(CHANGEABLE | CHANGELESS) #error "Persistent DB flags & env flags overlap, but both go in mm_flags" #endif -int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, - mode_t mode, int *exclusive) { +int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, + mode_t mode) { if (unlikely(!env || !path)) return MDBX_EINVAL; @@ -5770,7 +5775,8 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, else oflags = O_RDWR | O_CREAT; - rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd); + rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd, + (env->me_flags & MDBX_EXCLUSIVE) ? true : false); if (rc != MDBX_SUCCESS) goto bailout; @@ -5791,7 +5797,7 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC; if (lck_rc == MDBX_RESULT_TRUE) { env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY); - if (exclusive == NULL || *exclusive < 2) { + if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { /* LY: downgrade lock only if exclusive access not requested. * in case exclusive==1, just leave value as is. */ rc = mdbx_lck_downgrade(env, true); @@ -5803,10 +5809,6 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, if (rc != MDBX_SUCCESS) goto bailout; } else { - if (exclusive) { - /* LY: just indicate that is not an exclusive access. 
*/ - *exclusive = 0; - } if ((env->me_flags & MDBX_RDONLY) == 0) { while (env->me_lck->mti_envmode == MDBX_RDONLY) { if (mdbx_atomic_compare_and_swap32(&env->me_lck->mti_envmode, @@ -5877,11 +5879,6 @@ bailout: return rc; } -int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, - mode_t mode) { - return mdbx_env_open_ex(env, path, flags, mode, NULL); -} - /* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ static void __cold mdbx_env_close0(MDBX_env *env) { if (!(env->me_flags & MDBX_ENV_ACTIVE)) @@ -10496,8 +10493,8 @@ int __cold mdbx_env_copy(MDBX_env *env, const char *path, unsigned flags) { /* The destination path must exist, but the destination file must not. * We don't want the OS to cache the writes, since the source data is * already in the OS cache. */ - int rc = - mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666, &newfd); + int rc = mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666, + &newfd, true); if (rc == MDBX_SUCCESS) { if (env->me_psize >= env->me_os_psize) { #ifdef F_NOCACHE /* __APPLE__ */ diff --git a/src/osal.c b/src/osal.c index b98a8771..ad86653b 100644 --- a/src/osal.c +++ b/src/osal.c @@ -409,19 +409,20 @@ int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex) { /*----------------------------------------------------------------------------*/ int mdbx_openfile(const char *pathname, int flags, mode_t mode, - mdbx_filehandle_t *fd) { + mdbx_filehandle_t *fd, bool exclusive) { *fd = INVALID_HANDLE_VALUE; #if defined(_WIN32) || defined(_WIN64) (void)mode; - DWORD DesiredAccess; - DWORD ShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE; + DWORD DesiredAccess, ShareMode; DWORD FlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) { default: return ERROR_INVALID_PARAMETER; case O_RDONLY: DesiredAccess = GENERIC_READ; + ShareMode = + exclusive ? 
FILE_SHARE_READ : (FILE_SHARE_READ | FILE_SHARE_WRITE); break; case O_WRONLY: /* assume for MDBX_env_copy() and friends output */ DesiredAccess = GENERIC_WRITE; @@ -430,6 +431,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode, break; case O_RDWR: DesiredAccess = GENERIC_READ | GENERIC_WRITE; + ShareMode = exclusive ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE); break; } @@ -468,7 +470,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode, } } #else - + (void)exclusive; #ifdef O_CLOEXEC flags |= O_CLOEXEC; #endif diff --git a/src/osal.h b/src/osal.h index e27b4bc6..8728ecad 100644 --- a/src/osal.h +++ b/src/osal.h @@ -477,7 +477,7 @@ int mdbx_filesize_sync(mdbx_filehandle_t fd); int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length); int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length); int mdbx_openfile(const char *pathname, int flags, mode_t mode, - mdbx_filehandle_t *fd); + mdbx_filehandle_t *fd, bool exclusive); int mdbx_closefile(mdbx_filehandle_t fd); typedef struct mdbx_mmap_param { diff --git a/src/tools/mdbx_chk.c b/src/tools/mdbx_chk.c index 0fd23ae6..ae48f69e 100644 --- a/src/tools/mdbx_chk.c +++ b/src/tools/mdbx_chk.c @@ -73,8 +73,7 @@ struct { } walk; uint64_t total_unused_bytes; -int exclusive = 2; -int envflags = MDBX_RDONLY; +int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE; MDBX_env *env; MDBX_txn *txn; @@ -706,7 +705,7 @@ void verbose_meta(int num, txnid_t txnid, uint64_t sign) { print(", stay"); if (txnid > envinfo.mi_recent_txnid && - (exclusive || (envflags & MDBX_RDONLY) == 0)) + (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")", txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid); print("\n"); @@ -805,7 +804,7 @@ int main(int argc, char *argv[]) { envflags &= ~MDBX_RDONLY; break; case 'c': - exclusive = 0; + envflags &= ~MDBX_EXCLUSIVE; break; case 'd': dont_traversal = 1; @@ -853,7 +852,19 @@ int main(int argc, char 
*argv[]) { goto bailout; } - rc = mdbx_env_open_ex(env, envname, envflags, 0664, &exclusive); + rc = mdbx_env_open(env, envname, envflags, 0664); + if ((envflags & MDBX_EXCLUSIVE) && + (rc == MDBX_BUSY || +#if defined(_WIN32) || defined(_WIN64) + rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION +#else + rc == EBUSY +#endif + )) { + envflags &= ~MDBX_EXCLUSIVE; + rc = mdbx_env_open(env, envname, envflags, 0664); + } + if (rc) { error("mdbx_env_open failed, error %d %s\n", rc, mdbx_strerror(rc)); if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY)) @@ -861,7 +872,8 @@ int main(int argc, char *argv[]) { goto bailout; } if (verbose) - print(" - %s mode\n", exclusive ? "monopolistic" : "cooperative"); + print(" - %s mode\n", + (envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); if ((envflags & MDBX_RDONLY) == 0) { rc = mdbx_txn_lock(env, false); @@ -946,7 +958,7 @@ int main(int argc, char *argv[]) { ++problems_meta; } - if (exclusive > 1) { + if (envflags & MDBX_EXCLUSIVE) { if (verbose) print(" - performs full check recent-txn-id with meta-pages\n"); problems_meta += check_meta_head(true); @@ -1079,7 +1091,8 @@ int main(int argc, char *argv[]) { } if (problems_maindb == 0 && problems_freedb == 0) { - if (!dont_traversal && (exclusive || (envflags & MDBX_RDONLY) == 0)) { + if (!dont_traversal && /* skip only if read-only and shared */ + (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { if (walk.pgcount != lastpgno - freedb_pages) { error("used pages mismatch (%" PRIu64 " != %" PRIu64 ")\n", walk.pgcount, lastpgno - freedb_pages);