mdbx: add MDBX_EXCLUSIVE instead of mdbx_env_open_ex().

Change-Id: I3c817d58d971385bcd07643df14beaf7122c7836
This commit is contained in:
Leo Yuriev 2018-06-13 17:02:31 +03:00
parent 09ad941a05
commit 0dfa9cd09a
8 changed files with 69 additions and 46 deletions

View File

@ -591,8 +591,8 @@ _libmdbx_ при этом не ведет WAL, а передает весь ко
13. Исправленный вариант `mdbx_cursor_count()`, возвращающий корректное 13. Исправленный вариант `mdbx_cursor_count()`, возвращающий корректное
количество дубликатов для всех типов таблиц и любого положения курсора. количество дубликатов для всех типов таблиц и любого положения курсора.
14. Возможность открыть БД в эксклюзивном режиме посредством 14. Возможность открыть БД в эксклюзивном режиме посредством флага
`mdbx_env_open_ex()`, например в целях её проверки. `MDBX_EXCLUSIVE`, например в целях её проверки.
15. Возможность закрыть БД в "грязном" состоянии (без сброса данных и 15. Возможность закрыть БД в "грязном" состоянии (без сброса данных и
формирования сильной точки фиксации) посредством `mdbx_env_close_ex()`. формирования сильной точки фиксации) посредством `mdbx_env_close_ex()`.

View File

@ -405,7 +405,7 @@ Improvements over LMDB
13. Fixed `mdbx_cursor_count()`, which returns the correct count of duplicates for all table types and any cursor position. 13. Fixed `mdbx_cursor_count()`, which returns the correct count of duplicates for all table types and any cursor position.
14. Ability to open DB in exclusive mode via `mdbx_env_open_ex()`, e.g. for integrity check. 14. Ability to open DB in exclusive mode with `MDBX_EXCLUSIVE` flag, e.g. for integrity check.
15. Ability to close DB in "dirty" state (without data flush and creation of steady synchronization point) 15. Ability to close DB in "dirty" state (without data flush and creation of steady synchronization point)
via `mdbx_env_close_ex()`. via `mdbx_env_close_ex()`.

7
mdbx.h
View File

@ -288,9 +288,8 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b);
#define MDBX_MAPASYNC 0x100000u #define MDBX_MAPASYNC 0x100000u
/* tie reader locktable slots to MDBX_txn objects instead of to threads */ /* tie reader locktable slots to MDBX_txn objects instead of to threads */
#define MDBX_NOTLS 0x200000u #define MDBX_NOTLS 0x200000u
/* don't do any locking, caller must manage their own locks /* open DB in exclusive/monopolistic mode. */
* WARNING: libmdbx don't support this mode. */ #define MDBX_EXCLUSIVE 0x400000u
#define MDBX_NOLOCK__UNSUPPORTED 0x400000u
/* don't do readahead */ /* don't do readahead */
#define MDBX_NORDAHEAD 0x800000u #define MDBX_NORDAHEAD 0x800000u
/* don't initialize malloc'd memory before writing to datafile */ /* don't initialize malloc'd memory before writing to datafile */
@ -670,8 +669,6 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* - MDBX_EAGAIN - the environment was locked by another process. */ * - MDBX_EAGAIN - the environment was locked by another process. */
LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
mode_t mode); mode_t mode);
LIBMDBX_API int mdbx_env_open_ex(MDBX_env *env, const char *path,
unsigned flags, mode_t mode, int *exclusive);
/* Copy an MDBX environment to the specified path, with options. /* Copy an MDBX environment to the specified path, with options.
* *

View File

@ -132,7 +132,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
EnterCriticalSection(&env->me_windowsbug_lock); EnterCriticalSection(&env->me_windowsbug_lock);
} }
if (flock(env->me_fd, if ((env->me_flags & MDBX_EXCLUSIVE) ||
flock(env->me_fd,
dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT)
: (LCK_EXCLUSIVE | LCK_WAITFOR), : (LCK_EXCLUSIVE | LCK_WAITFOR),
LCK_BODY)) LCK_BODY))
@ -143,7 +144,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
} }
void mdbx_txn_unlock(MDBX_env *env) { void mdbx_txn_unlock(MDBX_env *env) {
int rc = funlock(env->me_fd, LCK_BODY); int rc = (env->me_flags & MDBX_EXCLUSIVE) ? TRUE
: funlock(env->me_fd, LCK_BODY);
LeaveCriticalSection(&env->me_windowsbug_lock); LeaveCriticalSection(&env->me_windowsbug_lock);
if (!rc) if (!rc)
mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError()); mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
@ -166,7 +168,8 @@ int mdbx_rdt_lock(MDBX_env *env) {
return MDBX_SUCCESS; /* readonly database in readonly filesystem */ return MDBX_SUCCESS; /* readonly database in readonly filesystem */
/* transite from S-? (used) to S-E (locked), e.g. exclusive lock upper-part */ /* transite from S-? (used) to S-E (locked), e.g. exclusive lock upper-part */
if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) if ((env->me_flags & MDBX_EXCLUSIVE) ||
flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER))
return MDBX_SUCCESS; return MDBX_SUCCESS;
int rc = GetLastError(); int rc = GetLastError();
@ -177,7 +180,8 @@ int mdbx_rdt_lock(MDBX_env *env) {
void mdbx_rdt_unlock(MDBX_env *env) { void mdbx_rdt_unlock(MDBX_env *env) {
if (env->me_lfd != INVALID_HANDLE_VALUE) { if (env->me_lfd != INVALID_HANDLE_VALUE) {
/* transite from S-E (locked) to S-? (used), e.g. unlock upper-part */ /* transite from S-E (locked) to S-? (used), e.g. unlock upper-part */
if (!funlock(env->me_lfd, LCK_UPPER)) if ((env->me_flags & MDBX_EXCLUSIVE) == 0 &&
!funlock(env->me_lfd, LCK_UPPER))
mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError()); mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
} }
mdbx_srwlock_ReleaseShared(&env->me_remap_guard); mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
@ -372,6 +376,9 @@ int mdbx_lck_seize(MDBX_env *env) {
int rc; int rc;
assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_fd != INVALID_HANDLE_VALUE);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_RESULT_TRUE /* files must have been opened non-shareable */;
if (env->me_lfd == INVALID_HANDLE_VALUE) { if (env->me_lfd == INVALID_HANDLE_VALUE) {
/* LY: without-lck mode (e.g. on read-only filesystem) */ /* LY: without-lck mode (e.g. on read-only filesystem) */
mdbx_jitter4testing(false); mdbx_jitter4testing(false);
@ -414,6 +421,9 @@ int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd != INVALID_HANDLE_VALUE); assert(env->me_lfd != INVALID_HANDLE_VALUE);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_SUCCESS /* files must have been opened non-shareable */;
/* 1) must be at E-E (exclusive-write) */ /* 1) must be at E-E (exclusive-write) */
if (!complete) { if (!complete) {
/* transite from E-E to E_? (exclusive-read) */ /* transite from E-E to E_? (exclusive-read) */
@ -448,6 +458,10 @@ int mdbx_lck_upgrade(MDBX_env *env) {
/* Transite from locked state (S-E) to exclusive-write (E-E) */ /* Transite from locked state (S-E) to exclusive-write (E-E) */
assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd != INVALID_HANDLE_VALUE); assert(env->me_lfd != INVALID_HANDLE_VALUE);
assert((env->me_flags & MDBX_EXCLUSIVE) == 0);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_RESULT_TRUE /* files must have been opened non-shareable */;
/* 1) must be at S-E (locked), transite to ?_E (middle) */ /* 1) must be at S-E (locked), transite to ?_E (middle) */
if (!funlock(env->me_lfd, LCK_LOWER)) if (!funlock(env->me_lfd, LCK_LOWER))

View File

@ -5583,7 +5583,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd == INVALID_HANDLE_VALUE); assert(env->me_lfd == INVALID_HANDLE_VALUE);
int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd); int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd,
(env->me_flags & MDBX_EXCLUSIVE) ? true : false);
if (err != MDBX_SUCCESS) { if (err != MDBX_SUCCESS) {
if (err != MDBX_EROFS || (env->me_flags & MDBX_RDONLY) == 0) if (err != MDBX_EROFS || (env->me_flags & MDBX_RDONLY) == 0)
return err; return err;
@ -5626,10 +5627,14 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
return err; return err;
size = wanna; size = wanna;
} }
} else if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) || } else {
size < env->me_os_psize) { if (env->me_flags & MDBX_EXCLUSIVE)
mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size); return MDBX_BUSY;
return MDBX_PROBLEM; if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) ||
size < env->me_os_psize) {
mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size);
return MDBX_PROBLEM;
}
} }
const size_t maxreaders = const size_t maxreaders =
@ -5699,14 +5704,14 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
MDBX_COALESCE | MDBX_PAGEPERTURB) MDBX_COALESCE | MDBX_PAGEPERTURB)
#define CHANGELESS \ #define CHANGELESS \
(MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \ (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
MDBX_LIFORECLAIM) MDBX_LIFORECLAIM | MDBX_EXCLUSIVE)
#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE | CHANGELESS) #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE | CHANGELESS)
#error "Persistent DB flags & env flags overlap, but both go in mm_flags" #error "Persistent DB flags & env flags overlap, but both go in mm_flags"
#endif #endif
int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
mode_t mode, int *exclusive) { mode_t mode) {
if (unlikely(!env || !path)) if (unlikely(!env || !path))
return MDBX_EINVAL; return MDBX_EINVAL;
@ -5770,7 +5775,8 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
else else
oflags = O_RDWR | O_CREAT; oflags = O_RDWR | O_CREAT;
rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd); rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd,
(env->me_flags & MDBX_EXCLUSIVE) ? true : false);
if (rc != MDBX_SUCCESS) if (rc != MDBX_SUCCESS)
goto bailout; goto bailout;
@ -5791,7 +5797,7 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC; MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC;
if (lck_rc == MDBX_RESULT_TRUE) { if (lck_rc == MDBX_RESULT_TRUE) {
env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY); env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY);
if (exclusive == NULL || *exclusive < 2) { if ((env->me_flags & MDBX_EXCLUSIVE) == 0) {
/* LY: downgrade lock only if exclusive access not requested. /* LY: downgrade lock only if exclusive access not requested.
* in case exclusive==1, just leave value as is. */ * with MDBX_EXCLUSIVE set, the lock is not downgraded. */
rc = mdbx_lck_downgrade(env, true); rc = mdbx_lck_downgrade(env, true);
@ -5803,10 +5809,6 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
if (rc != MDBX_SUCCESS) if (rc != MDBX_SUCCESS)
goto bailout; goto bailout;
} else { } else {
if (exclusive) {
/* LY: just indicate that is not an exclusive access. */
*exclusive = 0;
}
if ((env->me_flags & MDBX_RDONLY) == 0) { if ((env->me_flags & MDBX_RDONLY) == 0) {
while (env->me_lck->mti_envmode == MDBX_RDONLY) { while (env->me_lck->mti_envmode == MDBX_RDONLY) {
if (mdbx_atomic_compare_and_swap32(&env->me_lck->mti_envmode, if (mdbx_atomic_compare_and_swap32(&env->me_lck->mti_envmode,
@ -5877,11 +5879,6 @@ bailout:
return rc; return rc;
} }
int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
mode_t mode) {
return mdbx_env_open_ex(env, path, flags, mode, NULL);
}
/* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ /* Destroy resources from mdbx_env_open(), clear our readers & DBIs */
static void __cold mdbx_env_close0(MDBX_env *env) { static void __cold mdbx_env_close0(MDBX_env *env) {
if (!(env->me_flags & MDBX_ENV_ACTIVE)) if (!(env->me_flags & MDBX_ENV_ACTIVE))
@ -10496,8 +10493,8 @@ int __cold mdbx_env_copy(MDBX_env *env, const char *path, unsigned flags) {
/* The destination path must exist, but the destination file must not. /* The destination path must exist, but the destination file must not.
* We don't want the OS to cache the writes, since the source data is * We don't want the OS to cache the writes, since the source data is
* already in the OS cache. */ * already in the OS cache. */
int rc = int rc = mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666,
mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666, &newfd); &newfd, true);
if (rc == MDBX_SUCCESS) { if (rc == MDBX_SUCCESS) {
if (env->me_psize >= env->me_os_psize) { if (env->me_psize >= env->me_os_psize) {
#ifdef F_NOCACHE /* __APPLE__ */ #ifdef F_NOCACHE /* __APPLE__ */

View File

@ -409,19 +409,20 @@ int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex) {
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
int mdbx_openfile(const char *pathname, int flags, mode_t mode, int mdbx_openfile(const char *pathname, int flags, mode_t mode,
mdbx_filehandle_t *fd) { mdbx_filehandle_t *fd, bool exclusive) {
*fd = INVALID_HANDLE_VALUE; *fd = INVALID_HANDLE_VALUE;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
(void)mode; (void)mode;
DWORD DesiredAccess; DWORD DesiredAccess, ShareMode;
DWORD ShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE;
DWORD FlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; DWORD FlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) { switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) {
default: default:
return ERROR_INVALID_PARAMETER; return ERROR_INVALID_PARAMETER;
case O_RDONLY: case O_RDONLY:
DesiredAccess = GENERIC_READ; DesiredAccess = GENERIC_READ;
ShareMode =
exclusive ? FILE_SHARE_READ : (FILE_SHARE_READ | FILE_SHARE_WRITE);
break; break;
case O_WRONLY: /* assume for MDBX_env_copy() and friends output */ case O_WRONLY: /* assume for MDBX_env_copy() and friends output */
DesiredAccess = GENERIC_WRITE; DesiredAccess = GENERIC_WRITE;
@ -430,6 +431,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode,
break; break;
case O_RDWR: case O_RDWR:
DesiredAccess = GENERIC_READ | GENERIC_WRITE; DesiredAccess = GENERIC_READ | GENERIC_WRITE;
ShareMode = exclusive ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE);
break; break;
} }
@ -468,7 +470,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode,
} }
} }
#else #else
(void)exclusive;
#ifdef O_CLOEXEC #ifdef O_CLOEXEC
flags |= O_CLOEXEC; flags |= O_CLOEXEC;
#endif #endif

View File

@ -477,7 +477,7 @@ int mdbx_filesize_sync(mdbx_filehandle_t fd);
int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length); int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length);
int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length); int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length);
int mdbx_openfile(const char *pathname, int flags, mode_t mode, int mdbx_openfile(const char *pathname, int flags, mode_t mode,
mdbx_filehandle_t *fd); mdbx_filehandle_t *fd, bool exclusive);
int mdbx_closefile(mdbx_filehandle_t fd); int mdbx_closefile(mdbx_filehandle_t fd);
typedef struct mdbx_mmap_param { typedef struct mdbx_mmap_param {

View File

@ -73,8 +73,7 @@ struct {
} walk; } walk;
uint64_t total_unused_bytes; uint64_t total_unused_bytes;
int exclusive = 2; int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE;
int envflags = MDBX_RDONLY;
MDBX_env *env; MDBX_env *env;
MDBX_txn *txn; MDBX_txn *txn;
@ -706,7 +705,7 @@ void verbose_meta(int num, txnid_t txnid, uint64_t sign) {
print(", stay"); print(", stay");
if (txnid > envinfo.mi_recent_txnid && if (txnid > envinfo.mi_recent_txnid &&
(exclusive || (envflags & MDBX_RDONLY) == 0)) (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE)
print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")", print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")",
txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid); txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid);
print("\n"); print("\n");
@ -805,7 +804,7 @@ int main(int argc, char *argv[]) {
envflags &= ~MDBX_RDONLY; envflags &= ~MDBX_RDONLY;
break; break;
case 'c': case 'c':
exclusive = 0; envflags &= ~MDBX_EXCLUSIVE;
break; break;
case 'd': case 'd':
dont_traversal = 1; dont_traversal = 1;
@ -853,7 +852,19 @@ int main(int argc, char *argv[]) {
goto bailout; goto bailout;
} }
rc = mdbx_env_open_ex(env, envname, envflags, 0664, &exclusive); rc = mdbx_env_open(env, envname, envflags, 0664);
if ((envflags & MDBX_EXCLUSIVE) &&
(rc == MDBX_BUSY ||
#if defined(_WIN32) || defined(_WIN64)
rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION
#else
rc == EBUSY
#endif
)) {
envflags &= ~MDBX_EXCLUSIVE;
rc = mdbx_env_open(env, envname, envflags, 0664);
}
if (rc) { if (rc) {
error("mdbx_env_open failed, error %d %s\n", rc, mdbx_strerror(rc)); error("mdbx_env_open failed, error %d %s\n", rc, mdbx_strerror(rc));
if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY)) if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY))
@ -861,7 +872,8 @@ int main(int argc, char *argv[]) {
goto bailout; goto bailout;
} }
if (verbose) if (verbose)
print(" - %s mode\n", exclusive ? "monopolistic" : "cooperative"); print(" - %s mode\n",
(envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative");
if ((envflags & MDBX_RDONLY) == 0) { if ((envflags & MDBX_RDONLY) == 0) {
rc = mdbx_txn_lock(env, false); rc = mdbx_txn_lock(env, false);
@ -946,7 +958,7 @@ int main(int argc, char *argv[]) {
++problems_meta; ++problems_meta;
} }
if (exclusive > 1) { if (envflags & MDBX_EXCLUSIVE) {
if (verbose) if (verbose)
print(" - performs full check recent-txn-id with meta-pages\n"); print(" - performs full check recent-txn-id with meta-pages\n");
problems_meta += check_meta_head(true); problems_meta += check_meta_head(true);
@ -1079,7 +1091,8 @@ int main(int argc, char *argv[]) {
} }
if (problems_maindb == 0 && problems_freedb == 0) { if (problems_maindb == 0 && problems_freedb == 0) {
if (!dont_traversal && (exclusive || (envflags & MDBX_RDONLY) == 0)) { if (!dont_traversal &&
(envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) {
if (walk.pgcount != lastpgno - freedb_pages) { if (walk.pgcount != lastpgno - freedb_pages) {
error("used pages mismatch (%" PRIu64 " != %" PRIu64 ")\n", error("used pages mismatch (%" PRIu64 " != %" PRIu64 ")\n",
walk.pgcount, lastpgno - freedb_pages); walk.pgcount, lastpgno - freedb_pages);