mdbx: add MDBX_EXCLUSIVE instead of mdbx_env_open_ex().

Change-Id: I3c817d58d971385bcd07643df14beaf7122c7836
This commit is contained in:
Leo Yuriev 2018-06-13 17:02:31 +03:00
parent 09ad941a05
commit 0dfa9cd09a
8 changed files with 69 additions and 46 deletions

View File

@ -591,8 +591,8 @@ _libmdbx_ при этом не ведет WAL, а передает весь ко
13. Исправленный вариант `mdbx_cursor_count()`, возвращающий корректное
количество дубликатов для всех типов таблиц и любого положения курсора.
14. Возможность открыть БД в эксклюзивном режиме посредством
`mdbx_env_open_ex()`, например в целях её проверки.
14. Возможность открыть БД в эксклюзивном режиме посредством флага
`MDBX_EXCLUSIVE`, например в целях её проверки.
15. Возможность закрыть БД в "грязном" состоянии (без сброса данных и
формирования сильной точки фиксации) посредством `mdbx_env_close_ex()`.

View File

@ -405,7 +405,7 @@ Improvements over LMDB
13. Fixed `mdbx_cursor_count()`, which returns the correct count of duplicates for all table types and any cursor position.
14. Ability to open DB in exclusive mode via `mdbx_env_open_ex()`, e.g. for integrity check.
14. Ability to open the DB in exclusive mode with the `MDBX_EXCLUSIVE` flag, e.g. for an integrity check.
15. Ability to close the DB in a "dirty" state (without flushing data or creating a steady synchronization point)
via `mdbx_env_close_ex()`.

7
mdbx.h
View File

@ -288,9 +288,8 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b);
#define MDBX_MAPASYNC 0x100000u
/* tie reader locktable slots to MDBX_txn objects instead of to threads */
#define MDBX_NOTLS 0x200000u
/* don't do any locking, caller must manage their own locks
* WARNING: libmdbx don't support this mode. */
#define MDBX_NOLOCK__UNSUPPORTED 0x400000u
/* open DB in exclusive/monopolistic mode. */
#define MDBX_EXCLUSIVE 0x400000u
/* don't do readahead */
#define MDBX_NORDAHEAD 0x800000u
/* don't initialize malloc'd memory before writing to datafile */
@ -670,8 +669,6 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* - MDBX_EAGAIN - the environment was locked by another process. */
LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
mode_t mode);
LIBMDBX_API int mdbx_env_open_ex(MDBX_env *env, const char *path,
unsigned flags, mode_t mode, int *exclusive);
/* Copy an MDBX environment to the specified path, with options.
*

View File

@ -132,7 +132,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
EnterCriticalSection(&env->me_windowsbug_lock);
}
if (flock(env->me_fd,
if ((env->me_flags & MDBX_EXCLUSIVE) ||
flock(env->me_fd,
dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT)
: (LCK_EXCLUSIVE | LCK_WAITFOR),
LCK_BODY))
@ -143,7 +144,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
}
void mdbx_txn_unlock(MDBX_env *env) {
int rc = funlock(env->me_fd, LCK_BODY);
int rc = (env->me_flags & MDBX_EXCLUSIVE) ? TRUE
: funlock(env->me_fd, LCK_BODY);
LeaveCriticalSection(&env->me_windowsbug_lock);
if (!rc)
mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
@ -166,7 +168,8 @@ int mdbx_rdt_lock(MDBX_env *env) {
return MDBX_SUCCESS; /* readonly database in readonly filesystem */
/* transition from S-? (used) to S-E (locked), i.e. take an exclusive lock on the upper part */
if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER))
if ((env->me_flags & MDBX_EXCLUSIVE) ||
flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER))
return MDBX_SUCCESS;
int rc = GetLastError();
@ -177,7 +180,8 @@ int mdbx_rdt_lock(MDBX_env *env) {
void mdbx_rdt_unlock(MDBX_env *env) {
if (env->me_lfd != INVALID_HANDLE_VALUE) {
/* transition from S-E (locked) to S-? (used), i.e. unlock the upper part */
if (!funlock(env->me_lfd, LCK_UPPER))
if ((env->me_flags & MDBX_EXCLUSIVE) == 0 &&
!funlock(env->me_lfd, LCK_UPPER))
mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
}
mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
@ -372,6 +376,9 @@ int mdbx_lck_seize(MDBX_env *env) {
int rc;
assert(env->me_fd != INVALID_HANDLE_VALUE);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_RESULT_TRUE /* files must have been opened non-shareable */;
if (env->me_lfd == INVALID_HANDLE_VALUE) {
/* LY: without-lck mode (e.g. on read-only filesystem) */
mdbx_jitter4testing(false);
@ -414,6 +421,9 @@ int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
assert(env->me_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd != INVALID_HANDLE_VALUE);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_SUCCESS /* files must have been opened non-shareable */;
/* 1) must be at E-E (exclusive-write) */
if (!complete) {
/* transition from E-E to E_? (exclusive-read) */
@ -448,6 +458,10 @@ int mdbx_lck_upgrade(MDBX_env *env) {
/* Transition from locked state (S-E) to exclusive-write (E-E) */
assert(env->me_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd != INVALID_HANDLE_VALUE);
assert((env->me_flags & MDBX_EXCLUSIVE) == 0);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_RESULT_TRUE /* files must have been opened non-shareable */;
/* 1) must be at S-E (locked), transition to ?_E (middle) */
if (!funlock(env->me_lfd, LCK_LOWER))

View File

@ -5583,7 +5583,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
assert(env->me_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd == INVALID_HANDLE_VALUE);
int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd);
int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd,
(env->me_flags & MDBX_EXCLUSIVE) ? true : false);
if (err != MDBX_SUCCESS) {
if (err != MDBX_EROFS || (env->me_flags & MDBX_RDONLY) == 0)
return err;
@ -5626,10 +5627,14 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
return err;
size = wanna;
}
} else if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) ||
size < env->me_os_psize) {
mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size);
return MDBX_PROBLEM;
} else {
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_BUSY;
if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) ||
size < env->me_os_psize) {
mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size);
return MDBX_PROBLEM;
}
}
const size_t maxreaders =
@ -5699,14 +5704,14 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
MDBX_COALESCE | MDBX_PAGEPERTURB)
#define CHANGELESS \
(MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
MDBX_LIFORECLAIM)
MDBX_LIFORECLAIM | MDBX_EXCLUSIVE)
#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE | CHANGELESS)
#error "Persistent DB flags & env flags overlap, but both go in mm_flags"
#endif
int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
mode_t mode, int *exclusive) {
int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
mode_t mode) {
if (unlikely(!env || !path))
return MDBX_EINVAL;
@ -5770,7 +5775,8 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
else
oflags = O_RDWR | O_CREAT;
rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd);
rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd,
(env->me_flags & MDBX_EXCLUSIVE) ? true : false);
if (rc != MDBX_SUCCESS)
goto bailout;
@ -5791,7 +5797,7 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC;
if (lck_rc == MDBX_RESULT_TRUE) {
env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY);
if (exclusive == NULL || *exclusive < 2) {
if ((env->me_flags & MDBX_EXCLUSIVE) == 0) {
/* LY: downgrade the lock only if exclusive access was not requested;
* when MDBX_EXCLUSIVE is set, the lock is left as is. */
rc = mdbx_lck_downgrade(env, true);
@ -5803,10 +5809,6 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags,
if (rc != MDBX_SUCCESS)
goto bailout;
} else {
if (exclusive) {
/* LY: just indicate that is not an exclusive access. */
*exclusive = 0;
}
if ((env->me_flags & MDBX_RDONLY) == 0) {
while (env->me_lck->mti_envmode == MDBX_RDONLY) {
if (mdbx_atomic_compare_and_swap32(&env->me_lck->mti_envmode,
@ -5877,11 +5879,6 @@ bailout:
return rc;
}
int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
mode_t mode) {
return mdbx_env_open_ex(env, path, flags, mode, NULL);
}
/* Destroy resources from mdbx_env_open(), clear our readers & DBIs */
static void __cold mdbx_env_close0(MDBX_env *env) {
if (!(env->me_flags & MDBX_ENV_ACTIVE))
@ -10496,8 +10493,8 @@ int __cold mdbx_env_copy(MDBX_env *env, const char *path, unsigned flags) {
/* The destination path must exist, but the destination file must not.
* We don't want the OS to cache the writes, since the source data is
* already in the OS cache. */
int rc =
mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666, &newfd);
int rc = mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666,
&newfd, true);
if (rc == MDBX_SUCCESS) {
if (env->me_psize >= env->me_os_psize) {
#ifdef F_NOCACHE /* __APPLE__ */

View File

@ -409,19 +409,20 @@ int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex) {
/*----------------------------------------------------------------------------*/
int mdbx_openfile(const char *pathname, int flags, mode_t mode,
mdbx_filehandle_t *fd) {
mdbx_filehandle_t *fd, bool exclusive) {
*fd = INVALID_HANDLE_VALUE;
#if defined(_WIN32) || defined(_WIN64)
(void)mode;
DWORD DesiredAccess;
DWORD ShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE;
DWORD DesiredAccess, ShareMode;
DWORD FlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) {
default:
return ERROR_INVALID_PARAMETER;
case O_RDONLY:
DesiredAccess = GENERIC_READ;
ShareMode =
exclusive ? FILE_SHARE_READ : (FILE_SHARE_READ | FILE_SHARE_WRITE);
break;
case O_WRONLY: /* assume for MDBX_env_copy() and friends output */
DesiredAccess = GENERIC_WRITE;
@ -430,6 +431,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode,
break;
case O_RDWR:
DesiredAccess = GENERIC_READ | GENERIC_WRITE;
ShareMode = exclusive ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE);
break;
}
@ -468,7 +470,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode,
}
}
#else
(void)exclusive;
#ifdef O_CLOEXEC
flags |= O_CLOEXEC;
#endif

View File

@ -477,7 +477,7 @@ int mdbx_filesize_sync(mdbx_filehandle_t fd);
int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length);
int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length);
int mdbx_openfile(const char *pathname, int flags, mode_t mode,
mdbx_filehandle_t *fd);
mdbx_filehandle_t *fd, bool exclusive);
int mdbx_closefile(mdbx_filehandle_t fd);
typedef struct mdbx_mmap_param {

View File

@ -73,8 +73,7 @@ struct {
} walk;
uint64_t total_unused_bytes;
int exclusive = 2;
int envflags = MDBX_RDONLY;
int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE;
MDBX_env *env;
MDBX_txn *txn;
@ -706,7 +705,7 @@ void verbose_meta(int num, txnid_t txnid, uint64_t sign) {
print(", stay");
if (txnid > envinfo.mi_recent_txnid &&
(exclusive || (envflags & MDBX_RDONLY) == 0))
(envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE)
print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")",
txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid);
print("\n");
@ -805,7 +804,7 @@ int main(int argc, char *argv[]) {
envflags &= ~MDBX_RDONLY;
break;
case 'c':
exclusive = 0;
envflags &= ~MDBX_EXCLUSIVE;
break;
case 'd':
dont_traversal = 1;
@ -853,7 +852,19 @@ int main(int argc, char *argv[]) {
goto bailout;
}
rc = mdbx_env_open_ex(env, envname, envflags, 0664, &exclusive);
rc = mdbx_env_open(env, envname, envflags, 0664);
if ((envflags & MDBX_EXCLUSIVE) &&
(rc == MDBX_BUSY ||
#if defined(_WIN32) || defined(_WIN64)
rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION
#else
rc == EBUSY
#endif
)) {
envflags &= ~MDBX_EXCLUSIVE;
rc = mdbx_env_open(env, envname, envflags, 0664);
}
if (rc) {
error("mdbx_env_open failed, error %d %s\n", rc, mdbx_strerror(rc));
if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY))
@ -861,7 +872,8 @@ int main(int argc, char *argv[]) {
goto bailout;
}
if (verbose)
print(" - %s mode\n", exclusive ? "monopolistic" : "cooperative");
print(" - %s mode\n",
(envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative");
if ((envflags & MDBX_RDONLY) == 0) {
rc = mdbx_txn_lock(env, false);
@ -946,7 +958,7 @@ int main(int argc, char *argv[]) {
++problems_meta;
}
if (exclusive > 1) {
if (envflags & MDBX_EXCLUSIVE) {
if (verbose)
print(" - performs full check recent-txn-id with meta-pages\n");
problems_meta += check_meta_head(true);
@ -1079,7 +1091,8 @@ int main(int argc, char *argv[]) {
}
if (problems_maindb == 0 && problems_freedb == 0) {
if (!dont_traversal && (exclusive || (envflags & MDBX_RDONLY) == 0)) {
if (!dont_traversal &&
(envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) {
if (walk.pgcount != lastpgno - freedb_pages) {
error("used pages mismatch (%" PRIu64 " != %" PRIu64 ")\n",
walk.pgcount, lastpgno - freedb_pages);