mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-20 05:58:21 +08:00
mdbx: support exclusive mode without lck-file.
Change-Id: I1beef09d62965d0d777f579a8aa6f15c478eebd2
This commit is contained in:
parent
9e81d5b631
commit
d90e6187f7
3
mdbx.h
3
mdbx.h
@ -100,6 +100,7 @@ typedef DWORD mdbx_tid_t;
|
||||
#define MDBX_EIO ERROR_WRITE_FAULT
|
||||
#define MDBX_EPERM ERROR_INVALID_FUNCTION
|
||||
#define MDBX_EINTR ERROR_CANCELLED
|
||||
#define MDBX_ENOFILE ERROR_FILE_NOT_FOUND
|
||||
|
||||
#else
|
||||
|
||||
@ -120,6 +121,8 @@ typedef pthread_t mdbx_tid_t;
|
||||
#define MDBX_EIO EIO
|
||||
#define MDBX_EPERM EPERM
|
||||
#define MDBX_EINTR EINTR
|
||||
#define MDBX_ENOFILE ENOENT
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
@ -411,7 +411,7 @@ typedef struct MDBX_lockinfo {
|
||||
volatile uint32_t mti_envmode;
|
||||
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
/* Mutex protecting write access to this table. */
|
||||
/* Mutex protecting write-txn. */
|
||||
union {
|
||||
MDBX_OSAL_LOCK mti_wmutex;
|
||||
uint8_t pad_mti_wmutex[MDBX_OSAL_LOCK_SIZE % sizeof(size_t)];
|
||||
@ -742,6 +742,9 @@ struct MDBX_env {
|
||||
void *me_pbuf; /* scratch area for DUPSORT put() */
|
||||
MDBX_txn *me_txn; /* current write transaction */
|
||||
MDBX_txn *me_txn0; /* prealloc'd write transaction */
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
MDBX_OSAL_LOCK *me_wmutex; /* write-txn mutex */
|
||||
#endif
|
||||
MDBX_dbx *me_dbxs; /* array of static DB info */
|
||||
uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */
|
||||
unsigned *me_dbiseqs; /* array of dbi sequence numbers */
|
||||
@ -786,6 +789,7 @@ struct MDBX_env {
|
||||
/* Workaround for LockFileEx and WriteFile multithread bug */
|
||||
CRITICAL_SECTION me_windowsbug_lock;
|
||||
#else
|
||||
mdbx_fastmutex_t me_lckless_wmutex;
|
||||
mdbx_fastmutex_t me_remap_guard;
|
||||
#endif
|
||||
};
|
||||
|
@ -86,16 +86,19 @@ static __inline int mdbx_lck_shared(int lfd) {
|
||||
}
|
||||
|
||||
/* Downgrade the lock held through the lck-file descriptor.
 *
 * env      - environment; env->me_lfd must be a valid handle (asserted).
 * complete - when true, the result of mdbx_lck_shared(env->me_lfd) is
 *            returned; when false, nothing is done.
 *
 * Returns MDBX_SUCCESS for the no-op case, otherwise whatever
 * mdbx_lck_shared() reports. */
int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
  assert(env->me_lfd != INVALID_HANDLE_VALUE);

  /* A partial downgrade requires no lock operation at all. */
  if (!complete)
    return MDBX_SUCCESS;

  return mdbx_lck_shared(env->me_lfd);
}
|
||||
|
||||
/* Upgrade the lock: forwards env->me_lfd to mdbx_lck_exclusive() and
 * returns its result unchanged. */
int mdbx_lck_upgrade(MDBX_env *env) {
  const int rc = mdbx_lck_exclusive(env->me_lfd);
  return rc;
}
|
||||
|
||||
/* Mark this process as a live reader in the lck-file.
 *
 * Issues a non-blocking (F_SETLK) write-lock (F_WRLCK) request through
 * mdbx_lck_op(), passing env->me_pid and 1 as the last two arguments
 * (presumably a one-byte range at offset me_pid; confirm against
 * mdbx_lck_op()).
 *
 * env->me_lfd must be a valid handle (asserted).
 * Returns the result of mdbx_lck_op(). */
int mdbx_rpid_set(MDBX_env *env) {
  assert(env->me_lfd != INVALID_HANDLE_VALUE);

  const int rc = mdbx_lck_op(env->me_lfd, F_SETLK, F_WRLCK, env->me_pid, 1);
  return rc;
}
|
||||
|
||||
/* Drop this process's reader mark from the lck-file.
 *
 * Issues a blocking (F_SETLKW) unlock (F_UNLCK) request through
 * mdbx_lck_op(), passing env->me_pid and 1 as the last two arguments
 * (presumably the same one-byte range that mdbx_rpid_set() locks;
 * confirm against mdbx_lck_op()).
 *
 * env->me_lfd must be a valid handle (asserted).
 * Returns the result of mdbx_lck_op(). */
int mdbx_rpid_clear(MDBX_env *env) {
  assert(env->me_lfd != INVALID_HANDLE_VALUE);

  const int rc = mdbx_lck_op(env->me_lfd, F_SETLKW, F_UNLCK, env->me_pid, 1);
  return rc;
}
|
||||
|
||||
@ -106,6 +109,7 @@ int mdbx_rpid_clear(MDBX_env *env) {
|
||||
* MDBX_RESULT_FALSE, if pid is dead (lock acquired)
|
||||
* or otherwise the errcode. */
|
||||
int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) {
|
||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
||||
int rc = mdbx_lck_op(env->me_lfd, F_GETLK, F_WRLCK, pid, 1);
|
||||
if (rc == 0)
|
||||
return MDBX_RESULT_FALSE;
|
||||
@ -166,7 +170,8 @@ void mdbx_lck_destroy(MDBX_env *env) {
|
||||
rc = pthread_mutex_destroy(&env->me_lck->mti_wmutex);
|
||||
assert(rc == 0);
|
||||
(void)rc;
|
||||
/* lock would be released (by kernel) while the me_lfd will be closed */
|
||||
/* file locks would be released (by kernel)
|
||||
* while the me_lfd will be closed */
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -209,15 +214,15 @@ void mdbx_rdt_unlock(MDBX_env *env) {
|
||||
|
||||
/* Acquire the write-transaction mutex.
 *
 * env      - environment whose write-txn mutex is taken.
 * dontwait - when true, mdbx_robust_trylock() is used instead of
 *            mdbx_robust_lock().
 *
 * Returns the lock result if MDBX_IS_ERROR() classifies it as an error,
 * otherwise MDBX_SUCCESS. */
int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
  mdbx_trace(">>");

  /* Always go through env->me_wmutex, never &env->me_lck->mti_wmutex:
   * in lck-less (exclusive) mode env->me_lck is NULL and me_wmutex
   * points at the per-environment me_lckless_wmutex instead. */
  int rc = dontwait ? mdbx_robust_trylock(env, env->me_wmutex)
                    : mdbx_robust_lock(env, env->me_wmutex);

  mdbx_trace("<< rc %d", rc);
  return MDBX_IS_ERROR(rc) ? rc : MDBX_SUCCESS;
}
|
||||
|
||||
void mdbx_txn_unlock(MDBX_env *env) {
|
||||
mdbx_trace(">>");
|
||||
int rc = mdbx_robust_unlock(env, &env->me_lck->mti_wmutex);
|
||||
int rc = mdbx_robust_unlock(env, env->me_wmutex);
|
||||
mdbx_trace("<< rc %d", rc);
|
||||
if (unlikely(MDBX_IS_ERROR(rc)))
|
||||
mdbx_panic("%s() failed: errcode %d\n", mdbx_func_, rc);
|
||||
@ -253,13 +258,15 @@ int mdbx_lck_seize(MDBX_env *env) {
|
||||
assert(env->me_fd != INVALID_HANDLE_VALUE);
|
||||
|
||||
if (env->me_lfd == INVALID_HANDLE_VALUE) {
|
||||
/* LY: without-lck mode (e.g. on read-only filesystem) */
|
||||
int rc = mdbx_lck_op(env->me_fd, F_SETLK, F_RDLCK, 0, LCK_WHOLE);
|
||||
/* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */
|
||||
int rc = mdbx_lck_op(env->me_fd, F_SETLK,
|
||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
|
||||
LCK_WHOLE);
|
||||
if (rc != 0) {
|
||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "without-lck", rc);
|
||||
return rc;
|
||||
}
|
||||
return MDBX_RESULT_FALSE;
|
||||
return MDBX_RESULT_TRUE;
|
||||
}
|
||||
|
||||
if ((env->me_flags & MDBX_RDONLY) == 0) {
|
||||
@ -285,7 +292,7 @@ static int __cold mdbx_mutex_failed(MDBX_env *env, pthread_mutex_t *mutex,
|
||||
if (rc == EOWNERDEAD) {
|
||||
/* We own the mutex. Clean up after dead previous owner. */
|
||||
|
||||
int rlocked = (mutex == &env->me_lck->mti_rmutex);
|
||||
int rlocked = (env->me_lck && mutex == &env->me_lck->mti_rmutex);
|
||||
rc = MDBX_SUCCESS;
|
||||
if (!rlocked) {
|
||||
if (unlikely(env->me_txn)) {
|
||||
|
73
src/mdbx.c
73
src/mdbx.c
@ -1931,13 +1931,16 @@ static const char *mdbx_durable_str(const MDBX_meta *const meta) {
|
||||
static txnid_t mdbx_find_oldest(MDBX_txn *txn) {
|
||||
mdbx_tassert(txn, (txn->mt_flags & MDBX_RDONLY) == 0);
|
||||
const MDBX_env *env = txn->mt_env;
|
||||
MDBX_lockinfo *const lck = env->me_lck;
|
||||
|
||||
const txnid_t edge = mdbx_reclaiming_detent(env);
|
||||
mdbx_tassert(txn, edge <= txn->mt_txnid - 1);
|
||||
|
||||
MDBX_lockinfo *const lck = env->me_lck;
|
||||
if (unlikely(env->me_lck == NULL /* exclusive mode */))
|
||||
return edge;
|
||||
|
||||
const txnid_t last_oldest = lck->mti_oldest;
|
||||
mdbx_tassert(txn, edge >= last_oldest);
|
||||
if (last_oldest == edge)
|
||||
if (likely(last_oldest == edge))
|
||||
return edge;
|
||||
|
||||
const uint32_t nothing_changed = MDBX_STRING_TETRAD("None");
|
||||
@ -2734,9 +2737,6 @@ static int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
|
||||
if (unlikely(flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)))
|
||||
return MDBX_EACCESS;
|
||||
|
||||
if (unlikely(!env->me_lck))
|
||||
return MDBX_PANIC;
|
||||
|
||||
const bool outside_txn =
|
||||
(!env->me_txn0 || env->me_txn0->mt_owner != mdbx_thread_self());
|
||||
|
||||
@ -4274,6 +4274,8 @@ int mdbx_txn_commit(MDBX_txn *txn) {
|
||||
}
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
if (likely(env->me_lck))
|
||||
env->me_lck->mti_readers_refresh_flag = false;
|
||||
end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE;
|
||||
|
||||
@ -4926,6 +4928,12 @@ int __cold mdbx_env_create(MDBX_env **penv) {
|
||||
mdbx_fastmutex_destroy(&env->me_dbi_lock);
|
||||
goto bailout;
|
||||
}
|
||||
rc = mdbx_fastmutex_init(&env->me_lckless_wmutex);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
mdbx_fastmutex_destroy(&env->me_remap_guard);
|
||||
mdbx_fastmutex_destroy(&env->me_dbi_lock);
|
||||
goto bailout;
|
||||
}
|
||||
#endif /* Windows */
|
||||
|
||||
VALGRIND_CREATE_MEMPOOL(env, 0, 0);
|
||||
@ -5589,17 +5597,30 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
||||
assert(env->me_fd != INVALID_HANDLE_VALUE);
|
||||
assert(env->me_lfd == INVALID_HANDLE_VALUE);
|
||||
|
||||
int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd,
|
||||
const int open_flags =
|
||||
(env->me_flags & MDBX_EXCLUSIVE) ? O_RDWR : O_RDWR | O_CREAT;
|
||||
int err = mdbx_openfile(lck_pathname, open_flags, mode, &env->me_lfd,
|
||||
(env->me_flags & MDBX_EXCLUSIVE) ? true : false);
|
||||
if (err != MDBX_SUCCESS) {
|
||||
if (err != MDBX_EROFS || (env->me_flags & MDBX_RDONLY) == 0)
|
||||
if (!(err == MDBX_ENOFILE && (env->me_flags & MDBX_EXCLUSIVE)) &&
|
||||
!(err == MDBX_EROFS && (env->me_flags & MDBX_RDONLY)))
|
||||
return err;
|
||||
/* LY: without-lck mode (e.g. on read-only filesystem) */
|
||||
|
||||
/* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */
|
||||
env->me_lfd = INVALID_HANDLE_VALUE;
|
||||
const int rc = mdbx_lck_seize(env);
|
||||
if (MDBX_IS_ERROR(rc))
|
||||
return rc;
|
||||
|
||||
env->me_oldest = &env->me_oldest_stub;
|
||||
env->me_maxreaders = UINT_MAX;
|
||||
mdbx_debug("lck-setup: %s ", "lockless mode (readonly)");
|
||||
return MDBX_SUCCESS;
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
env->me_wmutex = &env->me_lckless_wmutex;
|
||||
#endif
|
||||
mdbx_debug("lck-setup:%s%s%s", " lck-less",
|
||||
(env->me_flags & MDBX_RDONLY) ? " readonly" : "",
|
||||
(rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative");
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Try to get exclusive lock. If we succeed, then
|
||||
@ -5608,8 +5629,9 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
||||
if (MDBX_IS_ERROR(rc))
|
||||
return rc;
|
||||
|
||||
mdbx_debug("lck-setup: %s ",
|
||||
(rc == MDBX_RESULT_TRUE) ? "exclusive" : "shared");
|
||||
mdbx_debug("lck-setup:%s%s%s", " with-lck",
|
||||
(env->me_flags & MDBX_RDONLY) ? " readonly" : "",
|
||||
(rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative");
|
||||
|
||||
uint64_t size;
|
||||
err = mdbx_filesize(env->me_lfd, &size);
|
||||
@ -5699,6 +5721,9 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
||||
|
||||
mdbx_assert(env, !MDBX_IS_ERROR(rc));
|
||||
env->me_oldest = &env->me_lck->mti_oldest;
|
||||
#ifdef MDBX_OSAL_LOCK
|
||||
env->me_wmutex = &env->me_lck->mti_wmutex;
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -5804,6 +5829,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
|
||||
}
|
||||
|
||||
mdbx_debug("opened dbenv %p", (void *)env);
|
||||
if (env->me_lck) {
|
||||
const unsigned mode_flags =
|
||||
MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC;
|
||||
if (lck_rc == MDBX_RESULT_TRUE) {
|
||||
@ -5836,13 +5862,14 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
|
||||
}
|
||||
}
|
||||
|
||||
if (env->me_lck && (env->me_flags & MDBX_NOTLS) == 0) {
|
||||
if ((env->me_flags & MDBX_NOTLS) == 0) {
|
||||
rc = mdbx_rthc_alloc(&env->me_txkey, &env->me_lck->mti_readers[0],
|
||||
&env->me_lck->mti_readers[env->me_maxreaders]);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
env->me_flags |= MDBX_ENV_TXKEY;
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & MDBX_RDONLY) == 0) {
|
||||
MDBX_txn *txn;
|
||||
@ -5952,7 +5979,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) {
|
||||
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
|
||||
return MDBX_EBADSIGN;
|
||||
|
||||
if (env->me_lck && (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0) {
|
||||
if ((env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0) {
|
||||
if (env->me_txn0 && env->me_txn0->mt_owner &&
|
||||
env->me_txn0->mt_owner != mdbx_thread_self())
|
||||
return MDBX_BUSY;
|
||||
@ -5987,6 +6014,8 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) {
|
||||
/* me_remap_guard doesn't have a destructor (Slim Reader/Writer Lock) */
|
||||
DeleteCriticalSection(&env->me_windowsbug_lock);
|
||||
#else
|
||||
mdbx_ensure(env,
|
||||
mdbx_fastmutex_destroy(&env->me_lckless_wmutex) == MDBX_SUCCESS);
|
||||
mdbx_ensure(env,
|
||||
mdbx_fastmutex_destroy(&env->me_remap_guard) == MDBX_SUCCESS);
|
||||
#endif /* Windows */
|
||||
@ -11241,6 +11270,7 @@ int __cold mdbx_reader_list(MDBX_env *env, MDBX_msg_func *func, void *ctx) {
|
||||
return MDBX_EBADSIGN;
|
||||
|
||||
const MDBX_lockinfo *const lck = env->me_lck;
|
||||
if (likely(lck)) {
|
||||
const unsigned snap_nreaders = lck->mti_numreaders;
|
||||
for (unsigned i = 0; i < snap_nreaders; i++) {
|
||||
if (lck->mti_readers[i].mr_pid) {
|
||||
@ -11250,7 +11280,8 @@ int __cold mdbx_reader_list(MDBX_env *env, MDBX_msg_func *func, void *ctx) {
|
||||
(uintptr_t)lck->mti_readers[i].mr_pid,
|
||||
(uintptr_t)lck->mti_readers[i].mr_tid);
|
||||
else
|
||||
snprintf(buf, sizeof(buf), "%10" PRIuPTR " %" PRIxPTR " %" PRIaTXN "\n",
|
||||
snprintf(buf, sizeof(buf),
|
||||
"%10" PRIuPTR " %" PRIxPTR " %" PRIaTXN "\n",
|
||||
(uintptr_t)lck->mti_readers[i].mr_pid,
|
||||
(uintptr_t)lck->mti_readers[i].mr_tid, txnid);
|
||||
|
||||
@ -11265,6 +11296,7 @@ int __cold mdbx_reader_list(MDBX_env *env, MDBX_msg_func *func, void *ctx) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (first)
|
||||
rc = func("(no active readers)\n", ctx);
|
||||
|
||||
@ -11327,6 +11359,13 @@ int __cold mdbx_reader_check0(MDBX_env *env, int rdt_locked, int *dead) {
|
||||
}
|
||||
|
||||
MDBX_lockinfo *const lck = env->me_lck;
|
||||
if (unlikely(lck == NULL)) {
|
||||
/* exclusive mode */
|
||||
if (dead)
|
||||
*dead = 0;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
const unsigned snap_nreaders = lck->mti_numreaders;
|
||||
mdbx_pid_t *pids = alloca((snap_nreaders + 1) * sizeof(mdbx_pid_t));
|
||||
pids[0] = 0;
|
||||
@ -11441,7 +11480,7 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
||||
mdbx_assert(env, oldest < env->me_txn0->mt_txnid);
|
||||
mdbx_assert(env, oldest >= laggard);
|
||||
mdbx_assert(env, oldest >= env->me_oldest[0]);
|
||||
if (oldest == laggard)
|
||||
if (oldest == laggard || unlikely(env->me_lck == NULL /* exclusive mode */))
|
||||
return oldest;
|
||||
|
||||
if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL)))
|
||||
|
Loading…
x
Reference in New Issue
Block a user