mdbx-posix: fix mdbx_lck_seize() and mdbx_lck_destroy() for rare race-case.

Change-Id: I49c1d39665e3ef1eb1422b9da02c5719be3ee3b0
This commit is contained in:
Leonid Yuriev 2019-11-08 23:48:53 +03:00
parent 6a7499c8fc
commit 4dfa97cda9

View File

@ -158,8 +158,8 @@ static int lck_op(mdbx_filehandle_t fd, int cmd, int lck, off_t offset,
if (rc != -1) { if (rc != -1) {
if (cmd == op_getlk) { if (cmd == op_getlk) {
/* Checks reader by pid. Returns: /* Checks reader by pid. Returns:
* MDBX_RESULT_TRUE - if pid is live (unable to acquire lock) * MDBX_RESULT_TRUE - if pid is live (reader holds a lock).
* MDBX_RESULT_FALSE - if pid is dead (lock acquired). */ * MDBX_RESULT_FALSE - if pid is dead (a lock could be placed). */
return (lock_op.l_type == F_UNLCK) ? MDBX_RESULT_FALSE return (lock_op.l_type == F_UNLCK) ? MDBX_RESULT_FALSE
: MDBX_RESULT_TRUE; : MDBX_RESULT_TRUE;
} }
@ -218,6 +218,7 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */; return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;
} }
retry_exclusive:
/* Firstly try to get exclusive locking. */ /* Firstly try to get exclusive locking. */
rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1); rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1);
if (rc == MDBX_SUCCESS) { if (rc == MDBX_SUCCESS) {
@ -237,16 +238,15 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
} }
/* Fallback to lck-shared */ /* Fallback to lck-shared */
rc = lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1);
if (rc != MDBX_SUCCESS) {
mdbx_error("%s(%s) failed: errcode %u", __func__, "fallback-shared", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
/* Done: return with shared locking. */
return MDBX_RESULT_FALSE;
} }
/* One of two situations is possible here:
* - mdbx_lck_destroy() in another process is holding the lock
* during destruction;
* - or mdbx_lck_seize() in another process has got the exclusive
* lock and is doing initialization.
* The size of the lck-file is used later to distinguish these cases. */
/* Wait for lck-shared now. */ /* Wait for lck-shared now. */
/* Here may be await during transient processes, for instance until another /* Here may be await during transient processes, for instance until another
* competing process doesn't call lck_downgrade(). */ * competing process doesn't call lck_downgrade(). */
@ -257,6 +257,39 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
return rc; return rc;
} }
/* got shared, retry exclusive */
rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1);
if (rc == MDBX_SUCCESS)
goto continue_dxb_exclusive;
if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
rc == EDEADLK)) {
mdbx_error("%s(%s) failed: errcode %u", __func__, "try-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
/* Check the file size to detect the situation when we got the shared lock
* immediately after mdbx_lck_destroy(). */
struct stat st;
if (fstat(env->me_lfd, &st)) {
rc = errno;
mdbx_error("%s(%s) failed: errcode %u", __func__, "check-filesize", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
if (st.st_size < (unsigned)(sizeof(MDBX_lockinfo) + sizeof(MDBX_reader))) {
mdbx_verbose("lck-file is too short (%u), retry exclusive-lock",
(unsigned)st.st_size);
rc = lck_op(env->me_lfd, op_setlk, F_UNLCK, 0, 1);
if (rc != MDBX_SUCCESS) {
mdbx_error("%s(%s) failed: errcode %u", __func__, "retry-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
goto retry_exclusive;
}
/* Lock against another process operating in without-lck or exclusive mode. */ /* Lock against another process operating in without-lck or exclusive mode. */
rc = rc =
lck_op(env->me_fd, op_setlk, lck_op(env->me_fd, op_setlk,
@ -268,20 +301,8 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
return rc; return rc;
} }
/* got shared, retry exclusive */ /* Done: return with shared locking. */
rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1); return MDBX_RESULT_FALSE;
if (rc == MDBX_SUCCESS)
goto continue_dxb_exclusive;
if (rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
rc == EDEADLK)
return MDBX_RESULT_FALSE /* Done: exclusive is unavailable,
but shared locks are alive. */
;
mdbx_error("%s(%s) failed: errcode %u", __func__, "try-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
} }
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) { MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
@ -316,7 +337,8 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
/* try get exclusive access */ /* try get exclusive access */
lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 && lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 &&
lck_op(env->me_fd, op_setlk, lck_op(env->me_fd, op_setlk,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX)) { (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
OFF_T_MAX) == 0) {
mdbx_verbose("%s: got exclusive, drown mutexes", __func__); mdbx_verbose("%s: got exclusive, drown mutexes", __func__);
#if MDBX_USE_MUTEXES > 0 #if MDBX_USE_MUTEXES > 0
rc = pthread_mutex_destroy(&env->me_lck->mti_rlock); rc = pthread_mutex_destroy(&env->me_lck->mti_rlock);
@ -328,10 +350,12 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
rc = sem_destroy(&env->me_lck->mti_wlock) ? errno : 0; rc = sem_destroy(&env->me_lck->mti_wlock) ? errno : 0;
#endif /* MDBX_USE_MUTEXES */ #endif /* MDBX_USE_MUTEXES */
mdbx_assert(env, rc == 0); mdbx_assert(env, rc == 0);
if (rc == 0) { if (rc == 0) {
memset(env->me_lck, 0x81, sizeof(MDBX_lockinfo)); mdbx_munmap(&env->me_lck_mmap);
msync(env->me_lck, env->me_os_psize, MS_ASYNC); rc = ftruncate(env->me_lfd, 0) ? errno : 0;
} }
mdbx_jitter4testing(false); mdbx_jitter4testing(false);
} }