mdbx: return MDBX_EBADSIGN when mdbx_env_close() called concurrently from several threads.

Change-Id: I03a8c87bc51eefc5236baa52cee8b12a9f8aa0e2
This commit is contained in:
Leonid Yuriev 2020-04-20 17:00:41 +03:00
parent b64e47078f
commit 5765d92ac7
3 changed files with 73 additions and 61 deletions

19
mdbx.h
View File

@ -1914,14 +1914,17 @@ LIBMDBX_API int mdbx_env_set_syncperiod(MDBX_env *env,
* *
* Returns A non-zero error value on failure and 0 on success. * Returns A non-zero error value on failure and 0 on success.
* Some possible errors are: * Some possible errors are:
* - MDBX_BUSY = The write transaction is running by other thread, in such * - MDBX_BUSY = The write transaction is running by other thread, in such
* case MDBX_env instance has NOT be destroyed not released! * case MDBX_env instance has NOT be destroyed not released!
* NOTE: if any OTHER error code was returned then given * NOTE: if any OTHER error code was returned then given
* MDBX_env instance has been destroyed and released. * MDBX_env instance has been destroyed and released.
* - MDBX_PANIC = If mdbx_env_close_ex() was called in the child process * - MDBX_EBADSIGN = Environment handle already closed (i.e. mdbx_env_close()
* after fork(). In this case MDBX_PANIC is a expecte, * was already called or not valid (i.e. was not created
* i.e. MDBX_env instance was freed in proper manner. * by mdbx_env_create()).
* - MDBX_EIO = an error occurred during synchronization. */ * - MDBX_PANIC = If mdbx_env_close_ex() was called in the child process
* after fork(). In this case MDBX_PANIC is a expecte,
* i.e. MDBX_env instance was freed in proper manner.
* - MDBX_EIO = an error occurred during synchronization. */
LIBMDBX_API int mdbx_env_close_ex(MDBX_env *env, int dont_sync); LIBMDBX_API int mdbx_env_close_ex(MDBX_env *env, int dont_sync);
LIBMDBX_API int mdbx_env_close(MDBX_env *env); LIBMDBX_API int mdbx_env_close(MDBX_env *env);

View File

@ -5528,20 +5528,8 @@ fail:
return rc; return rc;
} }
__cold int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) { __cold static int mdbx_env_sync_internal(MDBX_env *env, int force,
if (unlikely(!env)) int nonblock) {
return MDBX_EINVAL;
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDBX_EBADSIGN;
#if MDBX_TXN_CHECKPID
if (unlikely(env->me_pid != mdbx_getpid())) {
env->me_flags |= MDBX_FATAL_ERROR;
return MDBX_PANIC;
}
#endif /* MDBX_TXN_CHECKPID */
unsigned flags = env->me_flags & ~MDBX_NOMETASYNC; unsigned flags = env->me_flags & ~MDBX_NOMETASYNC;
if (unlikely(flags & (MDBX_RDONLY | MDBX_FATAL_ERROR))) if (unlikely(flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)))
return MDBX_EACCESS; return MDBX_EACCESS;
@ -5636,6 +5624,23 @@ fastpath:
return rc; return rc;
} }
__cold int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
if (unlikely(!env))
return MDBX_EINVAL;
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDBX_EBADSIGN;
#if MDBX_TXN_CHECKPID
if (unlikely(env->me_pid != mdbx_getpid())) {
env->me_flags |= MDBX_FATAL_ERROR;
return MDBX_PANIC;
}
#endif /* MDBX_TXN_CHECKPID */
return mdbx_env_sync_internal(env, force, nonblock);
}
__cold int mdbx_env_sync(MDBX_env *env) { __cold int mdbx_env_sync(MDBX_env *env) {
return mdbx_env_sync_ex(env, true, false); return mdbx_env_sync_ex(env, true, false);
} }
@ -10171,6 +10176,7 @@ static int __cold mdbx_env_close0(MDBX_env *env) {
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
env->me_signature = 0;
env->me_flags &= ~MDBX_ENV_ACTIVE; env->me_flags &= ~MDBX_ENV_ACTIVE;
env->me_oldest = nullptr; env->me_oldest = nullptr;
env->me_sync_timestamp = nullptr; env->me_sync_timestamp = nullptr;
@ -10255,36 +10261,33 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) {
if ((env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0 && env->me_txn0) { if ((env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0 && env->me_txn0) {
if (env->me_txn0->mt_owner && env->me_txn0->mt_owner != mdbx_thread_self()) if (env->me_txn0->mt_owner && env->me_txn0->mt_owner != mdbx_thread_self())
return MDBX_BUSY; return MDBX_BUSY;
if (!dont_sync) { } else
#if defined(_WIN32) || defined(_WIN64) dont_sync = true;
/* On windows, without blocking is impossible to determine whether another
* process is running a writing transaction or not.
* Because in the "owner died" condition kernel don't release
* file lock immediately. */
rc = mdbx_env_sync_ex(env, true, false);
rc = (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
#else
struct stat st;
if (unlikely(fstat(env->me_lazy_fd, &st)))
rc = errno;
else if (st.st_nlink > 0 /* don't sync deleted files */) {
rc = mdbx_env_sync_ex(env, true, true);
rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY ||
rc == EWOULDBLOCK || rc == MDBX_RESULT_TRUE)
? MDBX_SUCCESS
: rc;
}
#endif
}
}
while ((dp = env->me_dpages) != NULL) { if (!atomic_cas32(&env->me_signature, MDBX_ME_SIGNATURE, 0))
ASAN_UNPOISON_MEMORY_REGION(&dp->mp_next, sizeof(dp->mp_next)); return MDBX_EBADSIGN;
VALGRIND_MAKE_MEM_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
env->me_dpages = dp->mp_next; if (!dont_sync) {
mdbx_free(dp); #if defined(_WIN32) || defined(_WIN64)
/* On windows, without blocking is impossible to determine whether another
* process is running a writing transaction or not.
* Because in the "owner died" condition kernel don't release
* file lock immediately. */
rc = mdbx_env_sync_internal(env, true, false);
rc = (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
#else
struct stat st;
if (unlikely(fstat(env->me_lazy_fd, &st)))
rc = errno;
else if (st.st_nlink > 0 /* don't sync deleted files */) {
rc = mdbx_env_sync_internal(env, true, true);
rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY ||
rc == EWOULDBLOCK || rc == MDBX_RESULT_TRUE)
? MDBX_SUCCESS
: rc;
}
#endif
} }
VALGRIND_DESTROY_MEMPOOL(env);
rc = mdbx_env_close0(env) ? MDBX_PANIC : rc; rc = mdbx_env_close0(env) ? MDBX_PANIC : rc;
mdbx_ensure(env, mdbx_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS); mdbx_ensure(env, mdbx_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS);
@ -10300,9 +10303,15 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) {
mdbx_ensure(env, mdbx_ipclock_destroy(&env->me_lckless_stub.wlock) == 0); mdbx_ensure(env, mdbx_ipclock_destroy(&env->me_lckless_stub.wlock) == 0);
#endif /* MDBX_LOCKING */ #endif /* MDBX_LOCKING */
while ((dp = env->me_dpages) != NULL) {
ASAN_UNPOISON_MEMORY_REGION(&dp->mp_next, sizeof(dp->mp_next));
VALGRIND_MAKE_MEM_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
env->me_dpages = dp->mp_next;
mdbx_free(dp);
}
VALGRIND_DESTROY_MEMPOOL(env);
mdbx_ensure(env, env->me_lcklist_next == nullptr); mdbx_ensure(env, env->me_lcklist_next == nullptr);
env->me_pid = 0; env->me_pid = 0;
env->me_signature = 0;
mdbx_free(env); mdbx_free(env);
return rc; return rc;

View File

@ -862,15 +862,7 @@ typedef struct MDBX_cursor_couple {
/* The database environment. */ /* The database environment. */
struct MDBX_env { struct MDBX_env {
#define MDBX_ME_SIGNATURE UINT32_C(0x9A899641) #define MDBX_ME_SIGNATURE UINT32_C(0x9A899641)
size_t me_signature; uint32_t me_signature;
mdbx_mmap_t me_dxb_mmap; /* The main data file */
#define me_map me_dxb_mmap.dxb
#define me_lazy_fd me_dxb_mmap.fd
mdbx_filehandle_t me_dsync_fd;
mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
/* Failed to update the meta page. Probably an I/O error. */ /* Failed to update the meta page. Probably an I/O error. */
#define MDBX_FATAL_ERROR UINT32_C(0x80000000) #define MDBX_FATAL_ERROR UINT32_C(0x80000000)
/* Additional flag for mdbx_sync_locked() */ /* Additional flag for mdbx_sync_locked() */
@ -879,7 +871,15 @@ struct MDBX_env {
#define MDBX_ENV_ACTIVE UINT32_C(0x20000000) #define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
/* me_txkey is set */ /* me_txkey is set */
#define MDBX_ENV_TXKEY UINT32_C(0x10000000) #define MDBX_ENV_TXKEY UINT32_C(0x10000000)
uint32_t me_flags; /* see mdbx_env */ uint32_t me_flags;
mdbx_mmap_t me_dxb_mmap; /* The main data file */
#define me_map me_dxb_mmap.dxb
#define me_lazy_fd me_dxb_mmap.fd
mdbx_filehandle_t me_dsync_fd;
mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
unsigned me_psize; /* DB page size, inited from me_os_psize */ unsigned me_psize; /* DB page size, inited from me_os_psize */
unsigned me_psize2log; /* log2 of DB page size */ unsigned me_psize2log; /* log2 of DB page size */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */ unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */