diff --git a/mdbx.h b/mdbx.h index 768e9b00..de16ccff 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5570,7 +5570,7 @@ LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); /** \brief Releases write-transaction lock. * Provided for custom and/or complex locking scenarios. * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); +LIBMDBX_API int mdbx_txn_unlock(MDBX_env *env); /** \brief Open an environment instance using specific meta-page * for checking and recovery. diff --git a/src/core.c b/src/core.c index 899b65af..d0cb0914 100644 --- a/src/core.c +++ b/src/core.c @@ -8200,7 +8200,7 @@ retry:; rc = MDBX_SUCCESS /* means "some data was synced" */; } - err = mdbx_txn_lock(env, nonblock); + err = osal_txn_lock(env, nonblock); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -8247,7 +8247,7 @@ retry:; bailout: if (locked) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -8442,7 +8442,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { } else if (env->me_flags & MDBX_RDONLY) { /* read-only mode, no write-txn, no wlock mutex */ last = NUM_METAS; - } else if (mdbx_txn_lock(env, true) == MDBX_SUCCESS) { + } else if (osal_txn_lock(env, true) == MDBX_SUCCESS) { /* no write-txn */ last = NUM_METAS; should_unlock = true; @@ -8463,7 +8463,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { pgno2bytes(env, edge - last)); } if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); } } #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ @@ -8840,6 +8840,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; return MDBX_SUCCESS; } + txn->mt_owner = tid; /* Seek & fetch the last meta */ uint64_t timestamp = 0; @@ -8915,12 +8916,11 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = MDBX_CORRUPTED; goto bailout; } - eASSERT(env, txn->mt_txnid >= env->me_lck->mti_oldest_reader.weak); txn->mt_dbxs = 
env->me_dbxs; /* mostly static anyway */ + txn->mt_numdbs = env->me_numdbs; ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); - txn->mt_numdbs = env->me_numdbs; } else { eASSERT(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | MDBX_WRITEMAP)) == 0); @@ -8946,16 +8946,16 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { /* Not yet touching txn == env->me_txn0, it may be active */ jitter4testing(false); - rc = mdbx_txn_lock(env, !!(flags & MDBX_TXN_TRY)); + rc = osal_txn_lock(env, !!(flags & MDBX_TXN_TRY)); if (unlikely(rc)) return rc; if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_PANIC; } #if defined(_WIN32) || defined(_WIN64) if (unlikely(!env->me_map)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_EPERM; } #endif /* Windows */ @@ -9129,7 +9129,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, txn); #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ - txn->mt_owner = tid; return MDBX_SUCCESS; } bailout: @@ -9810,9 +9809,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { txn->mt_txnid == slot->mr_txnid.weak && slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - atomic_add32(&env->me_ignore_EDEADLK, 1); txn_valgrind(env, nullptr); - atomic_sub32(&env->me_ignore_EDEADLK, 1); #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); safe64_reset(&slot->mr_txnid, false); @@ -9845,7 +9842,6 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ txn->mt_flags = MDBX_TXN_FINISHED; - txn->mt_owner = 0; env->me_txn = txn->mt_parent; pnl_free(txn->tw.spilled.list); txn->tw.spilled.list = nullptr; @@ -9858,7 +9854,7 @@ 
static int txn_end(MDBX_txn *txn, const unsigned mode) { if (!(env->me_flags & MDBX_WRITEMAP)) dlist_free(txn); /* The writer mutex was locked in mdbx_txn_begin. */ - mdbx_txn_unlock(env); + osal_txn_unlock(env); } else { eASSERT(env, txn->mt_parent != NULL); MDBX_txn *const parent = txn->mt_parent; @@ -9870,6 +9866,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, sizeof(meta_troika_t)) == 0); + txn->mt_owner = 0; if (txn->tw.lifo_reclaimed) { eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) >= (uintptr_t)parent->tw.lifo_reclaimed); @@ -13258,7 +13255,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, return MDBX_EACCESS; if (!inside_txn) { - int err = mdbx_txn_lock(env, false); + int err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; need_unlock = true; @@ -13609,7 +13606,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, bailout: if (need_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -21675,13 +21672,13 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, return rc; /* Temporarily block writers until we snapshot the meta pages */ - rc = mdbx_txn_lock(env, false); + rc = osal_txn_lock(env, false); if (unlikely(rc != MDBX_SUCCESS)) return rc; rc = txn_renew(read_txn, MDBX_TXN_RDONLY); if (unlikely(rc != MDBX_SUCCESS)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -21693,7 +21690,7 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, memcpy(buffer, env->me_map, meta_bytes); MDBX_meta *const headcopy = /* LY: get pointer to the snapshot copy */ ptr_disp(buffer, ptr_dist(meta_recent(env, &troika).ptr_c, env->me_map)); - mdbx_txn_unlock(env); + osal_txn_unlock(env); if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) meta_make_sizeable(headcopy); @@ -21953,7 +21950,7 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, 
env->me_txn0->mt_owner != osal_thread_self(); bool should_unlock = false; if (lock_needed) { - rc = mdbx_txn_lock(env, false); + rc = osal_txn_lock(env, false); if (unlikely(rc)) return rc; should_unlock = true; @@ -21965,7 +21962,7 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, env->me_flags &= ~flags; if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_SUCCESS; } @@ -24828,7 +24825,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, return MDBX_EINVAL; if (env->me_options.dp_reserve_limit != (unsigned)value) { if (lock_needed) { - err = mdbx_txn_lock(env, false); + err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; should_unlock = true; @@ -24868,7 +24865,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; if (lock_needed) { - err = mdbx_txn_lock(env, false); + err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; should_unlock = true; @@ -24968,7 +24965,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, } if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return err; } @@ -25490,6 +25487,39 @@ mdbx_key_from_int32(const int32_t i32) { #endif /* LIBMDBX_NO_EXPORTS_LEGACY_API */ +/*------------------------------------------------------------------------------ + * Locking API */ + +int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->me_txn0->mt_owner || + (env->me_txn0->mt_flags & MDBX_TXN_FINISHED) == 0)) + return MDBX_BUSY; + + return osal_txn_lock(env, dont_wait); +} + +int mdbx_txn_unlock(MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->me_flags & 
MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->me_txn0->mt_owner != osal_thread_self())) + return MDBX_THREAD_MISMATCH; + if (unlikely((env->me_txn0->mt_flags & MDBX_TXN_FINISHED) == 0)) + return MDBX_BUSY; + + osal_txn_unlock(env); + return MDBX_SUCCESS; +} + /******************************************************************************/ /* *INDENT-OFF* */ /* clang-format off */ diff --git a/src/internals.h b/src/internals.h index 3d1fd706..1664dcd7 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1482,7 +1482,6 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/src/lck-posix.c b/src/lck-posix.c index 17c50ddd..7f58e9ed 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -120,7 +120,7 @@ mdbx_global_destructor(void) { * - Блокировка таблицы читателей для регистрации, * т.е. функции osal_rdt_lock() и osal_rdt_unlock(). * - Блокировка БД для пишущих транзакций, - * т.е. функции mdbx_txn_lock() и mdbx_txn_unlock(). + * т.е. функции osal_txn_lock() и osal_txn_unlock(). * * Остальной функционал реализуется отдельно посредством файловых блокировок: * - Первоначальный захват БД в режиме exclusive/shared и последующий перевод @@ -527,6 +527,34 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { return rc; } +MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { + assert(env->me_lfd != INVALID_HANDLE_VALUE); + if (unlikely(osal_getpid() != env->me_pid)) + return MDBX_PANIC; + + const int cmd = dont_wait ? op_setlk : op_setlkw; + int rc = lck_op(env->me_lfd, cmd, F_WRLCK, 0, 1); + if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_EXCLUSIVE) == 0) { + rc = (env->me_pid > 1) + ? 
lck_op(env->me_lazy_fd, cmd, F_WRLCK, 0, env->me_pid - 1) + : MDBX_SUCCESS; + if (rc == MDBX_SUCCESS) { + rc = lck_op(env->me_lazy_fd, cmd, F_WRLCK, env->me_pid + 1, + OFF_T_MAX - env->me_pid - 1); + if (rc != MDBX_SUCCESS && env->me_pid > 1 && + lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, 0, env->me_pid - 1)) + rc = MDBX_PANIC; + } + if (rc != MDBX_SUCCESS && lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1)) + rc = MDBX_PANIC; + } + if (unlikely(rc != 0)) { + ERROR("%s, err %u", "lck", rc); + assert(MDBX_IS_ERROR(rc)); + } + return rc; +} + __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor) { if (unlikely(osal_getpid() != env->me_pid)) @@ -822,11 +850,6 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, #error "FIXME" #endif /* MDBX_LOCKING */ -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - if (rc == EDEADLK && atomic_load32(&env->me_ignore_EDEADLK, mo_Relaxed) > 0) - return rc; -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ - ERROR("mutex (un)lock failed, %s", mdbx_strerror(err)); if (rc != EDEADLK) env->me_flags |= MDBX_FATAL_ERROR; @@ -931,20 +954,28 @@ MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { jitter4testing(true); } -int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { +int osal_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? "dont-" : "", ">>"); + eASSERT(env, !env->me_txn0->mt_owner); jitter4testing(true); - int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); - TRACE("<< rc %d", rc); - return MDBX_IS_ERROR(rc) ? 
rc : MDBX_SUCCESS; + const int err = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); + int rc = err; + if (likely(!MDBX_IS_ERROR(err))) { + env->me_txn0->mt_owner = osal_thread_self(); + rc = MDBX_SUCCESS; + } + TRACE("<< rc %d", err); + return rc; } -void mdbx_txn_unlock(MDBX_env *env) { +void osal_txn_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); - TRACE("<< rc %d", rc); - if (unlikely(rc != MDBX_SUCCESS)) - mdbx_panic("%s() failed: err %d\n", __func__, rc); + eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); + env->me_txn0->mt_owner = 0; + int err = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); + TRACE("<< err %d", err); + if (unlikely(err != MDBX_SUCCESS)) + mdbx_panic("%s() failed: err %d\n", __func__, err); jitter4testing(true); } diff --git a/src/lck-windows.c b/src/lck-windows.c index 8ffccb1b..ed77da30 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -178,7 +178,8 @@ static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { #define DXB_BODY (env->me_psize * (size_t)NUM_METAS), DXB_MAXLEN #define DXB_WHOLE 0, DXB_MAXLEN -int mdbx_txn_lock(MDBX_env *env, bool dontwait) { +int osal_txn_lock(MDBX_env *env, bool dontwait) { + eASSERT(env, !env->me_txn0->mt_owner); if (dontwait) { if (!TryEnterCriticalSection(&env->me_windowsbug_lock)) return MDBX_BUSY; @@ -194,12 +195,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } } - if (env->me_flags & MDBX_EXCLUSIVE) { - /* Zap: Failing to release lock 'env->me_windowsbug_lock' - * in function 'mdbx_txn_lock' */ - MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115); - return MDBX_SUCCESS; - } + if (env->me_flags & MDBX_EXCLUSIVE) + goto done; const HANDLE fd4data = env->me_overlapped_fd ? 
env->me_overlapped_fd : env->me_lazy_fd; @@ -218,17 +215,20 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } } if (rc == MDBX_SUCCESS) { + done: /* Zap: Failing to release lock 'env->me_windowsbug_lock' * in function 'mdbx_txn_lock' */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115); - return rc; + env->me_txn0->mt_owner = osal_thread_self(); + return MDBX_SUCCESS; } LeaveCriticalSection(&env->me_windowsbug_lock); return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY; } -void mdbx_txn_unlock(MDBX_env *env) { +void osal_txn_unlock(MDBX_env *env) { + eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { const HANDLE fd4data = env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; @@ -236,6 +236,7 @@ void mdbx_txn_unlock(MDBX_env *env) { if (err != MDBX_SUCCESS) mdbx_panic("%s failed: err %u", __func__, err); } + env->me_txn0->mt_owner = 0; LeaveCriticalSection(&env->me_windowsbug_lock); } @@ -442,7 +443,7 @@ osal_resume_threads_after_remap(mdbx_handle_array_t *array) { * The osal_lck_downgrade() moves the locking-FSM from "exclusive write" * state to the "used" (i.e. shared) state. * - * The mdbx_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) + * The osal_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) * state to the "exclusive write" state. */ @@ -615,7 +616,7 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { return MDBX_SUCCESS /* 5) now at S-? (used), done */; } -MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { +MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { /* Transite from used state (S-?) to exclusive-write (E-E) */ assert(env->me_lfd != INVALID_HANDLE_VALUE); @@ -625,7 +626,9 @@ MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { /* 1) now on S-? (used), try S-E (locked) */ jitter4testing(false); - int rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_UPPER); + int rc = flock(env->me_lfd, + dont_wait ? 
LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, + LCK_UPPER); if (rc != MDBX_SUCCESS) { /* 2) something went wrong, give up */; VERBOSE("%s, err %u", "S-?(used) >> S-E(locked)", rc); @@ -640,7 +643,9 @@ MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { /* 4) now on ?-E (middle), try E-E (exclusive-write) */ jitter4testing(false); - rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER); + rc = flock(env->me_lfd, + dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, + LCK_LOWER); if (rc != MDBX_SUCCESS) { /* 5) something went wrong, give up */; VERBOSE("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc); @@ -686,7 +691,7 @@ MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0; osal_munmap(&env->me_lck_mmap); if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE && - mdbx_lck_upgrade(env) == MDBX_SUCCESS) + osal_lck_upgrade(env, true) == MDBX_SUCCESS) /* this will fail if LCK is used/mmapped by other process(es) */ osal_ftruncate(env->me_lfd, 0); } diff --git a/src/osal.h b/src/osal.h index 3d45da4b..1b5c317f 100644 --- a/src/osal.h +++ b/src/osal.h @@ -718,6 +718,8 @@ MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env); /// operational lock. /// \return Error code or zero on success MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env); +MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, + bool dont_wait); /// \brief Locks LCK-file or/and table of readers for (de)registering. /// \return Error code or zero on success @@ -726,16 +728,12 @@ MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env); /// \brief Unlocks LCK-file or/and table of readers after (de)registering. MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env); -/// \brief Acquires lock for DB change (on writing transaction start) -/// Reading transactions will not be blocked. -/// Declared as LIBMDBX_API because it is used in mdbx_chk. 
+/// \brief Acquires write-transaction lock. /// \return Error code or zero on success -LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); +MDBX_INTERNAL_FUNC int osal_txn_lock(MDBX_env *env, bool dont_wait); -/// \brief Releases lock once DB changes is made (after writing transaction -/// has finished). -/// Declared as LIBMDBX_API because it is used in mdbx_chk. -LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); +/// \brief Releases write-transaction lock. +MDBX_INTERNAL_FUNC void osal_txn_unlock(MDBX_env *env); /// \brief Sets alive-flag of reader presence (indicative lock) for PID of /// the current process. The function does no more than needed for