mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 18:24:12 +08:00
mdbx: rework POSIX-lck and merge with Linux-lck.
Change-Id: Id8fbc81b9a2ad3a3a7499ecf9a012314e1f8062a
This commit is contained in:
parent
098f8a0d77
commit
874418a301
@ -12,7 +12,6 @@ src/alloy.c
|
|||||||
src/elements/data.c
|
src/elements/data.c
|
||||||
src/elements/internals.h
|
src/elements/internals.h
|
||||||
src/elements/defs.h
|
src/elements/defs.h
|
||||||
src/elements/lck-linux.c
|
|
||||||
src/elements/lck-posix.c
|
src/elements/lck-posix.c
|
||||||
src/elements/lck-windows.c
|
src/elements/lck-windows.c
|
||||||
src/elements/core.c
|
src/elements/core.c
|
||||||
|
@ -28,8 +28,6 @@ if(MDBX_ALLOY_MODE)
|
|||||||
else()
|
else()
|
||||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||||
set(LIBMDBX_OSAL windows)
|
set(LIBMDBX_OSAL windows)
|
||||||
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
|
||||||
set(LIBMDBX_OSAL linux)
|
|
||||||
else()
|
else()
|
||||||
set(LIBMDBX_OSAL posix)
|
set(LIBMDBX_OSAL posix)
|
||||||
endif()
|
endif()
|
||||||
|
@ -22,9 +22,7 @@
|
|||||||
#include "elements/core.c"
|
#include "elements/core.c"
|
||||||
#include "elements/osal.c"
|
#include "elements/osal.c"
|
||||||
|
|
||||||
#if defined(__linux__) || defined(__gnu_linux__)
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
#include "elements/lck-linux.c"
|
|
||||||
#elif defined(_WIN32) || defined(_WIN64)
|
|
||||||
#include "elements/lck-windows.c"
|
#include "elements/lck-windows.c"
|
||||||
#else
|
#else
|
||||||
#include "elements/lck-posix.c"
|
#include "elements/lck-posix.c"
|
||||||
|
@ -674,93 +674,80 @@ static uint64_t rrxmrrxmsx_0(uint64_t v) {
|
|||||||
return v ^ v >> 28;
|
return v ^ v >> 28;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int uniq_poke(const mdbx_mmap_t *map, const uint64_t cadabra) {
|
static int uniq_peek(const mdbx_mmap_t *pending, mdbx_mmap_t *scan) {
|
||||||
int rc;
|
|
||||||
if (map->lck) {
|
|
||||||
map->lck->mti_bait_uniqueness = cadabra;
|
|
||||||
mdbx_flush_noncoherent_cpu_writeback();
|
|
||||||
rc = MDBX_SUCCESS;
|
|
||||||
} else {
|
|
||||||
rc = mdbx_pwrite(map->fd, &cadabra, sizeof(map->lck->mti_bait_uniqueness),
|
|
||||||
offsetof(MDBX_lockinfo, mti_bait_uniqueness));
|
|
||||||
}
|
|
||||||
mdbx_trace("uniq-poke: %s, cadabra 0x016%" PRIx64 ", rc %d",
|
|
||||||
map->lck ? "mem" : "file", cadabra, rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int uniq_peek(const mdbx_mmap_t *map, const uint64_t cadabra) {
|
|
||||||
int rc;
|
int rc;
|
||||||
uint64_t bait;
|
uint64_t bait;
|
||||||
if (map->lck) {
|
if (pending->address) {
|
||||||
mdbx_invalidate_mmap_noncoherent_cache(map->lck, sizeof(*map->lck));
|
bait = pending->lck->mti_bait_uniqueness;
|
||||||
bait = map->lck->mti_bait_uniqueness;
|
|
||||||
rc = MDBX_SUCCESS;
|
rc = MDBX_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
rc = mdbx_pread(map->fd, &bait, sizeof(map->lck->mti_bait_uniqueness),
|
bait = 0 /* hush MSVC warning */;
|
||||||
offsetof(MDBX_lockinfo, mti_bait_uniqueness));
|
rc = mdbx_msync(scan, 0, sizeof(MDBX_lockinfo), true);
|
||||||
|
if (rc == MDBX_SUCCESS)
|
||||||
|
rc =
|
||||||
|
mdbx_pread(pending->fd, &bait, sizeof(scan->lck->mti_bait_uniqueness),
|
||||||
|
offsetof(MDBX_lockinfo, mti_bait_uniqueness));
|
||||||
}
|
}
|
||||||
|
if (likely(rc == MDBX_SUCCESS) && bait == scan->lck->mti_bait_uniqueness)
|
||||||
|
rc = MDBX_RESULT_TRUE;
|
||||||
|
|
||||||
if (unlikely(!MDBX_IS_ERROR(rc)))
|
mdbx_trace("uniq-peek: %s, bait 0x%016" PRIx64 ",%s rc %d",
|
||||||
rc = (bait == cadabra) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE;
|
pending->lck ? "mem" : "file", bait,
|
||||||
|
|
||||||
mdbx_trace("uniq-peek: %s, cadabra 0x%016" PRIx64 ", bait 0x%016" PRIx64
|
|
||||||
",%s rc %d",
|
|
||||||
map->lck ? "mem" : "file", cadabra, bait,
|
|
||||||
(rc == MDBX_RESULT_TRUE) ? " found," : (rc ? " FAILED," : ""), rc);
|
(rc == MDBX_RESULT_TRUE) ? " found," : (rc ? " FAILED," : ""), rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
__cold static int uniq_probe(const mdbx_mmap_t *map, const mdbx_pid_t pid,
|
static int uniq_poke(const mdbx_mmap_t *pending, mdbx_mmap_t *scan,
|
||||||
MDBX_env **found) {
|
uint64_t *abra) {
|
||||||
if (inprocess_lcklist_head == RTHC_ENVLIST_END) {
|
if (*abra == 0) {
|
||||||
mdbx_info("<< uniq-probe: pid %u, env-list empty, skip probing, rc %d",
|
const mdbx_tid_t tid = mdbx_thread_self();
|
||||||
(unsigned)pid, MDBX_RESULT_TRUE);
|
size_t uit = 0;
|
||||||
return MDBX_RESULT_TRUE;
|
memcpy(&uit, &tid, (sizeof(tid) < sizeof(uit)) ? sizeof(tid) : sizeof(uit));
|
||||||
|
*abra =
|
||||||
|
rrxmrrxmsx_0(mdbx_osal_monotime() + UINT64_C(5873865991930747) * uit);
|
||||||
}
|
}
|
||||||
|
const uint64_t cadabra =
|
||||||
|
rrxmrrxmsx_0(*abra + UINT64_C(7680760450171793) * (unsigned)mdbx_getpid())
|
||||||
|
<< 24 |
|
||||||
|
*abra >> 40;
|
||||||
|
scan->lck->mti_bait_uniqueness = cadabra;
|
||||||
|
mdbx_flush_noncoherent_cpu_writeback();
|
||||||
|
*abra = *abra * UINT64_C(6364136223846793005) + 1;
|
||||||
|
return uniq_peek(pending, scan);
|
||||||
|
}
|
||||||
|
|
||||||
const mdbx_tid_t tid = mdbx_thread_self();
|
__cold static int uniq_check(const mdbx_mmap_t *pending, MDBX_env **found) {
|
||||||
size_t uit = 0;
|
*found = nullptr;
|
||||||
memcpy(&uit, &tid, (sizeof(tid) < sizeof(uit)) ? sizeof(tid) : sizeof(uit));
|
uint64_t salt = 0;
|
||||||
uint64_t abra =
|
for (MDBX_env *scan = inprocess_lcklist_head; scan != RTHC_ENVLIST_END;
|
||||||
rrxmrrxmsx_0(mdbx_osal_monotime() + UINT64_C(5873865991930747) * uit);
|
scan = scan->me_lcklist_next) {
|
||||||
|
int err = scan->me_lck_mmap.lck->mti_bait_uniqueness
|
||||||
for (unsigned bits = 4; bits; bits >>= 1) {
|
? uniq_peek(pending, &scan->me_lck_mmap)
|
||||||
abra = abra * UINT64_C(6364136223846793005) + 1;
|
: uniq_poke(pending, &scan->me_lck_mmap, &salt);
|
||||||
const uint64_t cadabra =
|
if (err == MDBX_RESULT_TRUE)
|
||||||
rrxmrrxmsx_0(abra + UINT64_C(7680760450171793) * pid) << 20 |
|
err = uniq_poke(pending, &scan->me_lck_mmap, &salt);
|
||||||
abra >> 44;
|
if (err == MDBX_RESULT_TRUE) {
|
||||||
|
(void)mdbx_msync(&scan->me_lck_mmap, 0, sizeof(MDBX_lockinfo), false);
|
||||||
int err = uniq_poke(map, cadabra);
|
err = uniq_poke(pending, &scan->me_lck_mmap, &salt);
|
||||||
*found = nullptr;
|
|
||||||
for (MDBX_env *env = inprocess_lcklist_head;
|
|
||||||
err == MDBX_SUCCESS && env != RTHC_ENVLIST_END;
|
|
||||||
env = env->me_lcklist_next) {
|
|
||||||
err = uniq_peek(&env->me_lck_mmap, cadabra);
|
|
||||||
if (err == MDBX_RESULT_TRUE)
|
|
||||||
*found = env;
|
|
||||||
}
|
}
|
||||||
|
if (err == MDBX_RESULT_TRUE) {
|
||||||
if (unlikely(MDBX_IS_ERROR(err))) {
|
err = uniq_poke(pending, &scan->me_lck_mmap, &salt);
|
||||||
mdbx_verbose("<< uniq-probe: pid %u, uit %zu, failed rc %d",
|
*found = scan;
|
||||||
(unsigned)pid, uit, err);
|
mdbx_info("<< uniq-probe: found %p", *found);
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
bits += 8 & err;
|
|
||||||
if (bits == 15) {
|
|
||||||
mdbx_info("<< uniq-probe: pid %u, uit %zu, found %p", (unsigned)pid, uit,
|
|
||||||
*found);
|
|
||||||
return MDBX_RESULT_FALSE;
|
return MDBX_RESULT_FALSE;
|
||||||
}
|
}
|
||||||
|
if (unlikely(err != MDBX_SUCCESS)) {
|
||||||
|
mdbx_verbose("<< uniq-probe: failed rc %d", err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mdbx_info("<< uniq-probe: pid %u, uit %zu, unique", (unsigned)pid, uit);
|
mdbx_info("<< uniq-probe: unique");
|
||||||
return MDBX_RESULT_TRUE;
|
return MDBX_RESULT_TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int lcklist_detach_locked(MDBX_env *env) {
|
static int lcklist_detach_locked(MDBX_env *env) {
|
||||||
MDBX_env *dup = nullptr;
|
MDBX_env *inprocess_neighbor = nullptr;
|
||||||
int rc = MDBX_SUCCESS;
|
int rc = MDBX_SUCCESS;
|
||||||
if (env->me_lcklist_next != nullptr) {
|
if (env->me_lcklist_next != nullptr) {
|
||||||
mdbx_ensure(env, env->me_lcklist_next != nullptr);
|
mdbx_ensure(env, env->me_lcklist_next != nullptr);
|
||||||
@ -776,11 +763,11 @@ static int lcklist_detach_locked(MDBX_env *env) {
|
|||||||
mdbx_ensure(env, env->me_lcklist_next == nullptr);
|
mdbx_ensure(env, env->me_lcklist_next == nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = uniq_probe(&env->me_lck_mmap, env->me_pid, &dup);
|
rc = uniq_check(&env->me_lck_mmap, &inprocess_neighbor);
|
||||||
if (!dup && env->me_live_reader)
|
if (!inprocess_neighbor && env->me_live_reader)
|
||||||
(void)mdbx_rpid_clear(env);
|
(void)mdbx_rpid_clear(env);
|
||||||
if (!MDBX_IS_ERROR(rc))
|
if (!MDBX_IS_ERROR(rc))
|
||||||
rc = mdbx_lck_destroy(env, dup);
|
rc = mdbx_lck_destroy(env, inprocess_neighbor);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6731,11 +6718,15 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MDBX_env *inprocess_neighbor = nullptr;
|
||||||
if (err == MDBX_RESULT_TRUE) {
|
if (err == MDBX_RESULT_TRUE) {
|
||||||
MDBX_env *unused_lckdup_found;
|
err = uniq_check(&env->me_lck_mmap, &inprocess_neighbor);
|
||||||
err = uniq_probe(&env->me_lck_mmap, env->me_pid, &unused_lckdup_found);
|
|
||||||
if (MDBX_IS_ERROR(err))
|
if (MDBX_IS_ERROR(err))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
if (inprocess_neighbor && (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE)) {
|
||||||
|
err = MDBX_BUSY;
|
||||||
|
goto bailout;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const int lck_seize_rc = err;
|
const int lck_seize_rc = err;
|
||||||
|
|
||||||
@ -6814,6 +6805,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
|||||||
if (lck_seize_rc == MDBX_RESULT_TRUE) {
|
if (lck_seize_rc == MDBX_RESULT_TRUE) {
|
||||||
/* LY: exclusive mode, reset lck */
|
/* LY: exclusive mode, reset lck */
|
||||||
memset(env->me_lck, 0, (size_t)size);
|
memset(env->me_lck, 0, (size_t)size);
|
||||||
|
mdbx_jitter4testing(false);
|
||||||
env->me_lck->mti_magic_and_version = MDBX_LOCK_MAGIC;
|
env->me_lck->mti_magic_and_version = MDBX_LOCK_MAGIC;
|
||||||
env->me_lck->mti_os_and_format = MDBX_LOCK_FORMAT;
|
env->me_lck->mti_os_and_format = MDBX_LOCK_FORMAT;
|
||||||
} else {
|
} else {
|
||||||
@ -6966,15 +6958,9 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
|
|||||||
MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC;
|
MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC;
|
||||||
if (lck_rc == MDBX_RESULT_TRUE) {
|
if (lck_rc == MDBX_RESULT_TRUE) {
|
||||||
env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY);
|
env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY);
|
||||||
if ((env->me_flags & MDBX_EXCLUSIVE) == 0) {
|
rc = mdbx_lck_downgrade(env);
|
||||||
/* LY: downgrade lock only if exclusive access not requested.
|
mdbx_debug("lck-downgrade-%s: rc %i",
|
||||||
* in case exclusive==1, just leave value as is. */
|
(env->me_flags & MDBX_EXCLUSIVE) ? "partial" : "full", rc);
|
||||||
rc = mdbx_lck_downgrade(env, true);
|
|
||||||
mdbx_debug("lck-downgrade-full: rc %i ", rc);
|
|
||||||
} else {
|
|
||||||
rc = mdbx_lck_downgrade(env, false);
|
|
||||||
mdbx_debug("lck-downgrade-partial: rc %i ", rc);
|
|
||||||
}
|
|
||||||
if (rc != MDBX_SUCCESS)
|
if (rc != MDBX_SUCCESS)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1,489 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2015-2019 Leonid Yuriev <leo@yuriev.ru>
|
|
||||||
* and other libmdbx authors: please see AUTHORS file.
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted only as authorized by the OpenLDAP
|
|
||||||
* Public License.
|
|
||||||
*
|
|
||||||
* A copy of this license is available in the file LICENSE in the
|
|
||||||
* top-level directory of the distribution or, alternatively, at
|
|
||||||
* <http://www.OpenLDAP.org/license.html>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if !(defined(__linux__) || defined(__gnu_linux__))
|
|
||||||
#error "This implementation of locking only supports Linux,\
|
|
||||||
where is no interaction between the types of lock placed\
|
|
||||||
by flock() and fcntl()."
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "./internals.h"
|
|
||||||
#include <sys/utsname.h>
|
|
||||||
|
|
||||||
/* Some platforms define the EOWNERDEAD error code
|
|
||||||
* even though they don't support Robust Mutexes.
|
|
||||||
* Compile with -DMDBX_USE_ROBUST=0. */
|
|
||||||
#ifndef MDBX_USE_ROBUST
|
|
||||||
/* Howard Chu: Android currently lacks Robust Mutex support */
|
|
||||||
#if defined(EOWNERDEAD) && \
|
|
||||||
!defined(__ANDROID__) /* LY: glibc before 2.10 has a troubles \
|
|
||||||
with Robust Mutex too. */ \
|
|
||||||
&& (!defined(__GLIBC__) || __GLIBC_PREREQ(2, 10) || \
|
|
||||||
_POSIX_C_SOURCE >= 200809L)
|
|
||||||
#define MDBX_USE_ROBUST 1
|
|
||||||
#else
|
|
||||||
#define MDBX_USE_ROBUST 0
|
|
||||||
#endif
|
|
||||||
#endif /* MDBX_USE_ROBUST */
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
|
||||||
/* global constructor/destructor */
|
|
||||||
|
|
||||||
#ifndef MDBX_ALLOY
|
|
||||||
uint32_t mdbx_linux_kernel_version;
|
|
||||||
#endif /* MDBX_ALLOY */
|
|
||||||
|
|
||||||
static __cold __attribute__((__constructor__)) void
|
|
||||||
mdbx_global_constructor(void) {
|
|
||||||
struct utsname buffer;
|
|
||||||
if (uname(&buffer) == 0) {
|
|
||||||
int i = 0;
|
|
||||||
char *p = buffer.release;
|
|
||||||
while (*p && i < 4) {
|
|
||||||
if (*p >= '0' && *p <= '9') {
|
|
||||||
long number = strtol(p, &p, 10);
|
|
||||||
if (number > 0) {
|
|
||||||
if (number > 255)
|
|
||||||
number = 255;
|
|
||||||
mdbx_linux_kernel_version += number << (24 - i * 8);
|
|
||||||
}
|
|
||||||
++i;
|
|
||||||
} else {
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
mdbx_rthc_global_init();
|
|
||||||
}
|
|
||||||
|
|
||||||
static __cold __attribute__((__destructor__)) void
|
|
||||||
mdbx_global_destructor(void) {
|
|
||||||
mdbx_rthc_global_dtor();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
|
||||||
/* lck */
|
|
||||||
|
|
||||||
/* Описание реализации блокировок для Linux:
|
|
||||||
*
|
|
||||||
* lck-файл отображается в память, в нём организуется таблица читателей и
|
|
||||||
* размещаются совместно используемые posix-мьютексы (futex). Посредством
|
|
||||||
* этих мьютексов (см struct MDBX_lockinfo) реализуются:
|
|
||||||
* - Блокировка таблицы читателей для регистрации,
|
|
||||||
* т.е. функции mdbx_rdt_lock() и mdbx_rdt_unlock().
|
|
||||||
* - Блокировка БД для пишущих транзакций,
|
|
||||||
* т.е. функции mdbx_txn_lock() и mdbx_txn_unlock().
|
|
||||||
*
|
|
||||||
* Остальной функционал реализуется отдельно посредством файловых блокировок:
|
|
||||||
* - Первоначальный захват БД в режиме exclusive/shared и последующий перевод
|
|
||||||
* в операционный режим, функции mdbx_lck_seize() и mdbx_lck_downgrade().
|
|
||||||
* - Проверка присутствия процессов-читателей,
|
|
||||||
* т.е. функции mdbx_rpid_set(), mdbx_rpid_clear() и mdbx_rpid_check().
|
|
||||||
*
|
|
||||||
* Используется два вида файловых блокировок flock() и fcntl(F_SETLK),
|
|
||||||
* как для lck-файла, так и для основного файла БД:
|
|
||||||
* - Для контроля процессов-читателей используются однобайтовые
|
|
||||||
* range-блокировки lck-файла посредством fcntl(F_SETLK). При этом
|
|
||||||
* в качестве позиции используется pid процесса-читателя.
|
|
||||||
* - Для первоначального захвата и shared/exclusive блокировок используется
|
|
||||||
* комбинация flock() и fcntl(F_SETLK) блокировки одного байта lck-файла
|
|
||||||
* в нулевой позиции (нулевая позиция не используется механизмом контроля
|
|
||||||
* процессов-читателей, так как pid пользовательского процесса в Linux
|
|
||||||
* всегда больше 0).
|
|
||||||
* - Кроме этого, flock() блокировка основного файла БД используется при работе
|
|
||||||
* в режимах без lck-файла, как в read-only, так и в эксклюзивном.
|
|
||||||
* - Блокировки flock() и fcntl(F_SETLK) в Linux работают независимо. Поэтому
|
|
||||||
* их комбинирование позволяет предотвратить совместное использование БД
|
|
||||||
* через NFS, что позволяет fcntl(F_SETLK), одновременно защитившись
|
|
||||||
* от проблем неатомарности flock() при переходе между эксклюзивным
|
|
||||||
* и атомарным режимами блокировок.
|
|
||||||
*/
|
|
||||||
|
|
||||||
static int op_setlk, op_setlkw, op_getlk;
|
|
||||||
static void __cold choice_fcntl() {
|
|
||||||
assert(!op_setlk && !op_setlkw && !op_getlk);
|
|
||||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK)
|
|
||||||
if (mdbx_linux_kernel_version >
|
|
||||||
0x030f0000 /* OFD locks are available since 3.15, but engages here
|
|
||||||
only for 3.16 and later kernels (LTS) for reliability reasons */
|
|
||||||
&& (mdbx_runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0) {
|
|
||||||
op_setlk = F_OFD_SETLK;
|
|
||||||
op_setlkw = F_OFD_SETLKW;
|
|
||||||
op_getlk = F_OFD_GETLK;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif /* OFD locks */
|
|
||||||
op_setlk = F_SETLK;
|
|
||||||
op_setlkw = F_SETLKW;
|
|
||||||
op_getlk = F_GETLK;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef OFF_T_MAX
|
|
||||||
#define OFF_T_MAX \
|
|
||||||
((sizeof(off_t) > 4 ? INT64_MAX : INT32_MAX) & ~(size_t)0xffff)
|
|
||||||
#endif
|
|
||||||
#define LCK_WHOLE OFF_T_MAX
|
|
||||||
|
|
||||||
static int mdbx_lck_op(mdbx_filehandle_t fd, int cmd, short lck, off_t offset,
|
|
||||||
off_t len) {
|
|
||||||
for (;;) {
|
|
||||||
struct flock lock_op;
|
|
||||||
memset(&lock_op, 0, sizeof(lock_op));
|
|
||||||
lock_op.l_type = lck;
|
|
||||||
lock_op.l_whence = SEEK_SET;
|
|
||||||
lock_op.l_start = offset;
|
|
||||||
lock_op.l_len = len;
|
|
||||||
if (fcntl(fd, cmd, &lock_op) == 0) {
|
|
||||||
if (cmd == op_getlk) {
|
|
||||||
/* Checks reader by pid. Returns:
|
|
||||||
* MDBX_RESULT_TRUE - if pid is live (unable to acquire lock)
|
|
||||||
* MDBX_RESULT_FALSE - if pid is dead (lock acquired). */
|
|
||||||
return (lock_op.l_type == F_UNLCK) ? MDBX_RESULT_FALSE
|
|
||||||
: MDBX_RESULT_TRUE;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
int rc = errno;
|
|
||||||
if (rc != EINTR || cmd == op_setlkw)
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline int mdbx_lck_exclusive(int lfd, bool fallback2shared) {
|
|
||||||
assert(lfd != INVALID_HANDLE_VALUE);
|
|
||||||
if (flock(lfd, LOCK_EX | LOCK_NB))
|
|
||||||
return errno;
|
|
||||||
int rc = mdbx_lck_op(lfd, op_setlk, F_WRLCK, 0, 1);
|
|
||||||
if (rc != 0 && fallback2shared) {
|
|
||||||
while (flock(lfd, LOCK_SH)) {
|
|
||||||
int rc = errno;
|
|
||||||
if (rc != EINTR)
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline int mdbx_lck_shared(int lfd) {
|
|
||||||
assert(lfd != INVALID_HANDLE_VALUE);
|
|
||||||
while (flock(lfd, LOCK_SH)) {
|
|
||||||
int rc = errno;
|
|
||||||
if (rc != EINTR)
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
return mdbx_lck_op(lfd, op_setlkw, F_RDLCK, 0, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
|
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
|
||||||
return complete ? mdbx_lck_shared(env->me_lfd) : MDBX_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env) {
|
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
|
||||||
assert(env->me_pid > 0);
|
|
||||||
return mdbx_lck_op(env->me_lfd, op_setlk, F_WRLCK, env->me_pid, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env) {
|
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
|
||||||
assert(env->me_pid > 0);
|
|
||||||
return mdbx_lck_op(env->me_lfd, op_setlkw, F_UNLCK, env->me_pid, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) {
|
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
|
||||||
assert(pid > 0);
|
|
||||||
return mdbx_lck_op(env->me_lfd, op_getlk, F_WRLCK, pid, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
static int mdbx_mutex_failed(MDBX_env *env, pthread_mutex_t *mutex,
|
|
||||||
const int rc);
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
|
|
||||||
int global_uniqueness_flag) {
|
|
||||||
if (global_uniqueness_flag == MDBX_RESULT_FALSE)
|
|
||||||
return MDBX_SUCCESS;
|
|
||||||
|
|
||||||
pthread_mutexattr_t ma;
|
|
||||||
int rc = pthread_mutexattr_init(&ma);
|
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
rc = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
|
|
||||||
if (rc)
|
|
||||||
goto bailout;
|
|
||||||
|
|
||||||
#if MDBX_USE_ROBUST
|
|
||||||
#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12) && \
|
|
||||||
!defined(pthread_mutex_consistent) && _POSIX_C_SOURCE < 200809L
|
|
||||||
rc = pthread_mutexattr_setrobust_np(&ma, PTHREAD_MUTEX_ROBUST_NP);
|
|
||||||
#else
|
|
||||||
rc = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
|
|
||||||
#endif
|
|
||||||
if (rc)
|
|
||||||
goto bailout;
|
|
||||||
#endif /* MDBX_USE_ROBUST */
|
|
||||||
|
|
||||||
#if _POSIX_C_SOURCE >= 199506L && !defined(MDBX_SAFE4QEMU)
|
|
||||||
rc = pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_INHERIT);
|
|
||||||
if (rc == ENOTSUP)
|
|
||||||
rc = pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_NONE);
|
|
||||||
if (rc)
|
|
||||||
goto bailout;
|
|
||||||
#endif /* PTHREAD_PRIO_INHERIT */
|
|
||||||
|
|
||||||
rc = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
|
|
||||||
if (rc)
|
|
||||||
goto bailout;
|
|
||||||
|
|
||||||
rc = pthread_mutex_init(&env->me_lck->mti_rmutex, &ma);
|
|
||||||
if (rc)
|
|
||||||
goto bailout;
|
|
||||||
rc = pthread_mutex_init(&env->me_lck->mti_wmutex, &ma);
|
|
||||||
|
|
||||||
bailout:
|
|
||||||
pthread_mutexattr_destroy(&ma);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
|
|
||||||
MDBX_env *inprocess_neighbor) {
|
|
||||||
if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor &&
|
|
||||||
env->me_lck &&
|
|
||||||
/* try get exclusive access */ mdbx_lck_exclusive(env->me_lfd, false) ==
|
|
||||||
0) {
|
|
||||||
mdbx_info("%s: got exclusive, drown mutexes", mdbx_func_);
|
|
||||||
int rc = pthread_mutex_destroy(&env->me_lck->mti_rmutex);
|
|
||||||
if (rc == 0)
|
|
||||||
rc = pthread_mutex_destroy(&env->me_lck->mti_wmutex);
|
|
||||||
assert(rc == 0);
|
|
||||||
(void)rc;
|
|
||||||
msync(env->me_lck, env->me_os_psize, MS_ASYNC);
|
|
||||||
/* file locks would be released (by kernel)
|
|
||||||
* while the me_lfd will be closed */
|
|
||||||
}
|
|
||||||
|
|
||||||
if (op_setlk == F_SETLK) {
|
|
||||||
/* File locks would be released (by kernel) while the file-descriptors
|
|
||||||
* will be closed. But to avoid false-positive EDEADLK from the kernel,
|
|
||||||
* locks should be released here explicitly in the proper order. */
|
|
||||||
|
|
||||||
/* POSIX's fcntl() locks should be restored after file was closed.
|
|
||||||
* FIXME: This code should be rethought and retested, since it will
|
|
||||||
* be executed in really rare cases.
|
|
||||||
*
|
|
||||||
* On the other hand, it seems more reasonable to disallow the multi-open feature
|
|
||||||
* by default, and describe it as "use at your own risk". Currently
|
|
||||||
* multi-open is required only for libfpta's unit-tests. */
|
|
||||||
|
|
||||||
int rc = MDBX_SUCCESS;
|
|
||||||
/* close lck and restore locks */
|
|
||||||
if (env->me_lfd != INVALID_HANDLE_VALUE) {
|
|
||||||
(void)close(env->me_lfd);
|
|
||||||
env->me_lfd = INVALID_HANDLE_VALUE;
|
|
||||||
if (inprocess_neighbor) {
|
|
||||||
/* restore file-locks */
|
|
||||||
if (rc == MDBX_SUCCESS)
|
|
||||||
rc = mdbx_lck_op(inprocess_neighbor->me_lfd, F_SETLKW, F_RDLCK, 0, 1);
|
|
||||||
if (rc == MDBX_SUCCESS)
|
|
||||||
rc = mdbx_rpid_set(inprocess_neighbor);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* close dxb and restore lock */
|
|
||||||
if (env->me_fd != INVALID_HANDLE_VALUE) {
|
|
||||||
(void)close(env->me_fd);
|
|
||||||
env->me_fd = INVALID_HANDLE_VALUE;
|
|
||||||
if (inprocess_neighbor && rc == MDBX_SUCCESS) {
|
|
||||||
/* restore file-lock */
|
|
||||||
rc = mdbx_lck_op(
|
|
||||||
inprocess_neighbor->me_fd, F_SETLKW,
|
|
||||||
(inprocess_neighbor->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK,
|
|
||||||
(inprocess_neighbor->me_lfd == INVALID_HANDLE_VALUE)
|
|
||||||
? 0
|
|
||||||
: inprocess_neighbor->me_pid,
|
|
||||||
(inprocess_neighbor->me_lfd == INVALID_HANDLE_VALUE) ? OFF_T_MAX
|
|
||||||
: 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inprocess_neighbor && rc != MDBX_SUCCESS) {
|
|
||||||
inprocess_neighbor->me_flags |= MDBX_FATAL_ERROR;
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return MDBX_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int mdbx_robust_lock(MDBX_env *env, pthread_mutex_t *mutex) {
|
|
||||||
int rc = pthread_mutex_lock(mutex);
|
|
||||||
if (unlikely(rc != 0))
|
|
||||||
rc = mdbx_mutex_failed(env, mutex, rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int mdbx_robust_trylock(MDBX_env *env, pthread_mutex_t *mutex) {
|
|
||||||
int rc = pthread_mutex_trylock(mutex);
|
|
||||||
if (unlikely(rc != 0 && rc != EBUSY))
|
|
||||||
rc = mdbx_mutex_failed(env, mutex, rc);
|
|
||||||
return (rc != EBUSY) ? rc : MDBX_BUSY;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int mdbx_robust_unlock(MDBX_env *env, pthread_mutex_t *mutex) {
|
|
||||||
int rc = pthread_mutex_unlock(mutex);
|
|
||||||
if (unlikely(rc != 0))
|
|
||||||
rc = mdbx_mutex_failed(env, mutex, rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rdt_lock(MDBX_env *env) {
|
|
||||||
mdbx_trace(">>");
|
|
||||||
int rc = mdbx_robust_lock(env, &env->me_lck->mti_rmutex);
|
|
||||||
mdbx_trace("<< rc %d", rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
|
|
||||||
mdbx_trace(">>");
|
|
||||||
int rc = mdbx_robust_unlock(env, &env->me_lck->mti_rmutex);
|
|
||||||
mdbx_trace("<< rc %d", rc);
|
|
||||||
if (unlikely(MDBX_IS_ERROR(rc)))
|
|
||||||
mdbx_panic("%s() failed: errcode %d\n", mdbx_func_, rc);
|
|
||||||
}
|
|
||||||
|
|
||||||
int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
|
|
||||||
mdbx_trace(">>");
|
|
||||||
int rc = dontwait ? mdbx_robust_trylock(env, env->me_wmutex)
|
|
||||||
: mdbx_robust_lock(env, env->me_wmutex);
|
|
||||||
mdbx_trace("<< rc %d", rc);
|
|
||||||
return MDBX_IS_ERROR(rc) ? rc : MDBX_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
void mdbx_txn_unlock(MDBX_env *env) {
|
|
||||||
mdbx_trace(">>");
|
|
||||||
int rc = mdbx_robust_unlock(env, env->me_wmutex);
|
|
||||||
mdbx_trace("<< rc %d", rc);
|
|
||||||
if (unlikely(MDBX_IS_ERROR(rc)))
|
|
||||||
mdbx_panic("%s() failed: errcode %d\n", mdbx_func_, rc);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __cold internal_seize_lck(int lfd) {
|
|
||||||
assert(lfd != INVALID_HANDLE_VALUE);
|
|
||||||
|
|
||||||
/* try exclusive access */
|
|
||||||
int rc = mdbx_lck_exclusive(lfd, false);
|
|
||||||
if (rc == 0)
|
|
||||||
/* got exclusive */
|
|
||||||
return MDBX_RESULT_TRUE;
|
|
||||||
if (rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK) {
|
|
||||||
/* get shared access */
|
|
||||||
rc = mdbx_lck_shared(lfd);
|
|
||||||
if (rc == 0) {
|
|
||||||
/* got shared, try exclusive again */
|
|
||||||
rc = mdbx_lck_exclusive(lfd, true);
|
|
||||||
if (rc == 0)
|
|
||||||
/* now got exclusive */
|
|
||||||
return MDBX_RESULT_TRUE;
|
|
||||||
if (rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK)
|
|
||||||
/* unable exclusive, but stay shared */
|
|
||||||
return MDBX_RESULT_FALSE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert(MDBX_IS_ERROR(rc));
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
|
|
||||||
assert(env->me_fd != INVALID_HANDLE_VALUE);
|
|
||||||
if (unlikely(op_setlk == 0))
|
|
||||||
choice_fcntl();
|
|
||||||
|
|
||||||
if (env->me_lfd == INVALID_HANDLE_VALUE) {
|
|
||||||
/* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */
|
|
||||||
int rc = mdbx_lck_op(env->me_fd, op_setlk,
|
|
||||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
|
|
||||||
LCK_WHOLE);
|
|
||||||
if (rc != 0) {
|
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "without-lck", rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
return MDBX_RESULT_TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((env->me_flags & MDBX_RDONLY) == 0) {
|
|
||||||
/* Check that another process doesn't operate in without-lck mode. */
|
|
||||||
int rc = mdbx_lck_op(env->me_fd, op_setlk, F_WRLCK, env->me_pid, 1);
|
|
||||||
if (rc != 0) {
|
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
|
|
||||||
"lock-against-without-lck", rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return internal_seize_lck(env->me_lfd);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __cold mdbx_mutex_failed(MDBX_env *env, pthread_mutex_t *mutex,
|
|
||||||
const int err) {
|
|
||||||
int rc = err;
|
|
||||||
#if MDBX_USE_ROBUST
|
|
||||||
if (err == EOWNERDEAD) {
|
|
||||||
/* We own the mutex. Clean up after dead previous owner. */
|
|
||||||
|
|
||||||
int rlocked = (env->me_lck && mutex == &env->me_lck->mti_rmutex);
|
|
||||||
rc = MDBX_SUCCESS;
|
|
||||||
if (!rlocked) {
|
|
||||||
if (unlikely(env->me_txn)) {
|
|
||||||
/* env is hosed if the dead thread was ours */
|
|
||||||
env->me_flags |= MDBX_FATAL_ERROR;
|
|
||||||
env->me_txn = NULL;
|
|
||||||
rc = MDBX_PANIC;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mdbx_notice("%cmutex owner died, %s", (rlocked ? 'r' : 'w'),
|
|
||||||
(rc ? "this process' env is hosed" : "recovering"));
|
|
||||||
|
|
||||||
int check_rc = mdbx_reader_check0(env, rlocked, NULL);
|
|
||||||
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
|
|
||||||
|
|
||||||
#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12) && \
|
|
||||||
!defined(pthread_mutex_consistent) && _POSIX_C_SOURCE < 200809L
|
|
||||||
int mreco_rc = pthread_mutex_consistent_np(mutex);
|
|
||||||
#else
|
|
||||||
int mreco_rc = pthread_mutex_consistent(mutex);
|
|
||||||
#endif
|
|
||||||
check_rc = (mreco_rc == 0) ? check_rc : mreco_rc;
|
|
||||||
|
|
||||||
if (unlikely(mreco_rc))
|
|
||||||
mdbx_error("mutex recovery failed, %s", mdbx_strerror(mreco_rc));
|
|
||||||
|
|
||||||
rc = (rc == MDBX_SUCCESS) ? check_rc : rc;
|
|
||||||
if (MDBX_IS_ERROR(rc))
|
|
||||||
pthread_mutex_unlock(mutex);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
(void)mutex;
|
|
||||||
#endif /* MDBX_USE_ROBUST */
|
|
||||||
|
|
||||||
mdbx_error("mutex (un)lock failed, %s", mdbx_strerror(err));
|
|
||||||
if (rc != EDEADLK)
|
|
||||||
env->me_flags |= MDBX_FATAL_ERROR;
|
|
||||||
return rc;
|
|
||||||
}
|
|
@ -14,22 +14,63 @@
|
|||||||
|
|
||||||
#include "./internals.h"
|
#include "./internals.h"
|
||||||
|
|
||||||
/* Some platforms define the EOWNERDEAD error code
|
/* Some platforms define the EOWNERDEAD error code even though they
|
||||||
* even though they don't support Robust Mutexes.
|
* don't support Robust Mutexes. Compile with -DMDBX_USE_ROBUST=0. */
|
||||||
* Compile with -DMDBX_USE_ROBUST=0. */
|
|
||||||
#ifndef MDBX_USE_ROBUST
|
#ifndef MDBX_USE_ROBUST
|
||||||
#if (defined(EOWNERDEAD) || _POSIX_C_SOURCE >= 200809L) && !defined(__APPLE__)
|
/* Howard Chu: Android currently lacks Robust Mutex support */
|
||||||
|
#if defined(EOWNERDEAD) && !defined(__ANDROID__) && !defined(__APPLE__) && \
|
||||||
|
(!defined(__GLIBC__) || \
|
||||||
|
__GLIBC_PREREQ( \
|
||||||
|
2, \
|
||||||
|
10) /* LY: glibc before 2.10 has a troubles with Robust Mutex too. */ \
|
||||||
|
|| _POSIX_C_SOURCE >= 200809L)
|
||||||
#define MDBX_USE_ROBUST 1
|
#define MDBX_USE_ROBUST 1
|
||||||
#else
|
#else
|
||||||
#define MDBX_USE_ROBUST 0
|
#define MDBX_USE_ROBUST 0
|
||||||
#endif
|
#endif
|
||||||
#endif /* MDBX_USE_ROBUST */
|
#endif /* MDBX_USE_ROBUST */
|
||||||
|
|
||||||
|
#ifndef MDBX_USE_OFDLOCKS
|
||||||
|
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK)
|
||||||
|
#define MDBX_USE_OFDLOCKS 1
|
||||||
|
#else
|
||||||
|
#define MDBX_USE_OFDLOCKS 0
|
||||||
|
#endif
|
||||||
|
#endif /* MDBX_USE_OFDLOCKS */
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
/* rthc */
|
/* global constructor/destructor */
|
||||||
|
|
||||||
|
#if defined(__linux__) || defined(__gnu_linux__)
|
||||||
|
#include <sys/utsname.h>
|
||||||
|
#ifndef MDBX_ALLOY
|
||||||
|
uint32_t mdbx_linux_kernel_version;
|
||||||
|
#endif /* MDBX_ALLOY */
|
||||||
|
#endif /* Linux */
|
||||||
|
|
||||||
static __cold __attribute__((__constructor__)) void
|
static __cold __attribute__((__constructor__)) void
|
||||||
mdbx_global_constructor(void) {
|
mdbx_global_constructor(void) {
|
||||||
|
#if defined(__linux__) || defined(__gnu_linux__)
|
||||||
|
struct utsname buffer;
|
||||||
|
if (uname(&buffer) == 0) {
|
||||||
|
int i = 0;
|
||||||
|
char *p = buffer.release;
|
||||||
|
while (*p && i < 4) {
|
||||||
|
if (*p >= '0' && *p <= '9') {
|
||||||
|
long number = strtol(p, &p, 10);
|
||||||
|
if (number > 0) {
|
||||||
|
if (number > 255)
|
||||||
|
number = 255;
|
||||||
|
mdbx_linux_kernel_version += number << (24 - i * 8);
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
} else {
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* Linux */
|
||||||
|
|
||||||
mdbx_rthc_global_init();
|
mdbx_rthc_global_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,7 +82,7 @@ mdbx_global_destructor(void) {
|
|||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
/* lck */
|
/* lck */
|
||||||
|
|
||||||
/* Описание реализации блокировок для POSIX:
|
/* Описание реализации блокировок для POSIX & Linux:
|
||||||
*
|
*
|
||||||
* lck-файл отображается в память, в нём организуется таблица читателей и
|
* lck-файл отображается в память, в нём организуется таблица читателей и
|
||||||
* размещаются совместно используемые posix-мьютексы (futex). Посредством
|
* размещаются совместно используемые posix-мьютексы (futex). Посредством
|
||||||
@ -57,7 +98,7 @@ mdbx_global_destructor(void) {
|
|||||||
* - Проверка присутствие процессов-читателей,
|
* - Проверка присутствие процессов-читателей,
|
||||||
* т.е. функции mdbx_rpid_set(), mdbx_rpid_clear() и mdbx_rpid_check().
|
* т.е. функции mdbx_rpid_set(), mdbx_rpid_clear() и mdbx_rpid_check().
|
||||||
*
|
*
|
||||||
* Для блокировки файлов Используется только fcntl(F_SETLK), так как:
|
* Для блокировки файлов используется fcntl(F_SETLK), так как:
|
||||||
* - lockf() оперирует только эксклюзивной блокировкой и требует
|
* - lockf() оперирует только эксклюзивной блокировкой и требует
|
||||||
* открытия файла в RW-режиме.
|
* открытия файла в RW-режиме.
|
||||||
* - flock() не гарантирует атомарности при смене блокировок
|
* - flock() не гарантирует атомарности при смене блокировок
|
||||||
@ -67,28 +108,68 @@ mdbx_global_destructor(void) {
|
|||||||
* в качестве позиции используется pid процесса-читателя.
|
* в качестве позиции используется pid процесса-читателя.
|
||||||
* - Для первоначального захвата и shared/exclusive выполняется блокировка
|
* - Для первоначального захвата и shared/exclusive выполняется блокировка
|
||||||
* основного файла БД и при успехе lck-файла.
|
* основного файла БД и при успехе lck-файла.
|
||||||
|
*
|
||||||
|
* ----------------------------------------------------------------------------
|
||||||
|
* УДЕРЖИВАЕМЫЕ БЛОКИРОВКИ В ЗАВИСИМОСТИ ОТ РЕЖИМА И СОСТОЯНИЯ
|
||||||
|
*
|
||||||
|
* Эксклюзивный режим без lck-файла:
|
||||||
|
* = заблокирован весь dxb-файл посредством F_RDLCK или F_WRLCK,
|
||||||
|
* в зависимости от MDBX_RDONLY.
|
||||||
|
*
|
||||||
|
* Не-операционный режим на время пере-инициализации и разрушении lck-файла:
|
||||||
|
* = F_WRLCK блокировка первого байта lck-файла, другие процессы ждут её
|
||||||
|
* снятия при получении F_RDLCK через F_SETLKW.
|
||||||
|
* - блокировки dxb-файла могут меняться до снятия эксклюзивной блокировки
|
||||||
|
* lck-файла:
|
||||||
|
* + для НЕ-эксклюзивного режима блокировка pid-байта в dxb-файле
|
||||||
|
* посредством F_RDLCK или F_WRLCK, в зависимости от MDBX_RDONLY.
|
||||||
|
* + для ЭКСКЛЮЗИВНОГО режима блокировка всего dxb-файла
|
||||||
|
* посредством F_RDLCK или F_WRLCK, в зависимости от MDBX_RDONLY.
|
||||||
|
*
|
||||||
|
* ОПЕРАЦИОННЫЙ режим с lck-файлом:
|
||||||
|
* = F_RDLCK блокировка первого байта lck-файла, другие процессы не могут
|
||||||
|
* получить F_WRLCK и таким образом видят что БД используется.
|
||||||
|
* + F_WRLCK блокировка pid-байта в lck-файле после первой транзакции чтения.
|
||||||
|
* + для НЕ-эксклюзивного режима блокировка pid-байта в dxb-файле
|
||||||
|
* посредством F_RDLCK или F_WRLCK, в зависимости от MDBX_RDONLY.
|
||||||
|
* + для ЭКСКЛЮЗИВНОГО режима блокировка всего dxb-файла
|
||||||
|
* посредством F_RDLCK или F_WRLCK, в зависимости от MDBX_RDONLY.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if MDBX_USE_OFDLOCKS
|
||||||
|
static int op_setlk, op_setlkw, op_getlk;
|
||||||
|
static void __cold choice_fcntl() {
|
||||||
|
assert(!op_setlk && !op_setlkw && !op_getlk);
|
||||||
|
if ((mdbx_runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0
|
||||||
|
#if defined(__linux__) || defined(__gnu_linux__)
|
||||||
|
&& mdbx_linux_kernel_version >
|
||||||
|
0x030f0000 /* OFD locks are available since 3.15, but engages here
|
||||||
|
only for 3.16 and later kernels (LTS) for reliability reasons */
|
||||||
|
#endif /* linux */
|
||||||
|
) {
|
||||||
|
op_setlk = F_OFD_SETLK;
|
||||||
|
op_setlkw = F_OFD_SETLKW;
|
||||||
|
op_getlk = F_OFD_GETLK;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
op_setlk = F_SETLK;
|
||||||
|
op_setlkw = F_SETLKW;
|
||||||
|
op_getlk = F_GETLK;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define op_setlk F_SETLK
|
||||||
|
#define op_setlkw F_SETLKW
|
||||||
|
#define op_getlk F_GETLK
|
||||||
|
#endif /* MDBX_USE_OFDLOCKS */
|
||||||
|
|
||||||
#ifndef OFF_T_MAX
|
#ifndef OFF_T_MAX
|
||||||
#define OFF_T_MAX \
|
#define OFF_T_MAX \
|
||||||
((sizeof(off_t) > 4 ? INT64_MAX : INT32_MAX) & ~(size_t)0xffff)
|
((sizeof(off_t) > 4 ? INT64_MAX : INT32_MAX) & ~(size_t)0xffff)
|
||||||
#endif
|
#endif
|
||||||
#ifndef PID_T_MAX
|
|
||||||
#define PID_T_MAX INT_MAX
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK)
|
static int lck_op(mdbx_filehandle_t fd, int cmd, short lck, off_t offset,
|
||||||
#define OP_SETLK F_OFD_SETLK
|
off_t len) {
|
||||||
#define OP_SETLKW F_OFD_SETLKW
|
mdbx_jitter4testing(true);
|
||||||
#define OP_GETLK F_OFD_GETLK
|
|
||||||
#else
|
|
||||||
#define OP_SETLK F_SETLK
|
|
||||||
#define OP_SETLKW F_SETLKW
|
|
||||||
#define OP_GETLK F_GETLK
|
|
||||||
#endif /* OFD locks */
|
|
||||||
|
|
||||||
static int mdbx_lck_op(mdbx_filehandle_t fd, int cmd, short lck, off_t offset,
|
|
||||||
off_t len) {
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
struct flock lock_op;
|
struct flock lock_op;
|
||||||
memset(&lock_op, 0, sizeof(lock_op));
|
memset(&lock_op, 0, sizeof(lock_op));
|
||||||
@ -96,131 +177,212 @@ static int mdbx_lck_op(mdbx_filehandle_t fd, int cmd, short lck, off_t offset,
|
|||||||
lock_op.l_whence = SEEK_SET;
|
lock_op.l_whence = SEEK_SET;
|
||||||
lock_op.l_start = offset;
|
lock_op.l_start = offset;
|
||||||
lock_op.l_len = len;
|
lock_op.l_len = len;
|
||||||
if (fcntl(fd, cmd, &lock_op) == 0) {
|
int rc = fcntl(fd, cmd, &lock_op);
|
||||||
if (cmd == OP_GETLK) {
|
mdbx_jitter4testing(true);
|
||||||
|
if (rc != -1) {
|
||||||
|
if (cmd == op_getlk) {
|
||||||
/* Checks reader by pid. Returns:
|
/* Checks reader by pid. Returns:
|
||||||
* MDBX_RESULT_TRUE - if pid is live (unable to acquire lock)
|
* MDBX_RESULT_TRUE - if pid is live (unable to acquire lock)
|
||||||
* MDBX_RESULT_FALSE - if pid is dead (lock acquired). */
|
* MDBX_RESULT_FALSE - if pid is dead (lock acquired). */
|
||||||
return (lock_op.l_type == F_UNLCK) ? MDBX_RESULT_FALSE
|
return (lock_op.l_type == F_UNLCK) ? MDBX_RESULT_FALSE
|
||||||
: MDBX_RESULT_TRUE;
|
: MDBX_RESULT_TRUE;
|
||||||
}
|
}
|
||||||
return 0;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
int rc = errno;
|
rc = errno;
|
||||||
if (rc != EINTR || cmd == F_SETLKW)
|
if (rc != EINTR || cmd == op_setlkw) {
|
||||||
|
mdbx_assert(nullptr, MDBX_IS_ERROR(rc));
|
||||||
return rc;
|
return rc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env) {
|
MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env) {
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
||||||
assert(env->me_pid > 0 && env->me_pid <= PID_T_MAX);
|
assert(env->me_pid > 0);
|
||||||
return mdbx_lck_op(env->me_lfd, OP_SETLK, F_WRLCK, env->me_pid, 1);
|
return lck_op(env->me_lfd, op_setlk, F_WRLCK, env->me_pid, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env) {
|
MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env) {
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
||||||
assert(env->me_pid > 0 && env->me_pid <= PID_T_MAX);
|
assert(env->me_pid > 0);
|
||||||
return mdbx_lck_op(env->me_lfd, OP_SETLKW, F_UNLCK, env->me_pid, 1);
|
return lck_op(env->me_lfd, op_setlk, F_UNLCK, env->me_pid, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) {
|
MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) {
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
||||||
assert(pid > 0 && pid <= PID_T_MAX);
|
assert(pid > 0);
|
||||||
assert(PID_T_MAX < OFF_T_MAX);
|
return lck_op(env->me_lfd, op_getlk, F_WRLCK, pid, 1);
|
||||||
return mdbx_lck_op(env->me_lfd, OP_GETLK, F_WRLCK, pid, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
|
MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
|
||||||
assert(env->me_fd != INVALID_HANDLE_VALUE);
|
assert(env->me_fd != INVALID_HANDLE_VALUE);
|
||||||
assert(env->me_pid > 0 && env->me_pid <= PID_T_MAX);
|
#if MDBX_USE_OFDLOCKS
|
||||||
|
if (unlikely(op_setlk == 0))
|
||||||
|
choice_fcntl();
|
||||||
|
#endif /* MDBX_USE_OFDLOCKS */
|
||||||
|
|
||||||
|
int rc;
|
||||||
if (env->me_lfd == INVALID_HANDLE_VALUE) {
|
if (env->me_lfd == INVALID_HANDLE_VALUE) {
|
||||||
/* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */
|
/* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */
|
||||||
int rc = mdbx_lck_op(env->me_fd, OP_SETLK,
|
rc =
|
||||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
|
lck_op(env->me_fd, op_setlk,
|
||||||
OFF_T_MAX);
|
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX);
|
||||||
if (rc != 0) {
|
if (rc != MDBX_SUCCESS) {
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "without-lck", rc);
|
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "without-lck", rc);
|
||||||
|
mdbx_assert(env, MDBX_IS_ERROR(rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
return MDBX_RESULT_TRUE;
|
return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* try exclusive access */
|
/* Firstly try to get exclusive locking. */
|
||||||
int rc = mdbx_lck_op(env->me_fd, OP_SETLK,
|
rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1);
|
||||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
|
if (rc == MDBX_SUCCESS) {
|
||||||
OFF_T_MAX);
|
continue_dxb_exclusive:
|
||||||
if (rc == 0) {
|
rc =
|
||||||
continue_exclusive:
|
lck_op(env->me_fd, op_setlk,
|
||||||
/* got dxb-exclusive, continue lck-exclusive */
|
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX);
|
||||||
rc = mdbx_lck_op(env->me_lfd, OP_SETLKW, F_WRLCK, 0, OFF_T_MAX);
|
if (rc == MDBX_SUCCESS)
|
||||||
if (rc == 0) {
|
return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;
|
||||||
/* got both exclusive */
|
|
||||||
return MDBX_RESULT_TRUE;
|
/* the cause may be a collision with POSIX's file-lock recovery. */
|
||||||
|
if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
|
||||||
|
rc == EDEADLK)) {
|
||||||
|
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "dxb-exclusive", rc);
|
||||||
|
mdbx_assert(env, MDBX_IS_ERROR(rc));
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fallback to lck-shared */
|
||||||
|
rc = lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1);
|
||||||
|
if (rc != MDBX_SUCCESS) {
|
||||||
|
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "fallback-shared",
|
||||||
|
rc);
|
||||||
|
mdbx_assert(env, MDBX_IS_ERROR(rc));
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
/* Done: return with shared locking. */
|
||||||
|
return MDBX_RESULT_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for lck-shared now. */
|
||||||
|
/* This may block during transient states, for instance until another
|
||||||
|
* competing process calls lck_downgrade(). */
|
||||||
|
rc = lck_op(env->me_lfd, op_setlkw, F_RDLCK, 0, 1);
|
||||||
|
if (rc != MDBX_SUCCESS) {
|
||||||
|
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "try-shared", rc);
|
||||||
|
mdbx_assert(env, MDBX_IS_ERROR(rc));
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lock against another process operating in without-lck or exclusive mode. */
|
||||||
|
rc =
|
||||||
|
lck_op(env->me_fd, op_setlk,
|
||||||
|
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, env->me_pid, 1);
|
||||||
|
if (rc != MDBX_SUCCESS) {
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
|
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
|
||||||
"lck-after-dxb-exclusive", rc);
|
"lock-against-without-lck", rc);
|
||||||
assert(MDBX_IS_ERROR(rc));
|
mdbx_assert(env, MDBX_IS_ERROR(rc));
|
||||||
goto bailout;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK) {
|
/* got shared, retry exclusive */
|
||||||
rc = mdbx_lck_op(env->me_fd, OP_SETLKW,
|
rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1);
|
||||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK,
|
if (rc == MDBX_SUCCESS)
|
||||||
env->me_pid, 1);
|
goto continue_dxb_exclusive;
|
||||||
if (rc == 0) {
|
|
||||||
/* got dxb-shared, try again dxb-exclusive */
|
|
||||||
rc = mdbx_lck_op(env->me_fd, OP_SETLK,
|
|
||||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
|
|
||||||
OFF_T_MAX);
|
|
||||||
if (rc == 0)
|
|
||||||
goto continue_exclusive;
|
|
||||||
|
|
||||||
/* continue lck-shared */
|
if (rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
|
||||||
rc = mdbx_lck_op(env->me_lfd, OP_SETLKW, F_RDLCK, 0, 1);
|
rc == EDEADLK)
|
||||||
if (rc == 0) {
|
return MDBX_RESULT_FALSE /* Done: exclusive is unavailable,
|
||||||
/* got both dxb and lck shared lock */
|
but shared locks are alive. */
|
||||||
return MDBX_RESULT_FALSE;
|
;
|
||||||
}
|
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "lck-shared", rc);
|
|
||||||
} else {
|
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "dxb-shared", rc);
|
|
||||||
}
|
|
||||||
assert(MDBX_IS_ERROR(rc));
|
|
||||||
}
|
|
||||||
|
|
||||||
bailout:
|
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "try-exclusive", rc);
|
||||||
(void)mdbx_lck_op(env->me_lfd, OP_SETLK, F_UNLCK, 0, OFF_T_MAX);
|
mdbx_assert(env, MDBX_IS_ERROR(rc));
|
||||||
(void)mdbx_lck_op(env->me_fd, OP_SETLK, F_UNLCK, 0, OFF_T_MAX);
|
|
||||||
assert(MDBX_IS_ERROR(rc));
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
|
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
||||||
int rc = mdbx_lck_op(env->me_lfd, OP_SETLK, F_UNLCK, 1, OFF_T_MAX - 1);
|
int rc = MDBX_SUCCESS;
|
||||||
if (rc == 0)
|
if ((env->me_flags & MDBX_EXCLUSIVE) == 0) {
|
||||||
rc = mdbx_lck_op(env->me_lfd, OP_SETLKW, F_RDLCK, 0, 1);
|
rc = lck_op(env->me_fd, op_setlk, F_UNLCK, 0, env->me_pid);
|
||||||
|
if (rc == MDBX_SUCCESS)
|
||||||
|
rc = lck_op(env->me_fd, op_setlk, F_UNLCK, env->me_pid + 1,
|
||||||
|
OFF_T_MAX - env->me_pid - 1);
|
||||||
|
}
|
||||||
|
if (rc == MDBX_SUCCESS)
|
||||||
|
rc = lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1);
|
||||||
if (unlikely(rc != 0)) {
|
if (unlikely(rc != 0)) {
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "lck", rc);
|
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "lck", rc);
|
||||||
goto bailout;
|
assert(MDBX_IS_ERROR(rc));
|
||||||
}
|
}
|
||||||
if (complete) {
|
return rc;
|
||||||
rc = mdbx_lck_op(env->me_fd, OP_SETLK,
|
}
|
||||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK,
|
|
||||||
env->me_pid, 1);
|
MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
|
||||||
if (unlikely(rc != 0)) {
|
MDBX_env *inprocess_neighbor) {
|
||||||
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "dxb", rc);
|
int rc = MDBX_SUCCESS;
|
||||||
goto bailout;
|
if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor &&
|
||||||
|
env->me_lck &&
|
||||||
|
/* try get exclusive access */
|
||||||
|
lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 &&
|
||||||
|
lck_op(env->me_fd, op_setlk,
|
||||||
|
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX)) {
|
||||||
|
mdbx_info("%s: got exclusive, drown mutexes", mdbx_func_);
|
||||||
|
rc = pthread_mutex_destroy(&env->me_lck->mti_rmutex);
|
||||||
|
if (rc == 0)
|
||||||
|
rc = pthread_mutex_destroy(&env->me_lck->mti_wmutex);
|
||||||
|
mdbx_assert(env, rc == 0);
|
||||||
|
if (rc == 0) {
|
||||||
|
memset(env->me_lck, 0x81, sizeof(MDBX_lockinfo));
|
||||||
|
msync(env->me_lck, env->me_os_psize, MS_ASYNC);
|
||||||
|
}
|
||||||
|
mdbx_jitter4testing(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 1) POSIX's fcntl() locks (i.e. when op_setlk == F_SETLK) should be restored
|
||||||
|
* after file was closed.
|
||||||
|
*
|
||||||
|
* 2) File locks would be released (by kernel) while the file-descriptors will
|
||||||
|
* be closed. But to avoid false-positive EACCES and EDEADLK from the kernel,
|
||||||
|
* locks should be released here explicitly in the proper order. */
|
||||||
|
|
||||||
|
/* close dxb and restore lock */
|
||||||
|
if (env->me_fd != INVALID_HANDLE_VALUE) {
|
||||||
|
if (unlikely(close(env->me_fd) != 0) && rc == MDBX_SUCCESS)
|
||||||
|
rc = errno;
|
||||||
|
env->me_fd = INVALID_HANDLE_VALUE;
|
||||||
|
if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) {
|
||||||
|
/* restore file-lock */
|
||||||
|
rc = lck_op(
|
||||||
|
inprocess_neighbor->me_fd, F_SETLKW,
|
||||||
|
(inprocess_neighbor->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK,
|
||||||
|
(inprocess_neighbor->me_flags & MDBX_EXCLUSIVE)
|
||||||
|
? 0
|
||||||
|
: inprocess_neighbor->me_pid,
|
||||||
|
(inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) ? OFF_T_MAX : 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return MDBX_SUCCESS;
|
|
||||||
|
|
||||||
bailout:
|
/* close lck and restore locks */
|
||||||
(void)mdbx_lck_op(env->me_lfd, OP_SETLK, F_UNLCK, 0, OFF_T_MAX);
|
if (env->me_lfd != INVALID_HANDLE_VALUE) {
|
||||||
(void)mdbx_lck_op(env->me_fd, OP_SETLK, F_UNLCK, 0, OFF_T_MAX);
|
if (unlikely(close(env->me_lfd) != 0) && rc == MDBX_SUCCESS)
|
||||||
assert(MDBX_IS_ERROR(rc));
|
rc = errno;
|
||||||
|
env->me_lfd = INVALID_HANDLE_VALUE;
|
||||||
|
if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) {
|
||||||
|
/* restore file-locks */
|
||||||
|
rc = lck_op(inprocess_neighbor->me_lfd, F_SETLKW, F_RDLCK, 0, 1);
|
||||||
|
if (rc == MDBX_SUCCESS && inprocess_neighbor->me_live_reader)
|
||||||
|
rc = mdbx_rpid_set(inprocess_neighbor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inprocess_neighbor && rc != MDBX_SUCCESS)
|
||||||
|
inprocess_neighbor->me_flags |= MDBX_FATAL_ERROR;
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -231,7 +393,7 @@ static int mdbx_mutex_failed(MDBX_env *env, pthread_mutex_t *mutex,
|
|||||||
|
|
||||||
MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
|
MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
|
||||||
int global_uniqueness_flag) {
|
int global_uniqueness_flag) {
|
||||||
if (global_uniqueness_flag == MDBX_RESULT_FALSE)
|
if (global_uniqueness_flag != MDBX_RESULT_TRUE)
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
|
|
||||||
pthread_mutexattr_t ma;
|
pthread_mutexattr_t ma;
|
||||||
@ -244,7 +406,12 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
|
|||||||
goto bailout;
|
goto bailout;
|
||||||
|
|
||||||
#if MDBX_USE_ROBUST
|
#if MDBX_USE_ROBUST
|
||||||
|
#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12) && \
|
||||||
|
!defined(pthread_mutex_consistent) && _POSIX_C_SOURCE < 200809L
|
||||||
|
rc = pthread_mutexattr_setrobust_np(&ma, PTHREAD_MUTEX_ROBUST_NP);
|
||||||
|
#else
|
||||||
rc = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
|
rc = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
|
||||||
|
#endif
|
||||||
if (rc)
|
if (rc)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
#endif /* MDBX_USE_ROBUST */
|
#endif /* MDBX_USE_ROBUST */
|
||||||
@ -271,75 +438,8 @@ bailout:
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
|
|
||||||
MDBX_env *inprocess_neighbor) {
|
|
||||||
/* File locks would be released (by kernel) while the file-descriptors
|
|
||||||
* will be closed. But to avoid false-positive EDEADLK from the kernel,
|
|
||||||
* locks should be released here explicitly with properly order. */
|
|
||||||
|
|
||||||
if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor &&
|
|
||||||
env->me_lck &&
|
|
||||||
/* try get exclusive access */
|
|
||||||
mdbx_lck_op(env->me_fd, OP_SETLK,
|
|
||||||
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
|
|
||||||
OFF_T_MAX) == 0 &&
|
|
||||||
mdbx_lck_op(env->me_lfd, OP_SETLK, F_WRLCK, 0, OFF_T_MAX) == 0) {
|
|
||||||
mdbx_info("%s: got exclusive, drown mutexes", mdbx_func_);
|
|
||||||
int rc = pthread_mutex_destroy(&env->me_lck->mti_rmutex);
|
|
||||||
if (rc == 0)
|
|
||||||
rc = pthread_mutex_destroy(&env->me_lck->mti_wmutex);
|
|
||||||
assert(rc == 0);
|
|
||||||
(void)rc;
|
|
||||||
msync(env->me_lck, env->me_os_psize, MS_ASYNC);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* POSIX's fcntl() locks should be restored after file was closed.
|
|
||||||
* FIXME: This code should be rethinked and retested, since it will executed
|
|
||||||
* in really rare cases. For instance, this code could wait a lot, if other
|
|
||||||
* process get exclusive access immediately after the close().
|
|
||||||
*
|
|
||||||
* On the other hand, seems more reasonable to disallow multi-open feature
|
|
||||||
* by default, and describe it as "use at your own risk". Currently
|
|
||||||
* multi-open required only for libfpta's unit-tests. */
|
|
||||||
|
|
||||||
int rc = MDBX_SUCCESS;
|
|
||||||
/* close clk and restore locks */
|
|
||||||
if (env->me_lfd != INVALID_HANDLE_VALUE) {
|
|
||||||
(void)close(env->me_lfd);
|
|
||||||
env->me_lfd = INVALID_HANDLE_VALUE;
|
|
||||||
if (inprocess_neighbor) {
|
|
||||||
/* restore file-locks */
|
|
||||||
if (rc == MDBX_SUCCESS)
|
|
||||||
rc = mdbx_lck_op(inprocess_neighbor->me_lfd, OP_SETLKW, F_RDLCK, 0, 1);
|
|
||||||
if (rc == MDBX_SUCCESS)
|
|
||||||
rc = mdbx_rpid_set(inprocess_neighbor);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* close dxb and restore lock */
|
|
||||||
if (env->me_fd != INVALID_HANDLE_VALUE) {
|
|
||||||
(void)close(env->me_fd);
|
|
||||||
env->me_fd = INVALID_HANDLE_VALUE;
|
|
||||||
if (inprocess_neighbor && rc == MDBX_SUCCESS) {
|
|
||||||
/* restore file-lock */
|
|
||||||
rc = mdbx_lck_op(
|
|
||||||
inprocess_neighbor->me_fd, OP_SETLKW,
|
|
||||||
(inprocess_neighbor->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK,
|
|
||||||
(inprocess_neighbor->me_lfd == INVALID_HANDLE_VALUE)
|
|
||||||
? 0
|
|
||||||
: inprocess_neighbor->me_pid,
|
|
||||||
(inprocess_neighbor->me_lfd == INVALID_HANDLE_VALUE) ? OFF_T_MAX : 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inprocess_neighbor && rc != MDBX_SUCCESS) {
|
|
||||||
inprocess_neighbor->me_flags |= MDBX_FATAL_ERROR;
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
return MDBX_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int mdbx_robust_lock(MDBX_env *env, pthread_mutex_t *mutex) {
|
static int mdbx_robust_lock(MDBX_env *env, pthread_mutex_t *mutex) {
|
||||||
|
mdbx_jitter4testing(true);
|
||||||
int rc = pthread_mutex_lock(mutex);
|
int rc = pthread_mutex_lock(mutex);
|
||||||
if (unlikely(rc != 0))
|
if (unlikely(rc != 0))
|
||||||
rc = mdbx_mutex_failed(env, mutex, rc);
|
rc = mdbx_mutex_failed(env, mutex, rc);
|
||||||
@ -347,6 +447,7 @@ static int mdbx_robust_lock(MDBX_env *env, pthread_mutex_t *mutex) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int mdbx_robust_trylock(MDBX_env *env, pthread_mutex_t *mutex) {
|
static int mdbx_robust_trylock(MDBX_env *env, pthread_mutex_t *mutex) {
|
||||||
|
mdbx_jitter4testing(true);
|
||||||
int rc = pthread_mutex_trylock(mutex);
|
int rc = pthread_mutex_trylock(mutex);
|
||||||
if (unlikely(rc != 0 && rc != EBUSY))
|
if (unlikely(rc != 0 && rc != EBUSY))
|
||||||
rc = mdbx_mutex_failed(env, mutex, rc);
|
rc = mdbx_mutex_failed(env, mutex, rc);
|
||||||
@ -355,6 +456,7 @@ static int mdbx_robust_trylock(MDBX_env *env, pthread_mutex_t *mutex) {
|
|||||||
|
|
||||||
static int mdbx_robust_unlock(MDBX_env *env, pthread_mutex_t *mutex) {
|
static int mdbx_robust_unlock(MDBX_env *env, pthread_mutex_t *mutex) {
|
||||||
int rc = pthread_mutex_unlock(mutex);
|
int rc = pthread_mutex_unlock(mutex);
|
||||||
|
mdbx_jitter4testing(true);
|
||||||
if (unlikely(rc != 0))
|
if (unlikely(rc != 0))
|
||||||
rc = mdbx_mutex_failed(env, mutex, rc);
|
rc = mdbx_mutex_failed(env, mutex, rc);
|
||||||
return rc;
|
return rc;
|
||||||
@ -414,7 +516,12 @@ static int __cold mdbx_mutex_failed(MDBX_env *env, pthread_mutex_t *mutex,
|
|||||||
int check_rc = mdbx_reader_check0(env, rlocked, NULL);
|
int check_rc = mdbx_reader_check0(env, rlocked, NULL);
|
||||||
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
|
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
|
||||||
|
|
||||||
|
#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12) && \
|
||||||
|
!defined(pthread_mutex_consistent) && _POSIX_C_SOURCE < 200809L
|
||||||
|
int mreco_rc = pthread_mutex_consistent_np(mutex);
|
||||||
|
#else
|
||||||
int mreco_rc = pthread_mutex_consistent(mutex);
|
int mreco_rc = pthread_mutex_consistent(mutex);
|
||||||
|
#endif
|
||||||
check_rc = (mreco_rc == 0) ? check_rc : mreco_rc;
|
check_rc = (mreco_rc == 0) ? check_rc : mreco_rc;
|
||||||
|
|
||||||
if (unlikely(mreco_rc))
|
if (unlikely(mreco_rc))
|
||||||
|
@ -341,17 +341,32 @@ mdbx_resume_threads_after_remap(mdbx_handle_array_t *array) {
|
|||||||
/* global `initial` lock for lockfile initialization,
|
/* global `initial` lock for lockfile initialization,
|
||||||
* exclusive/shared locking first cacheline */
|
* exclusive/shared locking first cacheline */
|
||||||
|
|
||||||
/* FIXME: locking schema/algo descritpion.
|
/* Brief description of the locking schema/algorithm:
|
||||||
?-? = free
|
* - Windows does not support upgrading or downgrading for file locking.
|
||||||
S-? = used
|
* - Therefore upgrading/downgrading is emulated by shared and exclusive
|
||||||
E-? = exclusive-read
|
* locking of upper and lower halves.
|
||||||
?-S
|
* - In other words, we have an FSM with 9 possible states,
|
||||||
?-E = middle
|
* i.e. free/shared/exclusive x free/shared/exclusive == 9.
|
||||||
S-S
|
* Only 6 states of the FSM are used, 2 of which are transitive.
|
||||||
S-E = locked
|
*
|
||||||
E-S
|
* The mdbx_lck_seize() moves the locking-FSM from the initial free/unlocked
|
||||||
E-E = exclusive-write
|
* state to the "exclusive write" (and returns MDBX_RESULT_TRUE) if possible,
|
||||||
*/
|
* or to the "used" (and returns MDBX_RESULT_FALSE).
|
||||||
|
*
|
||||||
|
* The mdbx_lck_downgrade() moves the locking-FSM from "exclusive write"
|
||||||
|
* state to the "used" (i.e. shared) state.
|
||||||
|
*
|
||||||
|
* States:
|
||||||
|
* ?-? = free, i.e. unlocked
|
||||||
|
* S-? = used, i.e. shared lock
|
||||||
|
* E-? = exclusive-read, i.e. operational exclusive
|
||||||
|
* ?-S
|
||||||
|
* ?-E = middle (transitive state)
|
||||||
|
* S-S
|
||||||
|
* S-E = locked (transitive state)
|
||||||
|
* E-S
|
||||||
|
* E-E = exclusive-write, i.e. exclusive due (re)initialization
|
||||||
|
*/
|
||||||
|
|
||||||
static void lck_unlock(MDBX_env *env) {
|
static void lck_unlock(MDBX_env *env) {
|
||||||
int rc;
|
int rc;
|
||||||
@ -414,8 +429,8 @@ MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Seize state as 'exclusive-write' (E-E and returns MDBX_RESULT_TRUE)
|
/* Seize state as 'exclusive-write' (E-E and returns MDBX_RESULT_TRUE)
|
||||||
* or as 'used' (S-? and returns MDBX_RESULT_FALSE), otherwise returns an error
|
* or as 'used' (S-? and returns MDBX_RESULT_FALSE).
|
||||||
*/
|
* Otherwise returns an error. */
|
||||||
static int internal_seize_lck(HANDLE lfd) {
|
static int internal_seize_lck(HANDLE lfd) {
|
||||||
int rc;
|
int rc;
|
||||||
assert(lfd != INVALID_HANDLE_VALUE);
|
assert(lfd != INVALID_HANDLE_VALUE);
|
||||||
@ -511,23 +526,25 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
|
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
|
||||||
/* Transite from exclusive state (E-?) to used (S-?) */
|
/* Transite from exclusive state (E-?) to used (S-?) */
|
||||||
assert(env->me_fd != INVALID_HANDLE_VALUE);
|
assert(env->me_fd != INVALID_HANDLE_VALUE);
|
||||||
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
assert(env->me_lfd != INVALID_HANDLE_VALUE);
|
||||||
|
|
||||||
|
#if 1
|
||||||
if (env->me_flags & MDBX_EXCLUSIVE)
|
if (env->me_flags & MDBX_EXCLUSIVE)
|
||||||
return MDBX_SUCCESS /* nope since files were must be opened non-shareable */
|
return MDBX_SUCCESS /* nope since files were must be opened non-shareable */
|
||||||
;
|
;
|
||||||
|
#else
|
||||||
/* 1) must be at E-E (exclusive-write) */
|
/* 1) must be at E-E (exclusive-write) */
|
||||||
if (!complete) {
|
if (env->me_flags & MDBX_EXCLUSIVE) {
|
||||||
/* transite from E-E to E_? (exclusive-read) */
|
/* transite from E-E to E_? (exclusive-read) */
|
||||||
if (!funlock(env->me_lfd, LCK_UPPER))
|
if (!funlock(env->me_lfd, LCK_UPPER))
|
||||||
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
|
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
|
||||||
"E-E(exclusive-write) >> E-?(exclusive-read)", GetLastError());
|
"E-E(exclusive-write) >> E-?(exclusive-read)", GetLastError());
|
||||||
return MDBX_SUCCESS /* 2) now at E-? (exclusive-read), done */;
|
return MDBX_SUCCESS /* 2) now at E-? (exclusive-read), done */;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* 3) now at E-E (exclusive-write), transite to ?_E (middle) */
|
/* 3) now at E-E (exclusive-write), transite to ?_E (middle) */
|
||||||
if (!funlock(env->me_lfd, LCK_LOWER))
|
if (!funlock(env->me_lfd, LCK_LOWER))
|
||||||
|
@ -698,12 +698,17 @@ MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
|
|||||||
MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env);
|
MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env);
|
||||||
|
|
||||||
/// \brief Снижает уровень первоначальной захваченной блокировки до
|
/// \brief Снижает уровень первоначальной захваченной блокировки до
|
||||||
/// операционного уровня определяемого аргументом.
|
/// операционного уровня определяемого аргументом. Смысл функции в возврате
|
||||||
/// \param
|
/// в операционный режим:
|
||||||
/// complete = TRUE - понижение до разделяемой блокировки.
|
/// - разблокирование других процессов, ожидающих доступа, т.е. если
|
||||||
/// complete = FALSE - понижение до эксклюзивной операционной блокировки.
|
/// (env->me_flags & MDBX_EXCLUSIVE) != 0, то другие процессы должны узнать
|
||||||
|
/// о невозможности доступа, а не ждать его.
|
||||||
|
/// - снятия блокировок мешающих работе с файлом (актуально для Windows).
|
||||||
|
/// (env->me_flags & MDBX_EXCLUSIVE) == 0 - понижение до разделяемой
|
||||||
|
/// блокировки. (env->me_flags & MDBX_EXCLUSIVE) != 0 - понижение до
|
||||||
|
/// эксклюзивной операционной блокировки.
|
||||||
/// \return Код ошибки или 0 в случае успеха.
|
/// \return Код ошибки или 0 в случае успеха.
|
||||||
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env, bool complete);
|
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env);
|
||||||
|
|
||||||
/// \brief Блокирует lck-файл и/или таблицу читателей для (де)регистрации.
|
/// \brief Блокирует lck-файл и/или таблицу читателей для (де)регистрации.
|
||||||
/// \return Код ошибки или 0 в случае успеха.
|
/// \return Код ошибки или 0 в случае успеха.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user