libmdbx/src/lck-windows.c

567 lines
17 KiB
C
Raw Normal View History

2017-10-26 21:47:41 +03:00
/*
* Copyright 2015-2017 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
#include "./bits.h"
/* PREAMBLE FOR WINDOWS:
*
* We are not concerned for performance here.
* If you are running Windows a performance could NOT be the goal.
* Otherwise please use Linux.
*
* Regards,
* LY
*/
/*----------------------------------------------------------------------------*/
/* rthc */
static CRITICAL_SECTION rthc_critical_section;
static void NTAPI tls_callback(PVOID module, DWORD reason, PVOID reserved) {
(void)module;
(void)reserved;
switch (reason) {
case DLL_PROCESS_ATTACH:
InitializeCriticalSection(&rthc_critical_section);
break;
case DLL_PROCESS_DETACH:
DeleteCriticalSection(&rthc_critical_section);
break;
case DLL_THREAD_ATTACH:
break;
case DLL_THREAD_DETACH:
mdbx_rthc_cleanup();
break;
}
}
void mdbx_rthc_lock(void) { EnterCriticalSection(&rthc_critical_section); }
void mdbx_rthc_unlock(void) { LeaveCriticalSection(&rthc_critical_section); }
/* *INDENT-OFF* */
/* clang-format off */
#if defined(_MSC_VER)
# pragma const_seg(push)
# pragma data_seg(push)
# ifdef _WIN64
/* kick a linker to create the TLS directory if not already done */
# pragma comment(linker, "/INCLUDE:_tls_used")
/* Force some symbol references. */
# pragma comment(linker, "/INCLUDE:mdbx_tls_callback")
/* specific const-segment for WIN64 */
# pragma const_seg(".CRT$XLB")
const
# else
/* kick a linker to create the TLS directory if not already done */
# pragma comment(linker, "/INCLUDE:__tls_used")
/* Force some symbol references. */
# pragma comment(linker, "/INCLUDE:_mdbx_tls_callback")
/* specific data-segment for WIN32 */
# pragma data_seg(".CRT$XLB")
# endif
PIMAGE_TLS_CALLBACK mdbx_tls_callback = tls_callback;
# pragma data_seg(pop)
# pragma const_seg(pop)
#elif defined(__GNUC__)
# ifdef _WIN64
const
# endif
PIMAGE_TLS_CALLBACK mdbx_tls_callback __attribute__((section(".CRT$XLB"), used))
= tls_callback;
#else
# error FIXME
#endif
/* *INDENT-ON* */
/* clang-format on */
/*----------------------------------------------------------------------------*/
#define LCK_SHARED 0
#define LCK_EXCLUSIVE LOCKFILE_EXCLUSIVE_LOCK
#define LCK_WAITFOR 0
#define LCK_DONTWAIT LOCKFILE_FAIL_IMMEDIATELY
static __inline BOOL flock(mdbx_filehandle_t fd, DWORD flags, uint64_t offset,
size_t bytes) {
OVERLAPPED ov;
ov.hEvent = 0;
ov.Offset = (DWORD)offset;
ov.OffsetHigh = HIGH_DWORD(offset);
return LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov);
}
static __inline BOOL funlock(mdbx_filehandle_t fd, uint64_t offset,
size_t bytes) {
return UnlockFile(fd, (DWORD)offset, HIGH_DWORD(offset), (DWORD)bytes,
HIGH_DWORD(bytes));
}
/*----------------------------------------------------------------------------*/
/* global `write` lock for write-txt processing,
* exclusive locking both meta-pages) */
#define LCK_MAXLEN (1u + (size_t)(MAXSSIZE_T))
#define LCK_META_OFFSET 0
#define LCK_META_LEN 0x10000u
#define LCK_BODY_OFFSET LCK_META_LEN
#define LCK_BODY_LEN (LCK_MAXLEN - LCK_BODY_OFFSET + 1u)
#define LCK_META LCK_META_OFFSET, LCK_META_LEN
#define LCK_BODY LCK_BODY_OFFSET, LCK_BODY_LEN
#define LCK_WHOLE 0, LCK_MAXLEN
2017-10-26 21:47:41 +03:00
int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
if (flock(env->me_fd, dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT)
: (LCK_EXCLUSIVE | LCK_WAITFOR),
LCK_BODY))
return MDBX_SUCCESS;
int rc = GetLastError();
2017-10-26 21:47:41 +03:00
return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY;
}
2017-05-24 01:42:10 +03:00
void mdbx_txn_unlock(MDBX_env *env) {
if (!funlock(env->me_fd, LCK_BODY))
mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
}
/*----------------------------------------------------------------------------*/
/* global `read` lock for readers registration,
* exclusive locking `mti_numreaders` (second) cacheline */
#define LCK_LO_OFFSET 0
#define LCK_LO_LEN offsetof(MDBX_lockinfo, mti_numreaders)
#define LCK_UP_OFFSET LCK_LO_LEN
#define LCK_UP_LEN (MDBX_LOCKINFO_WHOLE_SIZE - LCK_UP_OFFSET)
#define LCK_LOWER LCK_LO_OFFSET, LCK_LO_LEN
#define LCK_UPPER LCK_UP_OFFSET, LCK_UP_LEN
2017-05-24 01:42:10 +03:00
int mdbx_rdt_lock(MDBX_env *env) {
AcquireSRWLockShared(&env->me_remap_guard);
if (env->me_lfd == INVALID_HANDLE_VALUE)
2017-05-24 01:42:10 +03:00
return MDBX_SUCCESS; /* readonly database in readonly filesystem */
/* transite from S-? (used) to S-E (locked), e.g. exclusive lock upper-part */
if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER))
2017-05-24 01:42:10 +03:00
return MDBX_SUCCESS;
return GetLastError();
}
2017-05-24 01:42:10 +03:00
void mdbx_rdt_unlock(MDBX_env *env) {
if (env->me_lfd != INVALID_HANDLE_VALUE) {
/* transite from S-E (locked) to S-? (used), e.g. unlock upper-part */
if (!funlock(env->me_lfd, LCK_UPPER))
mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
}
ReleaseSRWLockShared(&env->me_remap_guard);
}
static int suspend_and_append(mdbx_handle_array_t **array,
const DWORD ThreadId) {
const unsigned limit = (*array)->limit;
if ((*array)->count == limit) {
void *ptr = realloc((limit > ARRAY_LENGTH((*array)->handles))
? *array
: /* don't free initial array on the stack */ NULL,
sizeof(mdbx_handle_array_t) +
sizeof(HANDLE) *
(limit * 2 - ARRAY_LENGTH((*array)->handles)));
if (!ptr)
return MDBX_ENOMEM;
(*array) = (mdbx_handle_array_t *)ptr;
(*array)->limit = limit * 2;
}
HANDLE hThread = OpenThread(THREAD_SUSPEND_RESUME, FALSE, ThreadId);
if (hThread == NULL)
return GetLastError();
if (SuspendThread(hThread) == -1) {
CloseHandle(hThread);
return GetLastError();
}
(*array)->handles[(*array)->count++] = hThread;
return MDBX_SUCCESS;
}
int mdbx_suspend_threads_before_remap(MDBX_env *env,
mdbx_handle_array_t **array) {
const mdbx_pid_t CurrentTid = GetCurrentThreadId();
int rc;
if (env->me_lck) {
/* Scan LCK for threads of the current process */
const MDBX_reader *const begin = env->me_lck->mti_readers;
const MDBX_reader *const end = begin + env->me_lck->mti_numreaders;
const mdbx_tid_t WriteTxnOwner = env->me_txn0 ? env->me_txn0->mt_owner : 0;
for (const MDBX_reader *reader = begin; reader < end; ++reader) {
if (reader->mr_pid != env->me_pid || !reader->mr_tid) {
skip_lck:
continue;
}
if (reader->mr_tid == CurrentTid || reader->mr_tid == WriteTxnOwner)
goto skip_lck;
if (env->me_flags & MDBX_NOTLS) {
/* Skip duplicates in no-tls mode */
for (const MDBX_reader *scan = reader; --scan >= begin;)
if (scan->mr_tid == reader->mr_tid)
goto skip_lck;
}
rc = suspend_and_append(array, reader->mr_tid);
if (rc != MDBX_SUCCESS) {
bailout_lck:
(void)mdbx_resume_threads_after_remap(*array);
return rc;
}
}
if (WriteTxnOwner && WriteTxnOwner != CurrentTid) {
rc = suspend_and_append(array, WriteTxnOwner);
if (rc != MDBX_SUCCESS)
goto bailout_lck;
}
} else {
/* Without LCK (i.e. read-only mode).
* Walk thougth a snapshot of all running threads */
mdbx_assert(env, env->me_txn0 == NULL);
const HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
if (hSnapshot == INVALID_HANDLE_VALUE)
return GetLastError();
THREADENTRY32 entry;
entry.dwSize = sizeof(THREADENTRY32);
if (!Thread32First(hSnapshot, &entry)) {
rc = GetLastError();
bailout_toolhelp:
CloseHandle(hSnapshot);
(void)mdbx_resume_threads_after_remap(*array);
return rc;
}
do {
if (entry.th32OwnerProcessID != env->me_pid ||
entry.th32ThreadID == CurrentTid)
continue;
rc = suspend_and_append(array, entry.th32ThreadID);
if (rc != MDBX_SUCCESS)
goto bailout_toolhelp;
} while (Thread32Next(hSnapshot, &entry));
rc = GetLastError();
if (rc != ERROR_NO_MORE_FILES)
goto bailout_toolhelp;
CloseHandle(hSnapshot);
}
return MDBX_SUCCESS;
}
int mdbx_resume_threads_after_remap(mdbx_handle_array_t *array) {
int rc = MDBX_SUCCESS;
for (unsigned i = 0; i < array->count; ++i) {
if (ResumeThread(array->handles[i]) == -1)
rc = GetLastError();
CloseHandle(array->handles[i]);
}
return rc;
}
/*----------------------------------------------------------------------------*/
/* global `initial` lock for lockfile initialization,
* exclusive/shared locking first cacheline */
/* FIXME: locking schema/algo descritpion.
?-? = free
S-? = used
E-? = exclusive-read
?-S
?-E = middle
S-S
S-E = locked
E-S
E-E = exclusive-write
*/
2017-05-24 01:42:10 +03:00
int mdbx_lck_init(MDBX_env *env) {
(void)env;
2017-05-24 01:42:10 +03:00
return MDBX_SUCCESS;
}
/* Seize state as 'exclusive-write' (E-E and returns MDBX_RESULT_TRUE)
* or as 'used' (S-? and returns MDBX_RESULT_FALSE), otherwise returns an error
*/
static int internal_seize_lck(HANDLE lfd) {
int rc;
assert(lfd != INVALID_HANDLE_VALUE);
/* 1) now on ?-? (free), get ?-E (middle) */
mdbx_jitter4testing(false);
if (!flock(lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) {
rc = GetLastError() /* 2) something went wrong, give up */;
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
"?-?(free) >> ?-E(middle)", rc);
return rc;
}
/* 3) now on ?-E (middle), try E-E (exclusive-write) */
mdbx_jitter4testing(false);
if (flock(lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER))
return MDBX_RESULT_TRUE /* 4) got E-E (exclusive-write), done */;
/* 5) still on ?-E (middle) */
rc = GetLastError();
mdbx_jitter4testing(false);
if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
/* 6) something went wrong, give up */
2017-05-15 12:08:04 +03:00
if (!funlock(lfd, LCK_UPPER))
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
2017-05-15 12:08:04 +03:00
"?-E(middle) >> ?-?(free)", GetLastError());
return rc;
}
/* 7) still on ?-E (middle), try S-E (locked) */
mdbx_jitter4testing(false);
rc = flock(lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER) ? MDBX_RESULT_FALSE
: GetLastError();
mdbx_jitter4testing(false);
if (rc != MDBX_RESULT_FALSE)
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
"?-E(middle) >> S-E(locked)", rc);
/* 8) now on S-E (locked) or still on ?-E (middle),
* transite to S-? (used) or ?-? (free) */
2017-05-15 12:08:04 +03:00
if (!funlock(lfd, LCK_UPPER))
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
2017-05-15 12:08:04 +03:00
"X-E(locked/middle) >> X-?(used/free)", GetLastError());
/* 9) now on S-? (used, DONE) or ?-? (free, FAILURE) */
return rc;
}
2017-05-24 01:42:10 +03:00
int mdbx_lck_seize(MDBX_env *env) {
int rc;
assert(env->me_fd != INVALID_HANDLE_VALUE);
if (env->me_lfd == INVALID_HANDLE_VALUE) {
/* LY: without-lck mode (e.g. on read-only filesystem) */
mdbx_jitter4testing(false);
if (!flock(env->me_fd, LCK_SHARED | LCK_DONTWAIT, LCK_WHOLE)) {
rc = GetLastError();
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "without-lck", rc);
return rc;
}
return MDBX_RESULT_FALSE;
}
rc = internal_seize_lck(env->me_lfd);
mdbx_jitter4testing(false);
2017-05-24 01:42:10 +03:00
if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_RDONLY) == 0) {
/* Check that another process don't operates in without-lck mode.
* Doing such check by exclusive locking the body-part of db. Should be
* noted:
* - we need an exclusive lock for do so;
* - we can't lock meta-pages, otherwise other process could get an error
* while opening db in valid (non-conflict) mode. */
if (!flock(env->me_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY)) {
rc = GetLastError();
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
"lock-against-without-lck", rc);
mdbx_jitter4testing(false);
mdbx_lck_destroy(env);
} else {
mdbx_jitter4testing(false);
2017-05-15 12:08:04 +03:00
if (!funlock(env->me_fd, LCK_BODY))
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
2017-05-15 12:08:04 +03:00
"unlock-against-without-lck", GetLastError());
}
}
return rc;
}
int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
/* Transite from exclusive state (E-?) to used (S-?) */
assert(env->me_fd != INVALID_HANDLE_VALUE);
2017-06-06 20:18:09 +03:00
assert(env->me_lfd != INVALID_HANDLE_VALUE);
/* 1) must be at E-E (exclusive-write) */
if (!complete) {
/* transite from E-E to E_? (exclusive-read) */
if (!funlock(env->me_lfd, LCK_UPPER))
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
"E-E(exclusive-write) >> E-?(exclusive-read)", GetLastError());
return MDBX_SUCCESS /* 2) now at E-? (exclusive-read), done */;
}
/* 3) now at E-E (exclusive-write), transite to ?_E (middle) */
2017-06-06 20:18:09 +03:00
if (!funlock(env->me_lfd, LCK_LOWER))
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
"E-E(exclusive-write) >> ?-E(middle)", GetLastError());
/* 4) now at ?-E (middle), transite to S-E (locked) */
2017-06-06 20:18:09 +03:00
if (!flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)) {
int rc = GetLastError() /* 5) something went wrong, give up */;
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
"?-E(middle) >> S-E(locked)", rc);
2017-06-06 20:18:09 +03:00
return rc;
}
2017-06-06 20:18:09 +03:00
/* 6) got S-E (locked), continue transition to S-? (used) */
2017-06-06 20:18:09 +03:00
if (!funlock(env->me_lfd, LCK_UPPER))
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
"S-E(locked) >> S-?(used)", GetLastError());
return MDBX_SUCCESS /* 7) now at S-? (used), done */;
}
2017-06-06 20:18:09 +03:00
int mdbx_lck_upgrade(MDBX_env *env) {
/* Transite from locked state (S-E) to exclusive-write (E-E) */
2017-06-06 20:18:09 +03:00
assert(env->me_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd != INVALID_HANDLE_VALUE);
/* 1) must be at S-E (locked), transite to ?_E (middle) */
if (!funlock(env->me_lfd, LCK_LOWER))
mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
"S-E(locked) >> ?-E(middle)", GetLastError());
/* 3) now on ?-E (middle), try E-E (exclusive-write) */
2017-06-06 20:18:09 +03:00
mdbx_jitter4testing(false);
if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER))
return MDBX_RESULT_TRUE; /* 4) got E-E (exclusive-write), done */
2017-06-06 20:18:09 +03:00
/* 5) still on ?-E (middle) */
int rc = GetLastError();
2017-06-06 20:18:09 +03:00
mdbx_jitter4testing(false);
if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
/* 6) something went wrong, report but continue */
mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
"?-E(middle) >> E-E(exclusive-write)", rc);
2017-06-06 20:18:09 +03:00
}
/* 7) still on ?-E (middle), try restore S-E (locked) */
mdbx_jitter4testing(false);
rc = flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)
? MDBX_RESULT_FALSE
: GetLastError();
2017-06-06 20:18:09 +03:00
mdbx_jitter4testing(false);
if (rc != MDBX_RESULT_FALSE) {
mdbx_fatal("%s(%s) failed: errcode %u", mdbx_func_,
"?-E(middle) >> S-E(locked)", rc);
return rc;
}
/* 8) now on S-E (locked) */
return MDBX_RESULT_FALSE;
}
2017-05-24 01:42:10 +03:00
void mdbx_lck_destroy(MDBX_env *env) {
int rc;
if (env->me_lfd != INVALID_HANDLE_VALUE) {
/* double `unlock` for robustly remove overlapped shared/exclusive locks */
while (funlock(env->me_lfd, LCK_LOWER))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
(void)rc;
SetLastError(ERROR_SUCCESS);
while (funlock(env->me_lfd, LCK_UPPER))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
(void)rc;
SetLastError(ERROR_SUCCESS);
}
if (env->me_fd != INVALID_HANDLE_VALUE) {
/* explicitly unlock to avoid latency for other processes (windows kernel
2017-06-06 20:18:09 +03:00
* releases such locks via deferred queues) */
while (funlock(env->me_fd, LCK_BODY))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
(void)rc;
SetLastError(ERROR_SUCCESS);
while (funlock(env->me_fd, LCK_META))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
(void)rc;
SetLastError(ERROR_SUCCESS);
while (funlock(env->me_fd, LCK_WHOLE))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
(void)rc;
SetLastError(ERROR_SUCCESS);
}
}
/*----------------------------------------------------------------------------*/
/* reader checking (by pid) */
2017-05-24 01:42:10 +03:00
int mdbx_rpid_set(MDBX_env *env) {
(void)env;
2017-05-24 01:42:10 +03:00
return MDBX_SUCCESS;
}
2017-05-24 01:42:10 +03:00
int mdbx_rpid_clear(MDBX_env *env) {
(void)env;
2017-05-24 01:42:10 +03:00
return MDBX_SUCCESS;
}
2017-04-21 18:26:32 +03:00
/* Checks reader by pid.
*
* Returns:
* MDBX_RESULT_TRUE, if pid is live (unable to acquire lock)
* MDBX_RESULT_FALSE, if pid is dead (lock acquired)
* or otherwise the errcode. */
2017-05-24 01:42:10 +03:00
int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) {
(void)env;
HANDLE hProcess = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid);
int rc;
if (hProcess) {
rc = WaitForSingleObject(hProcess, 0);
CloseHandle(hProcess);
} else {
rc = GetLastError();
}
switch (rc) {
case ERROR_INVALID_PARAMETER:
/* pid seem invalid */
return MDBX_RESULT_FALSE;
case WAIT_OBJECT_0:
/* process just exited */
return MDBX_RESULT_FALSE;
case WAIT_TIMEOUT:
/* pid running */
return MDBX_RESULT_TRUE;
default:
/* failure */
return rc;
}
}