mdbx: merge branch 'osx-try'.

This resolves https://github.com/leo-yuriev/libmdbx/issues/49.

Change-Id: Ib20c3898e99ca229f10e7d41cda3989b8b4a832c
This commit is contained in:
Leonid Yuriev
2019-08-20 03:18:31 +03:00
18 changed files with 423 additions and 85 deletions

View File

@@ -23,6 +23,12 @@
# undef NDEBUG
#endif
/* macOS sync policy selector: MDBX_OSX_SPEED_OR_DURABILITY is compared against
 * the two values below and defaults to durability unless it has already been
 * defined before this point (e.g. by a -D compiler option). */
#define MDBX_OSX_WANNA_DURABILITY 0 /* use fcntl(F_FULLFSYNC), at the cost of a 5-10 times slowdown */
#define MDBX_OSX_WANNA_SPEED 1 /* use fsync(), with a chance of data loss on power failure */
#ifndef MDBX_OSX_SPEED_OR_DURABILITY
#define MDBX_OSX_SPEED_OR_DURABILITY MDBX_OSX_WANNA_DURABILITY
#endif /* MDBX_OSX_SPEED_OR_DURABILITY */
/*----------------------------------------------------------------------------*/
/* Should be defined before any includes */
@@ -30,6 +36,10 @@
# define _FILE_OFFSET_BITS 64
#endif
#ifdef __APPLE__
/* NOTE(review): presumably keeps the Darwin-specific declarations used by the
 * macOS code paths (e.g. F_FULLFSYNC, F_NOCACHE) visible - confirm. Like the
 * other feature-test macros here it has to be defined before any include. */
#define _DARWIN_C_SOURCE
#endif /* __APPLE__ */
#ifdef _MSC_VER
# if _MSC_VER < 1400
# error "Microsoft Visual C++ 8.0 (Visual Studio 2005) or later version is required"

View File

@@ -18,7 +18,7 @@
* even though they don't support Robust Mutexes.
* Compile with -DMDBX_USE_ROBUST=0. */
#ifndef MDBX_USE_ROBUST
#if defined(EOWNERDEAD) || _POSIX_C_SOURCE >= 200809L
#if (defined(EOWNERDEAD) || _POSIX_C_SOURCE >= 200809L) && !defined(__APPLE__)
#define MDBX_USE_ROBUST 1
#else
#define MDBX_USE_ROBUST 0

View File

@@ -292,6 +292,16 @@ static CRITICAL_SECTION rthc_critical_section;
#else
int __cxa_thread_atexit_impl(void (*dtor)(void *), void *obj, void *dso_symbol)
__attribute__((weak));
#ifdef __APPLE__ /* FIXME: Thread-Local Storage destructors & DSO-unloading */
/* Placeholder definition for Apple builds: it ignores all three arguments,
 * registers nothing and always returns -1.
 * NOTE(review): presumably callers treat the non-zero result as "destructor
 * not registered" and fall back to another cleanup path - confirm at the
 * call sites, which are outside this hunk. */
int __cxa_thread_atexit_impl(void (*dtor)(void *), void *obj,
void *dso_symbol) {
(void)dtor;
(void)obj;
(void)dso_symbol;
return -1;
}
#endif /* __APPLE__ */
static pthread_mutex_t mdbx_rthc_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t mdbx_rthc_cond = PTHREAD_COND_INITIALIZER;
static mdbx_thread_key_t mdbx_rthc_key;
@@ -515,9 +525,9 @@ __cold void mdbx_rthc_global_dtor(void) {
mdbx_thread_key_delete(key);
for (MDBX_reader *rthc = rthc_table[i].begin; rthc < rthc_table[i].end;
++rthc) {
mdbx_trace("== [%i] = key %u, %p ... %p, rthc %p (%+i), "
mdbx_trace("== [%i] = key %zu, %p ... %p, rthc %p (%+i), "
"rthc-pid %i, current-pid %i",
i, key, rthc_table[i].begin, rthc_table[i].end, rthc,
i, (size_t)key, rthc_table[i].begin, rthc_table[i].end, rthc,
(int)(rthc - rthc_table[i].begin), rthc->mr_pid, self_pid);
if (rthc->mr_pid == self_pid) {
rthc->mr_pid = 0;
@@ -553,8 +563,8 @@ __cold int mdbx_rthc_alloc(mdbx_thread_key_t *key, MDBX_reader *begin,
return rc;
mdbx_rthc_lock();
mdbx_trace(">> key 0x%x, rthc_count %u, rthc_limit %u", *key, rthc_count,
rthc_limit);
mdbx_trace(">> key %zu, rthc_count %u, rthc_limit %u", (size_t)*key,
rthc_count, rthc_limit);
if (rthc_count == rthc_limit) {
rthc_entry_t *new_table =
mdbx_realloc((rthc_table == rthc_table_static) ? nullptr : rthc_table,
@@ -568,13 +578,14 @@ __cold int mdbx_rthc_alloc(mdbx_thread_key_t *key, MDBX_reader *begin,
rthc_table = new_table;
rthc_limit *= 2;
}
mdbx_trace("== [%i] = key %u, %p ... %p", rthc_count, *key, begin, end);
mdbx_trace("== [%i] = key %zu, %p ... %p", rthc_count, (size_t)*key, begin,
end);
rthc_table[rthc_count].key = *key;
rthc_table[rthc_count].begin = begin;
rthc_table[rthc_count].end = end;
++rthc_count;
mdbx_trace("<< key 0x%x, rthc_count %u, rthc_limit %u", *key, rthc_count,
rthc_limit);
mdbx_trace("<< key %zu, rthc_count %u, rthc_limit %u", (size_t)*key,
rthc_count, rthc_limit);
mdbx_rthc_unlock();
return MDBX_SUCCESS;
@@ -587,8 +598,8 @@ bailout:
__cold void mdbx_rthc_remove(const mdbx_thread_key_t key) {
mdbx_thread_key_delete(key);
mdbx_rthc_lock();
mdbx_trace(">> key 0x%x, rthc_count %u, rthc_limit %u", key, rthc_count,
rthc_limit);
mdbx_trace(">> key %zu, rthc_count %u, rthc_limit %u", (size_t)key,
rthc_count, rthc_limit);
for (unsigned i = 0; i < rthc_count; ++i) {
if (key == rthc_table[i].key) {
@@ -614,8 +625,8 @@ __cold void mdbx_rthc_remove(const mdbx_thread_key_t key) {
}
}
mdbx_trace("<< key 0x%x, rthc_count %u, rthc_limit %u", key, rthc_count,
rthc_limit);
mdbx_trace("<< key %zu, rthc_count %u, rthc_limit %u", (size_t)key,
rthc_count, rthc_limit);
mdbx_rthc_unlock();
}
@@ -3030,7 +3041,7 @@ __cold static int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
int rc = (flags & MDBX_WRITEMAP)
? mdbx_msync(&env->me_dxb_mmap, 0, usedbytes,
flags & MDBX_MAPASYNC)
: mdbx_filesync(env->me_fd, false);
: mdbx_filesync(env->me_fd, MDBX_SYNC_DATA);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -5411,14 +5422,16 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
goto fail;
if ((flags & MDBX_MAPASYNC) == 0) {
if (unlikely(pending->mm_geo.next > steady->mm_geo.now)) {
rc = mdbx_filesize_sync(env->me_fd);
rc = mdbx_filesync(env->me_fd, MDBX_SYNC_SIZE);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
}
env->me_sync_pending = 0;
}
} else {
rc = mdbx_filesync(env->me_fd, pending->mm_geo.next > steady->mm_geo.now);
rc = mdbx_filesync(env->me_fd, (pending->mm_geo.next > steady->mm_geo.now)
? MDBX_SYNC_DATA | MDBX_SYNC_SIZE
: MDBX_SYNC_DATA);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
env->me_sync_pending = 0;
@@ -5566,7 +5579,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
} else {
rc = mdbx_filesync(env->me_fd, false);
rc = mdbx_filesync(env->me_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
if (rc != MDBX_SUCCESS)
goto undo;
}
@@ -11814,12 +11827,15 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd,
mdbx_txn_abort(read_txn);
if (likely(rc == MDBX_SUCCESS))
rc = mdbx_filesync(fd, true);
rc = mdbx_filesync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE);
/* Write actual meta */
if (likely(rc == MDBX_SUCCESS))
rc = mdbx_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0);
if (likely(rc == MDBX_SUCCESS))
rc = mdbx_filesync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
mdbx_memalign_free(buffer);
return rc;
}
@@ -12860,7 +12876,7 @@ int __cold mdbx_setup_debug(int flags, MDBX_debug_func *logger) {
unsigned ret = mdbx_runtime_flags;
mdbx_runtime_flags = flags;
#ifdef __linux__
#if defined(__linux__) || defined(__gnu_linux__)
if (flags & MDBX_DBG_DUMP) {
int core_filter_fd = open("/proc/self/coredump_filter", O_TRUNC | O_RDWR);
if (core_filter_fd >= 0) {
@@ -12883,7 +12899,7 @@ int __cold mdbx_setup_debug(int flags, MDBX_debug_func *logger) {
close(core_filter_fd);
}
}
#endif /* __linux__ */
#endif /* Linux */
mdbx_debug_logger = logger;
return ret;

View File

@@ -159,9 +159,14 @@ typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 {
/* Prototype should match libc runtime. ISO POSIX (2003) & LSB 1.x-3.x */
__nothrow __noreturn void __assert_fail(const char *assertion, const char *file,
unsigned line, const char *function);
#elif (defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__BSD__) || defined(__NETBSD__) || defined(__bsdi__) || \
defined(__DragonFly__))
#elif defined(__APPLE__) || defined(__MACH__)
__nothrow __noreturn void __assert_rtn(const char *function, const char *file,
int line, const char *assertion);
#define __assert_fail(assertion, file, line, function) \
__assert_rtn(function, file, line, assertion)
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__BSD__) || defined(__NETBSD__) || defined(__bsdi__) || \
defined(__DragonFly__)
__nothrow __noreturn void __assert(const char *function, const char *file,
int line, const char *assertion);
#define __assert_fail(assertion, file, line, function) \
@@ -548,6 +553,9 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode,
if (fd_flags != -1)
(void)fcntl(*fd, F_SETFL, fd_flags | O_DIRECT);
#endif /* O_DIRECT */
#if defined(F_NOCACHE)
(void)fcntl(*fd, F_NOCACHE, 1);
#endif /* F_NOCACHE */
}
#endif
@@ -626,7 +634,7 @@ int mdbx_pwrite(mdbx_filehandle_t fd, const void *buf, size_t bytes,
int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt,
uint64_t offset, size_t expected_written) {
#if defined(_WIN32) || defined(_WIN64)
#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__)
size_t written = 0;
for (int i = 0; i < iovcnt; ++i) {
int rc = mdbx_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset);
@@ -652,11 +660,23 @@ int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt,
#endif
}
int mdbx_filesync(mdbx_filehandle_t fd, bool filesize_changed) {
int mdbx_filesync(mdbx_filehandle_t fd, enum mdbx_syncmode_bits mode_bits) {
#if defined(_WIN32) || defined(_WIN64)
(void)filesize_changed;
return FlushFileBuffers(fd) ? MDBX_SUCCESS : GetLastError();
return ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) == 0 ||
FlushFileBuffers(fd))
? MDBX_SUCCESS
: GetLastError();
#else
#if defined(__APPLE__) && \
MDBX_OSX_SPEED_OR_DURABILITY == MDBX_OSX_WANNA_DURABILITY
if (mode_bits & MDBX_SYNC_IODQ)
return likely(fcntl(fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno;
#endif /* MacOS */
#if defined(__linux__) || defined(__gnu_linux__)
if (mode_bits == MDBX_SYNC_SIZE && linux_kernel_version >= 0x03060000)
return MDBX_SUCCESS;
#endif /* Linux */
int rc;
do {
#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
@@ -665,12 +685,12 @@ int mdbx_filesync(mdbx_filehandle_t fd, bool filesize_changed) {
*
* For more info about the corresponding fdatasync() bug
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */
if (!filesize_changed) {
if ((mode_bits & MDBX_SYNC_SIZE) == 0) {
if (fdatasync(fd) == 0)
return MDBX_SUCCESS;
} else
#else
(void)filesize_changed;
(void)mode_bits;
#endif
if (fsync(fd) == 0)
return MDBX_SUCCESS;
@@ -680,22 +700,6 @@ int mdbx_filesync(mdbx_filehandle_t fd, bool filesize_changed) {
#endif
}
/* Flushes `fd` with fsync(); judging by the name, the purpose is to persist a
 * changed file size.
 * Returns MDBX_SUCCESS on success, otherwise the errno value of the failed
 * fsync(). A call that fails with EINTR is retried.
 * On Windows this is a no-op that always returns MDBX_SUCCESS. */
int mdbx_filesize_sync(mdbx_filehandle_t fd) {
#if defined(_WIN32) || defined(_WIN64)
(void)fd;
/* Nothing to do on Windows (i.e. never 100% steady) */
return MDBX_SUCCESS;
#else
for (;;) {
if (fsync(fd) == 0)
return MDBX_SUCCESS;
/* Capture errno right away, before anything else can overwrite it. */
int rc = errno;
if (rc != EINTR)
return rc;
}
#endif
}
int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length) {
#if defined(_WIN32) || defined(_WIN64)
BY_HANDLE_FILE_INFORMATION info;
@@ -792,7 +796,13 @@ int mdbx_msync(mdbx_mmap_t *map, size_t offset, size_t length, int async) {
return MDBX_SUCCESS;
#endif /* Linux */
const int mode = async ? MS_ASYNC : MS_SYNC;
return (msync(ptr, length, mode) == 0) ? MDBX_SUCCESS : errno;
int rc = (msync(ptr, length, mode) == 0) ? MDBX_SUCCESS : errno;
#if defined(__APPLE__) && \
MDBX_OSX_SPEED_OR_DURABILITY == MDBX_OSX_WANNA_DURABILITY
if (rc == MDBX_SUCCESS && mode == MS_SYNC)
rc = likely(fcntl(map->fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno;
#endif /* MacOS */
return rc;
#endif
}
@@ -1165,7 +1175,10 @@ retry_mapview:;
return rc;
#else
if (limit != map->length) {
#if defined(_GNU_SOURCE) && !defined(__FreeBSD__)
#if defined(_GNU_SOURCE) && \
!(defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__BSD__) || defined(__NETBSD__) || defined(__bsdi__) || \
defined(__DragonFly__) || defined(__APPLE__) || defined(__MACH__))
void *ptr = mremap(map->address, map->length, limit,
/* LY: in case of changing the mapping size, the calling code
must guarantee the absence of competing threads, and

View File

@@ -55,7 +55,7 @@
#include <time.h>
#if !(defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__BSD__) || defined(__NETBSD__) || defined(__bsdi__) || \
defined(__DragonFly__))
defined(__DragonFly__) || defined(__APPLE__) || defined(__MACH__))
#include <malloc.h>
#endif /* xBSD */
@@ -522,7 +522,13 @@ int mdbx_thread_create(mdbx_thread_t *thread,
void *arg);
int mdbx_thread_join(mdbx_thread_t thread);
int mdbx_filesync(mdbx_filehandle_t fd, bool fullsync);
/* Bit flags passed to mdbx_filesync() to say what has to be flushed; callers
 * combine them with bitwise OR. */
enum mdbx_syncmode_bits {
/* File contents. As long as MDBX_SYNC_SIZE is not set, the POSIX path of
 * mdbx_filesync() tries fdatasync() when _POSIX_SYNCHRONIZED_IO is
 * available. */
MDBX_SYNC_DATA = 1,
/* File size change. Makes the POSIX path go straight to fsync(). When passed
 * alone it is a no-op on Windows and on Linux with
 * linux_kernel_version >= 0x03060000 (presumably kernel 3.6 - confirm). */
MDBX_SYNC_SIZE = 2,
/* On macOS builds with MDBX_OSX_WANNA_DURABILITY this makes mdbx_filesync()
 * issue fcntl(F_FULLFSYNC); on Windows it triggers FlushFileBuffers(), as
 * MDBX_SYNC_DATA does.
 * NOTE(review): the name presumably stands for "I/O device queue" - confirm. */
MDBX_SYNC_IODQ = 4
};
int mdbx_filesync(mdbx_filehandle_t fd, enum mdbx_syncmode_bits mode_bits);
int mdbx_filesize_sync(mdbx_filehandle_t fd);
int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length);
int mdbx_fseek(mdbx_filehandle_t fd, uint64_t pos);