2019-09-09 13:40:24 +03:00
|
|
|
|
/*
|
2023-01-16 16:24:51 +03:00
|
|
|
|
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
|
2017-03-16 18:09:27 +03:00
|
|
|
|
* and other libmdbx authors: please see AUTHORS file.
|
|
|
|
|
* All rights reserved.
|
|
|
|
|
*
|
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
|
* modification, are permitted only as authorized by the OpenLDAP
|
|
|
|
|
* Public License.
|
|
|
|
|
*
|
|
|
|
|
* A copy of this license is available in the file LICENSE in the
|
|
|
|
|
* top-level directory of the distribution or, alternatively, at
|
2017-05-23 14:44:53 +03:00
|
|
|
|
* <http://www.OpenLDAP.org/license.html>. */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
|
|
|
|
#pragma once
|
2019-09-10 14:32:17 +03:00
|
|
|
|
/* Optional build-time configuration header: when MDBX_CONFIG_H is defined it
 * must expand to something usable as an #include operand. */
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif

/* Defined (empty) before any other libmdbx header is pulled in. */
#define LIBMDBX_INTERNALS
#ifdef xMDBX_TOOLS
/* Tool builds define MDBX_DEPRECATED as empty. */
#define MDBX_DEPRECATED
#endif /* xMDBX_TOOLS */

/* Linkage of internal functions/variables depends on the build flavour. */
#ifdef xMDBX_ALLOY
/* Amalgamated build */
#define MDBX_INTERNAL_FUNC static
#define MDBX_INTERNAL_VAR static
#else
/* Non-amalgamated build */
#define MDBX_INTERNAL_FUNC
#define MDBX_INTERNAL_VAR extern
#endif /* xMDBX_ALLOY */
|
2019-08-31 17:10:04 +03:00
|
|
|
|
|
2022-06-02 18:59:58 +03:00
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
|
|
2021-04-30 02:01:22 +03:00
|
|
|
|
/** Disables using GNU/Linux libc extensions.
 * \ingroup build_option
 * \note This option cannot be moved to options.h, because the control macros
 * that depend on it must be prepared before options.h is included. */
#ifndef MDBX_DISABLE_GNU_SOURCE
#define MDBX_DISABLE_GNU_SOURCE 0
#endif
#if MDBX_DISABLE_GNU_SOURCE
#undef _GNU_SOURCE
#elif (defined(__linux__) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif /* MDBX_DISABLE_GNU_SOURCE */
|
2019-10-06 14:59:53 +03:00
|
|
|
|
|
2017-05-24 18:50:24 +03:00
|
|
|
|
/* Should be defined before any includes.
 * 64-bit file offsets are requested unless the builder already chose a value
 * or the target is Android. */
#if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) &&                \
    !defined(ANDROID)
#define _FILE_OFFSET_BITS 64
#endif

#ifdef __APPLE__
#define _DARWIN_C_SOURCE
#endif
|
|
|
|
|
|
2017-07-02 09:07:57 +03:00
|
|
|
|
#ifdef _MSC_VER
#if _MSC_FULL_VER < 190024234
/* Actually libmdbx was not tested with compilers older than 19.00.24234 (Visual
 * Studio 2015 Update 3). But you could remove this #error and try to continue
 * at your own risk. In such a case please don't raise issues related ONLY to
 * old compilers.
 *
 * NOTE:
 *   Unfortunately, there are several different builds of "Visual Studio" that
 *   are called "Visual Studio 2015 Update 3".
 *
 *   The 190024234 is used here because it is the minimal version of Visual
 *   Studio that was used for building and testing libmdbx in recent years.
 *   Soon this value will be increased to 19.0.24241.7, since building and
 *   testing using "Visual Studio 2015" will be performed only at
 *   https://ci.appveyor.com.
 *
 *   Please ask Microsoft (but not us) for information about version
 *   differences and how and where you can obtain the latest
 *   "Visual Studio 2015" build with all fixes.
 */
#error                                                                         \
    "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required."
#endif
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif /* _CRT_SECURE_NO_WARNINGS */
#if _MSC_VER > 1800
#pragma warning(disable : 4464) /* relative include path contains '..' */
#endif
#if _MSC_VER > 1913
#pragma warning(disable : 5045) /* will insert Spectre mitigation... */
#endif
#if _MSC_VER > 1914
/* winbase.h(9531): warning C5105: macro expansion producing 'defined' has
 * undefined behavior */
#pragma warning(disable : 5105)
#endif
#if _MSC_VER > 1930
#pragma warning(disable : 6235) /* <expression> is always a constant */
/* <expression> is never evaluated and might have side effects */
#pragma warning(disable : 6237)
#endif
#pragma warning(disable : 4710) /* 'xyz': function not inlined */
/* function 'xyz' selected for automatic inline expansion */
#pragma warning(disable : 4711)
/* nonstandard extension used: nameless struct/union */
#pragma warning(disable : 4201)
#pragma warning(disable : 4702) /* unreachable code */
#pragma warning(disable : 4706) /* assignment within conditional expression */
#pragma warning(disable : 4127) /* conditional expression is constant */
/* 'xyz': structure was padded due to alignment specifier */
#pragma warning(disable : 4324)
#pragma warning(disable : 4310) /* cast truncates constant value */
/* bytes padding added after data member for alignment */
#pragma warning(disable : 4820)
/* expression before comma has no effect; expected expression with
 * side-effect */
#pragma warning(disable : 4548)
/* the result of the unary '&' operator may be unaligned */
#pragma warning(disable : 4366)
/* nonstandard extension used: zero-sized array in struct/union */
#pragma warning(disable : 4200)
/* nonstandard extension used: non-constant aggregate initializer */
#pragma warning(disable : 4204)
/* unreferenced local function has been removed */
#pragma warning(disable : 4505)
#endif /* _MSC_VER (warnings) */
|
2017-05-24 18:50:24 +03:00
|
|
|
|
|
2022-01-27 16:31:13 +03:00
|
|
|
|
/* Silence -Wattributes for GCC releases older than 9. */
#if defined(__GNUC__) && __GNUC__ < 9
#pragma GCC diagnostic ignored "-Wattributes"
#endif /* GCC < 9 */

/* Request the ANSI stdio implementation on MinGW unless already chosen. */
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) &&    \
    !defined(__USE_MINGW_ANSI_STDIO)
#define __USE_MINGW_ANSI_STDIO 1
#endif /* MinGW */

/* Windows builds always define UNICODE. */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(UNICODE)
#define UNICODE
#endif /* UNICODE */
|
|
|
|
|
|
2020-04-15 17:09:37 +03:00
|
|
|
|
#include "../mdbx.h"
|
2022-06-02 18:59:58 +03:00
|
|
|
|
#include "base.h"
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2021-04-30 02:01:22 +03:00
|
|
|
|
#if defined(__GNUC__) && !__GNUC_PREREQ(4, 2)
/* Actually libmdbx was not tested with compilers older than GCC 4.2.
 * But you could ignore this warning at your own risk.
 * In such a case please don't raise issues related ONLY to old compilers.
 */
#warning "libmdbx required GCC >= 4.2"
#endif

#if defined(__clang__) && !__CLANG_PREREQ(3, 8)
/* Actually libmdbx was not tested with CLANG older than 3.8.
 * But you could ignore this warning at your own risk.
 * In such a case please don't raise issues related ONLY to old compilers.
 */
#warning "libmdbx required CLANG >= 3.8"
#endif

#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12)
/* Actually libmdbx was not tested with something older than glibc 2.12.
 * But you could ignore this warning at your own risk.
 * In such a case please don't raise issues related ONLY to old systems.
 */
#warning "libmdbx was only tested with GLIBC >= 2.12."
#endif

#ifdef __SANITIZE_THREAD__
#warning                                                                       \
    "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues."
#endif /* __SANITIZE_THREAD__ */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2020-08-22 20:19:46 +03:00
|
|
|
|
/* Each section below disables one diagnostic, using the pragma spelling of
 * whichever compiler family is in use, and only when __has_warning() reports
 * that the diagnostic exists. */
#if __has_warning("-Wnested-anon-types")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wnested-anon-types"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wnested-anon-types"
#else
#pragma warning disable "nested-anon-types"
#endif
#endif /* -Wnested-anon-types */

#if __has_warning("-Wconstant-logical-operand")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wconstant-logical-operand"
#else
#pragma warning disable "constant-logical-operand"
#endif
#endif /* -Wconstant-logical-operand */

#if defined(__LCC__) && (__LCC__ <= 121)
/* bug #2798 */
#pragma diag_suppress alignment_reduction_ignored
#elif defined(__ICC)
#pragma warning(disable : 3453 1366)
#elif __has_warning("-Walignment-reduction-ignored")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Walignment-reduction-ignored"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Walignment-reduction-ignored"
#else
#pragma warning disable "alignment-reduction-ignored"
#endif
#endif /* -Walignment-reduction-ignored */
|
2018-03-07 13:31:33 +03:00
|
|
|
|
|
2022-08-04 14:28:35 +03:00
|
|
|
|
/* Attribute list applied to functions that must not be instrumented when
 * ENABLE_GPROF is defined; expands to nothing otherwise. */
#ifndef MDBX_EXCLUDE_FOR_GPROF
#ifdef ENABLE_GPROF
#define MDBX_EXCLUDE_FOR_GPROF                                                 \
  __attribute__((__no_instrument_function__,                                   \
                 __no_profile_instrument_function__))
#else
#define MDBX_EXCLUDE_FOR_GPROF
#endif /* ENABLE_GPROF */
#endif /* MDBX_EXCLUDE_FOR_GPROF */
|
|
|
|
|
|
2020-08-22 20:19:46 +03:00
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
2019-11-13 20:14:17 +03:00
|
|
|
|
#include "osal.h"
|
2019-09-18 19:52:50 +03:00
|
|
|
|
|
2019-09-05 11:57:52 +03:00
|
|
|
|
/* Symbol name built by pasting MDBX_BUILD_SOURCERY onto "mdbx_sourcery_";
 * tool builds additionally declare it as an imported constant string. */
#define mdbx_sourcery_anchor XCONCAT(mdbx_sourcery_, MDBX_BUILD_SOURCERY)
#if defined(xMDBX_TOOLS)
extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
#endif
|
|
|
|
|
|
2019-11-13 20:14:17 +03:00
|
|
|
|
#include "options.h"
|
|
|
|
|
|
2021-04-30 02:01:22 +03:00
|
|
|
|
/* Undefine the NDEBUG if debugging is enforced by MDBX_DEBUG */
#if MDBX_DEBUG
#undef NDEBUG
#endif
|
|
|
|
|
|
2022-12-30 17:18:52 +03:00
|
|
|
|
#ifndef __cplusplus
|
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
|
/* Debug and Logging stuff */
|
|
|
|
|
|
|
|
|
|
/* Initial value for runtime_flags derived from the MDBX_DEBUG level:
 * MDBX_DBG_ASSERT when MDBX_DEBUG > 0, plus MDBX_DBG_AUDIT when MDBX_DEBUG > 1.
 * The whole expansion is parenthesized so it can be embedded safely in a
 * larger expression. */
#define MDBX_RUNTIME_FLAGS_INIT                                                \
  (((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT)
|
|
|
|
|
|
|
|
|
|
extern uint8_t runtime_flags;
|
|
|
|
|
extern uint8_t loglevel;
|
|
|
|
|
extern MDBX_debug_func *debug_logger;
|
|
|
|
|
|
|
|
|
|
/* Testing hook: in MDBX_DEBUG builds calls osal_jitter(tiny) when the
 * MDBX_DBG_JITTER bit is set in runtime_flags; in other builds it does
 * nothing (the argument is only consumed to avoid an unused warning). */
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny) {
#if MDBX_DEBUG
  if (MDBX_DBG_JITTER & runtime_flags)
    osal_jitter(tiny);
#else
  (void)tiny;
#endif
}
|
|
|
|
|
|
|
|
|
|
/* printf-style logging entry point: `fmt` is argument 4 and the variadic
 * values start at argument 5 (as declared via MDBX_PRINTF_ARGS). */
MDBX_INTERNAL_FUNC void MDBX_PRINTF_ARGS(4, 5)
    debug_log(int level, const char *function, int line, const char *fmt, ...)
        MDBX_PRINTF_ARGS(4, 5);
/* Same as debug_log(), taking an already-started va_list. */
MDBX_INTERNAL_FUNC void debug_log_va(int level, const char *function, int line,
                                     const char *fmt, va_list args);
|
|
|
|
|
|
|
|
|
|
/* LOG_ENABLED(msg): true when a message of level `msg` passes the current
 * loglevel. Without MDBX_DEBUG, levels at or above MDBX_LOG_VERBOSE are
 * rejected regardless of loglevel.
 * AUDIT_ENABLED(): tests MDBX_DBG_AUDIT in runtime_flags in debug builds and
 * is constant 0 otherwise.
 * `msg` is parenthesized so that expression arguments are compared as a
 * whole. */
#if MDBX_DEBUG
#define LOG_ENABLED(msg) unlikely((msg) <= loglevel)
#define AUDIT_ENABLED() unlikely((runtime_flags & MDBX_DBG_AUDIT))
#else /* MDBX_DEBUG */
#define LOG_ENABLED(msg) ((msg) < MDBX_LOG_VERBOSE && (msg) <= loglevel)
#define AUDIT_ENABLED() (0)
#endif /* MDBX_DEBUG */
|
|
|
|
|
|
|
|
|
|
/* ASSERT_ENABLED(): constant 1 when MDBX_FORCE_ASSERTIONS is set; otherwise
 * in MDBX_DEBUG builds it tests MDBX_DBG_ASSERT in runtime_flags; otherwise
 * constant 0. */
#if MDBX_FORCE_ASSERTIONS
#define ASSERT_ENABLED() (1)
#elif MDBX_DEBUG
#define ASSERT_ENABLED() likely((runtime_flags & MDBX_DBG_ASSERT))
#else
#define ASSERT_ENABLED() (0)
#endif /* assertions */
|
|
|
|
|
|
|
|
|
|
/* Level-gated logging macros. Each one checks LOG_ENABLED() for its level and
 * only then calls debug_log(). All of them require at least one variadic
 * argument after `fmt`. DEBUG_EXTRA and DEBUG_EXTRA_PRINT pass `fmt` as is;
 * the others append "\n" to it (so `fmt` must be a string literal there).
 * DEBUG_EXTRA_PRINT passes NULL/0 instead of the function name and line. */
#define DEBUG_EXTRA(fmt, ...)                                                  \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_EXTRA))                                           \
      debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__);         \
  } while (0)

#define DEBUG_EXTRA_PRINT(fmt, ...)                                            \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_EXTRA))                                           \
      debug_log(MDBX_LOG_EXTRA, NULL, 0, fmt, __VA_ARGS__);                    \
  } while (0)

#define TRACE(fmt, ...)                                                        \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_TRACE))                                           \
      debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  } while (0)

#define DEBUG(fmt, ...)                                                        \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_DEBUG))                                           \
      debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  } while (0)

#define VERBOSE(fmt, ...)                                                      \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_VERBOSE))                                         \
      debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__);  \
  } while (0)

#define NOTICE(fmt, ...)                                                       \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_NOTICE))                                          \
      debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__);   \
  } while (0)

#define WARNING(fmt, ...)                                                      \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_WARN))                                            \
      debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__);     \
  } while (0)
|
|
|
|
|
|
|
|
|
|
/* wingdi.h puts an ERROR definition into a public header; drop it so the
 * logging macro below can use the name. */
#undef ERROR

/* Logs at MDBX_LOG_ERROR when that level is enabled; appends "\n" to `fmt`
 * and requires at least one variadic argument. */
#define ERROR(fmt, ...)                                                        \
  do {                                                                         \
    if (LOG_ENABLED(MDBX_LOG_ERROR))                                           \
      debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__);    \
  } while (0)
|
|
|
|
|
|
|
|
|
|
/* Unconditionally logs at MDBX_LOG_FATAL (no LOG_ENABLED() gate); appends
 * "\n" to `fmt` and requires at least one variadic argument.
 * The expansion deliberately has no trailing ';' so that
 * `if (c) FATAL(...); else ...` parses as intended. */
#define FATAL(fmt, ...)                                                        \
  debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__)
|
|
|
|
|
|
|
|
|
|
#if MDBX_DEBUG
|
|
|
|
|
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
|
|
|
|
#else /* MDBX_DEBUG */
|
|
|
|
|
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
|
|
|
|
|
unsigned line);
|
|
|
|
|
#define ASSERT_FAIL(env, msg, func, line) \
|
|
|
|
|
do { \
|
|
|
|
|
(void)(env); \
|
|
|
|
|
assert_fail(msg, func, line); \
|
|
|
|
|
} while (0)
|
|
|
|
|
#endif /* MDBX_DEBUG */
|
|
|
|
|
|
|
|
|
|
/* Always-on check: when `expr` is false, reports `msg` through ASSERT_FAIL()
 * together with the current function name and line. */
#define ENSURE_MSG(env, expr, msg)                                             \
  do {                                                                         \
    if (unlikely(!(expr)))                                                     \
      ASSERT_FAIL(env, msg, __func__, __LINE__);                               \
  } while (0)

/* ENSURE_MSG() using the stringified expression as the message. */
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)
|
|
|
|
|
|
|
|
|
|
/* assert(3) variant in environment context: the check is performed only
 * when ASSERT_ENABLED() is true. */
#define eASSERT(env, expr)                                                     \
  do {                                                                         \
    if (ASSERT_ENABLED())                                                      \
      ENSURE(env, expr);                                                       \
  } while (0)

/* assert(3) variant in cursor context */
#define cASSERT(mc, expr) eASSERT((mc)->mc_txn->mt_env, expr)

/* assert(3) variant in transaction context */
#define tASSERT(txn, expr) eASSERT((txn)->mt_env, expr)

#ifndef xMDBX_TOOLS /* Avoid using internal eASSERT() */
/* Outside of tool builds the standard assert() is rerouted to eASSERT()
 * with a NULL environment. */
#undef assert
#define assert(expr) eASSERT(NULL, expr)
#endif
|
|
|
|
|
|
|
|
|
|
#endif /* __cplusplus */
|
|
|
|
|
|
2017-03-16 18:09:27 +03:00
|
|
|
|
/*----------------------------------------------------------------------------*/
|
2021-04-20 01:36:34 +03:00
|
|
|
|
/* Atomics */
|
|
|
|
|
|
|
|
|
|
/* Memory-ordering levels accepted by the atomic helpers in this header.
 * The numeric order matters: osal_memory_fence() compares levels with '>'. */
enum MDBX_memory_order {
  mo_Relaxed,
  mo_AcquireRelease
  /* , mo_SequentialConsistency */
};
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2021-01-30 02:28:12 +03:00
|
|
|
|
/* 32-bit atomic cell: `weak` gives plain volatile access; `c11a` is the C11
 * _Atomic view and exists only when MDBX_HAVE_C11ATOMICS is defined. */
typedef union {
  volatile uint32_t weak;
#ifdef MDBX_HAVE_C11ATOMICS
  volatile _Atomic uint32_t c11a;
#endif /* MDBX_HAVE_C11ATOMICS */
} MDBX_atomic_uint32_t;
|
|
|
|
|
|
|
|
|
|
typedef union {
|
|
|
|
|
volatile uint64_t weak;
|
|
|
|
|
#if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC)
|
|
|
|
|
volatile _Atomic uint64_t c11a;
|
|
|
|
|
#endif
|
|
|
|
|
#if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC
|
|
|
|
|
__anonymous_struct_extension__ struct {
|
|
|
|
|
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
|
|
|
MDBX_atomic_uint32_t low, high;
|
|
|
|
|
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
|
|
|
MDBX_atomic_uint32_t high, low;
|
|
|
|
|
#else
|
|
|
|
|
#error "FIXME: Unsupported byte order"
|
|
|
|
|
#endif /* __BYTE_ORDER__ */
|
|
|
|
|
};
|
|
|
|
|
#endif
|
|
|
|
|
} MDBX_atomic_uint64_t;
|
|
|
|
|
|
2021-04-20 01:36:34 +03:00
|
|
|
|
#ifdef MDBX_HAVE_C11ATOMICS

/* Crutches for C11 atomic compiler's bugs.
 * MDBX_c11a_ro()/MDBX_c11a_rw() yield the pointer handed to the C11 atomic
 * functions for read-only / read-write access to an MDBX_atomic_* cell. */
#if defined(__e2k__) && defined(__LCC__) && __LCC__ < /* FIXME */ 127
#define MDBX_c11a_ro(type, ptr) (&(ptr)->weak)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->weak)
/* NOTE(review): clang predefines __clang__ as 1, so `__clang__ < 8` holds for
 * every clang release and this branch is always taken on clang. If the intent
 * was "clang older than 8", __clang_major__ is the version macro - confirm
 * before changing, since Apple clang uses different version numbers. */
#elif defined(__clang__) && __clang__ < 8
#define MDBX_c11a_ro(type, ptr) ((volatile _Atomic(type) *)&(ptr)->c11a)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
#else
#define MDBX_c11a_ro(type, ptr) (&(ptr)->c11a)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
#endif /* Crutches for C11 atomic compiler's bugs */

/* Map enum MDBX_memory_order to a C11 memory_order for stores:
 * mo_Relaxed -> relaxed, mo_AcquireRelease -> release, other -> seq_cst. */
#define mo_c11_store(fence)                                                    \
  (((fence) == mo_Relaxed)          ? memory_order_relaxed                     \
   : ((fence) == mo_AcquireRelease) ? memory_order_release                     \
                                    : memory_order_seq_cst)
/* Same mapping for loads, with mo_AcquireRelease -> acquire. */
#define mo_c11_load(fence)                                                     \
  (((fence) == mo_Relaxed)          ? memory_order_relaxed                     \
   : ((fence) == mo_AcquireRelease) ? memory_order_acquire                     \
                                    : memory_order_seq_cst)

#endif /* MDBX_HAVE_C11ATOMICS */
|
|
|
|
|
|
|
|
|
|
#ifndef __cplusplus
|
|
|
|
|
|
|
|
|
|
/* osal_memory_fence(order, write): memory fence of the requested strength.
 * With C11 atomics it is atomic_thread_fence() using the store mapping when
 * `write` is true and the load mapping otherwise.
 * Without them it always issues a compiler barrier and, for writes only,
 * additionally a full memory barrier when `order` exceeds the threshold:
 * mo_Relaxed if MDBX_CPU_WRITEBACK_INCOHERENT, else mo_AcquireRelease.
 * Note the C11 form is an expression while the fallback is a statement.
 * Both macro arguments are parenthesized in the fallback so that expression
 * arguments keep their meaning. */
#ifdef MDBX_HAVE_C11ATOMICS
#define osal_memory_fence(order, write)                                        \
  atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order))
#else /* MDBX_HAVE_C11ATOMICS */
#define osal_memory_fence(order, write)                                        \
  do {                                                                         \
    osal_compiler_barrier();                                                   \
    if ((write) &&                                                             \
        (order) > (MDBX_CPU_WRITEBACK_INCOHERENT ? mo_Relaxed                  \
                                                 : mo_AcquireRelease))         \
      osal_memory_barrier();                                                   \
  } while (0)
#endif /* MDBX_HAVE_C11ATOMICS */
|
|
|
|
|
|
2022-06-12 20:04:35 +03:00
|
|
|
|
/* Macro versions of the atomic load/store helpers, used only for LCC with
 * C11 atomics. The store macros are GNU statement expressions that evaluate
 * `value` once, perform the store and yield the stored value. When these
 * macros exist, the `#ifndef atomic_store32` / `#ifndef atomic_load32`
 * function definitions later in this header are skipped. */
#if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__)
#define atomic_store32(p, value, order)                                        \
  ({                                                                           \
    const uint32_t value_to_store = (value);                                   \
    atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store,           \
                          mo_c11_store(order));                                \
    value_to_store;                                                            \
  })
#define atomic_load32(p, order)                                                \
  atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order))
#define atomic_store64(p, value, order)                                        \
  ({                                                                           \
    const uint64_t value_to_store = (value);                                   \
    atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store,           \
                          mo_c11_store(order));                                \
    value_to_store;                                                            \
  })
#define atomic_load64(p, order)                                                \
  atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order))
#endif /* LCC && MDBX_HAVE_C11ATOMICS */
|
|
|
|
|
|
|
|
|
|
#ifndef atomic_store32
|
2021-05-11 20:14:09 +03:00
|
|
|
|
MDBX_MAYBE_UNUSED static __always_inline uint32_t
|
2021-04-20 01:36:34 +03:00
|
|
|
|
atomic_store32(MDBX_atomic_uint32_t *p, const uint32_t value,
|
|
|
|
|
enum MDBX_memory_order order) {
|
|
|
|
|
STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4);
|
|
|
|
|
#ifdef MDBX_HAVE_C11ATOMICS
|
|
|
|
|
assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p)));
|
|
|
|
|
atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value, mo_c11_store(order));
|
|
|
|
|
#else /* MDBX_HAVE_C11ATOMICS */
|
|
|
|
|
if (order != mo_Relaxed)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_compiler_barrier();
|
2021-04-20 01:36:34 +03:00
|
|
|
|
p->weak = value;
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_memory_fence(order, true);
|
2021-04-20 01:36:34 +03:00
|
|
|
|
#endif /* MDBX_HAVE_C11ATOMICS */
|
|
|
|
|
return value;
|
|
|
|
|
}
|
2022-06-12 20:04:35 +03:00
|
|
|
|
#endif /* atomic_store32 */
|
2021-04-20 01:36:34 +03:00
|
|
|
|
|
2022-06-12 20:04:35 +03:00
|
|
|
|
#ifndef atomic_load32
|
2022-08-11 17:09:13 +03:00
|
|
|
|
MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(
|
|
|
|
|
const volatile MDBX_atomic_uint32_t *p, enum MDBX_memory_order order) {
|
2021-04-20 01:36:34 +03:00
|
|
|
|
STATIC_ASSERT(sizeof(MDBX_atomic_uint32_t) == 4);
|
|
|
|
|
#ifdef MDBX_HAVE_C11ATOMICS
|
|
|
|
|
assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p)));
|
|
|
|
|
return atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order));
|
|
|
|
|
#else /* MDBX_HAVE_C11ATOMICS */
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_memory_fence(order, false);
|
2021-04-20 01:36:34 +03:00
|
|
|
|
const uint32_t value = p->weak;
|
|
|
|
|
if (order != mo_Relaxed)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_compiler_barrier();
|
2021-04-20 01:36:34 +03:00
|
|
|
|
return value;
|
|
|
|
|
#endif /* MDBX_HAVE_C11ATOMICS */
|
|
|
|
|
}
|
2022-06-12 20:04:35 +03:00
|
|
|
|
#endif /* atomic_load32 */
|
2021-04-20 01:36:34 +03:00
|
|
|
|
|
|
|
|
|
#endif /* !__cplusplus */
|
|
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
|
/* Basic constants and types */
|
|
|
|
|
|
2017-05-24 13:59:50 +03:00
|
|
|
|
/* A stamp that identifies a file as an MDBX file.
 * There's nothing special about this value other than that it is easily
 * recognizable, and it will reflect any byte order mismatches. */
#define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11)

/* FROZEN: The version number for a database's datafile format. */
#define MDBX_DATA_VERSION 3
/* The version number for a database's lockfile format. */
#define MDBX_LOCK_VERSION 5

/* handle for the DB used to track free pages. */
#define FREE_DBI 0
/* handle for the default DB. */
#define MAIN_DBI 1
/* Number of DBs in metapage (free and main) - also hardcoded elsewhere */
#define CORE_DBS 2

/* Number of meta pages - also hardcoded elsewhere */
#define NUM_METAS 3
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2017-05-23 14:49:12 +03:00
|
|
|
|
/* A page number in the database.
|
|
|
|
|
*
|
|
|
|
|
* MDBX uses 32 bit for page numbers. This limits database
|
|
|
|
|
* size up to 2^44 bytes, in case of 4K pages. */
|
2017-06-05 14:22:52 +03:00
|
|
|
|
typedef uint32_t pgno_t;
|
2021-01-30 02:28:12 +03:00
|
|
|
|
typedef MDBX_atomic_uint32_t atomic_pgno_t;
|
2017-06-05 14:22:52 +03:00
|
|
|
|
#define PRIaPGNO PRIu32
|
2018-08-25 18:17:50 +03:00
|
|
|
|
#define MAX_PAGENO UINT32_C(0x7FFFffff)
|
2017-06-21 01:34:56 +03:00
|
|
|
|
#define MIN_PAGENO NUM_METAS
|
2017-05-23 14:49:12 +03:00
|
|
|
|
|
2020-05-15 01:15:08 +03:00
|
|
|
|
#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000)
|
|
|
|
|
|
2017-05-23 14:49:12 +03:00
|
|
|
|
/* A transaction ID. */
|
|
|
|
|
typedef uint64_t txnid_t;
|
2021-01-30 02:28:12 +03:00
|
|
|
|
typedef MDBX_atomic_uint64_t atomic_txnid_t;
|
2017-05-23 14:49:12 +03:00
|
|
|
|
#define PRIaTXN PRIi64
|
2019-09-10 19:27:46 +03:00
|
|
|
|
#define MIN_TXNID UINT64_C(1)
|
2020-05-15 01:15:08 +03:00
|
|
|
|
#define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1)
|
2020-08-22 20:19:46 +03:00
|
|
|
|
#define INITIAL_TXNID (MIN_TXNID + NUM_METAS - 1)
|
2019-12-21 00:57:47 +03:00
|
|
|
|
#define INVALID_TXNID UINT64_MAX
|
2019-10-15 23:53:00 +03:00
|
|
|
|
/* LY: for testing non-atomic 64-bit txnid on 32-bit arches.
|
2021-04-29 19:50:25 +03:00
|
|
|
|
* #define xMDBX_TXNID_STEP (UINT32_MAX / 3) */
|
|
|
|
|
#ifndef xMDBX_TXNID_STEP
|
2019-10-17 00:18:30 +03:00
|
|
|
|
#if MDBX_64BIT_CAS
|
2021-04-29 19:50:25 +03:00
|
|
|
|
#define xMDBX_TXNID_STEP 1u
|
2019-10-17 00:18:30 +03:00
|
|
|
|
#else
|
2021-04-29 19:50:25 +03:00
|
|
|
|
#define xMDBX_TXNID_STEP 2u
|
2019-10-17 00:18:30 +03:00
|
|
|
|
#endif
|
2021-04-29 19:50:25 +03:00
|
|
|
|
#endif /* xMDBX_TXNID_STEP */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Used for offsets within a single page.
 * Since memory pages are typically 4 or 8KB in size, 12-13 bits,
 * this is plenty. */
typedef uint16_t indx_t;

/* One mebibyte (2^20) as a size_t. */
#define MEGABYTE ((size_t)1 << 20)
|
|
|
|
|
|
2017-05-24 13:59:50 +03:00
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
|
/* Core structures for database and shared memory (i.e. format definition) */
|
2021-10-09 12:36:40 +03:00
|
|
|
|
#pragma pack(push, 4)
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Information about a single database in the environment. */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
typedef struct MDBX_db {
|
2017-06-05 14:22:52 +03:00
|
|
|
|
uint16_t md_flags; /* see mdbx_dbi_open */
|
|
|
|
|
uint16_t md_depth; /* depth of this tree */
|
2019-12-20 12:08:00 +03:00
|
|
|
|
uint32_t md_xsize; /* key-size for MDBX_DUPFIXED (LEAF2 pages) */
|
2017-06-05 14:22:52 +03:00
|
|
|
|
pgno_t md_root; /* the root page of this tree */
|
|
|
|
|
pgno_t md_branch_pages; /* number of internal pages */
|
|
|
|
|
pgno_t md_leaf_pages; /* number of leaf pages */
|
|
|
|
|
pgno_t md_overflow_pages; /* number of overflow pages */
|
|
|
|
|
uint64_t md_seq; /* table sequence counter */
|
|
|
|
|
uint64_t md_entries; /* number of data items */
|
2020-10-20 15:42:50 +03:00
|
|
|
|
uint64_t md_mod_txnid; /* txnid of last committed modification */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
} MDBX_db;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2019-09-17 23:52:51 +03:00
|
|
|
|
/* database size-related parameters */
|
2021-03-09 11:54:20 +03:00
|
|
|
|
typedef struct MDBX_geo {
|
|
|
|
|
uint16_t grow_pv; /* datafile growth step as a 16-bit packed (exponential
|
|
|
|
|
quantized) value */
|
|
|
|
|
uint16_t shrink_pv; /* datafile shrink threshold as a 16-bit packed
|
|
|
|
|
(exponential quantized) value */
|
|
|
|
|
pgno_t lower; /* minimal size of datafile in pages */
|
|
|
|
|
pgno_t upper; /* maximal size of datafile in pages */
|
|
|
|
|
pgno_t now; /* current size of datafile in pages */
|
|
|
|
|
pgno_t next; /* first unused page in the datafile,
|
|
|
|
|
but actually the file may be shorter. */
|
|
|
|
|
} MDBX_geo;
|
2019-09-17 23:52:51 +03:00
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Meta page content.
|
|
|
|
|
* A meta page is the start point for accessing a database snapshot.
|
|
|
|
|
* Pages 0-1 are meta pages. Transaction N writes meta page (N % 2). */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
typedef struct MDBX_meta {
|
2017-05-30 16:22:42 +03:00
|
|
|
|
/* Stamp identifying this as an MDBX file.
|
|
|
|
|
* It must be set to MDBX_MAGIC with MDBX_DATA_VERSION. */
|
2020-12-01 20:23:23 +03:00
|
|
|
|
uint32_t mm_magic_and_version[2];
|
2017-05-30 16:22:42 +03:00
|
|
|
|
|
|
|
|
|
/* txnid that committed this page, the first of a two-phase-update pair */
|
2022-08-17 15:10:05 +03:00
|
|
|
|
union {
|
|
|
|
|
MDBX_atomic_uint32_t mm_txnid_a[2];
|
|
|
|
|
uint64_t unsafe_txnid;
|
|
|
|
|
};
|
2017-05-30 16:22:42 +03:00
|
|
|
|
|
|
|
|
|
uint16_t mm_extra_flags; /* extra DB flags, zero (nothing) for now */
|
|
|
|
|
uint8_t mm_validator_id; /* ID of checksum and page validation method,
|
|
|
|
|
* zero (nothing) for now */
|
|
|
|
|
uint8_t mm_extra_pagehdr; /* extra bytes in the page header,
|
|
|
|
|
* zero (nothing) for now */
|
|
|
|
|
|
2021-03-09 11:54:20 +03:00
|
|
|
|
MDBX_geo mm_geo; /* database size-related parameters */
|
2017-06-21 01:34:56 +03:00
|
|
|
|
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_db mm_dbs[CORE_DBS]; /* first is free space, 2nd is main db */
|
|
|
|
|
/* The size of pages used in this DB */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
#define mm_psize mm_dbs[FREE_DBI].md_xsize
|
2020-07-23 19:24:21 +03:00
|
|
|
|
MDBX_canary mm_canary;
|
2017-05-30 16:22:42 +03:00
|
|
|
|
|
2017-05-24 01:42:10 +03:00
|
|
|
|
#define MDBX_DATASIGN_NONE 0u
|
|
|
|
|
#define MDBX_DATASIGN_WEAK 1u
|
|
|
|
|
#define SIGN_IS_STEADY(sign) ((sign) > MDBX_DATASIGN_WEAK)
|
2020-12-01 20:23:23 +03:00
|
|
|
|
#define META_IS_STEADY(meta) \
|
2022-08-17 15:10:05 +03:00
|
|
|
|
SIGN_IS_STEADY(unaligned_peek_u64_volatile(4, (meta)->mm_sign))
|
|
|
|
|
union {
|
|
|
|
|
uint32_t mm_sign[2];
|
|
|
|
|
uint64_t unsafe_sign;
|
|
|
|
|
};
|
2017-05-30 16:22:42 +03:00
|
|
|
|
|
|
|
|
|
/* txnid that committed this page, the second of a two-phase-update pair */
|
2022-08-11 17:09:13 +03:00
|
|
|
|
MDBX_atomic_uint32_t mm_txnid_b[2];
|
2019-08-23 03:36:56 +03:00
|
|
|
|
|
|
|
|
|
/* Number of non-meta pages which were put in GC after COW. May be 0 in case
|
|
|
|
|
* DB was previously handled by libmdbx without corresponding feature.
|
|
|
|
|
* This value together with mr_snapshot_pages_retired allows fast estimation
|
|
|
|
|
* of "how much reader is restraining GC recycling". */
|
2020-12-01 20:23:23 +03:00
|
|
|
|
uint32_t mm_pages_retired[2];
|
2019-11-17 23:41:14 +03:00
|
|
|
|
|
|
|
|
|
/* The analogue of /proc/sys/kernel/random/boot_id or similar to determine
|
|
|
|
|
* whether the system was rebooted after the last use of the database files.
|
|
|
|
|
* If there was no reboot, then there is no need to rollback to the last
|
|
|
|
|
* steady sync point. Zeros mean that no relevant information is available
|
|
|
|
|
* from the system. */
|
|
|
|
|
bin128_t mm_bootid;
|
|
|
|
|
|
2017-05-24 01:42:10 +03:00
|
|
|
|
} MDBX_meta;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2021-10-12 16:19:00 +03:00
|
|
|
|
#pragma pack(1)
|
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Common header for all page types. The page type depends on mp_flags.
|
|
|
|
|
*
|
2017-05-23 21:02:39 +03:00
|
|
|
|
* P_BRANCH and P_LEAF pages have unsorted 'MDBX_node's at the end, with
|
2017-05-23 14:44:53 +03:00
|
|
|
|
* sorted mp_ptrs[] entries referring to them. Exception: P_LEAF2 pages
|
2017-05-24 01:42:10 +03:00
|
|
|
|
* omit mp_ptrs and pack sorted MDBX_DUPFIXED values after the page header.
|
2017-05-23 14:44:53 +03:00
|
|
|
|
*
|
|
|
|
|
* P_OVERFLOW records occupy one or more contiguous pages where only the
|
|
|
|
|
* first has a page header. They hold the real data of F_BIGDATA nodes.
|
|
|
|
|
*
|
|
|
|
|
* P_SUBP sub-pages are small leaf "pages" with duplicate data.
|
|
|
|
|
* A node with flag F_DUPDATA but not F_SUBDATA contains a sub-page.
|
|
|
|
|
* (Duplicate data can also go in sub-databases, which use normal pages.)
|
|
|
|
|
*
|
2017-05-24 01:42:10 +03:00
|
|
|
|
* P_META pages contain MDBX_meta, the start point of an MDBX snapshot.
|
2017-05-23 14:44:53 +03:00
|
|
|
|
*
|
2017-05-24 01:42:10 +03:00
|
|
|
|
* Each non-metapage up to MDBX_meta.mm_geo.next is reachable exactly once
|
2019-10-18 20:28:36 +03:00
|
|
|
|
* in the snapshot: Either used by a database or listed in a GC record. */
|
2017-05-23 21:04:23 +03:00
|
|
|
|
typedef struct MDBX_page {
|
2021-04-14 01:41:42 +03:00
|
|
|
|
#define IS_FROZEN(txn, p) ((p)->mp_txnid < (txn)->mt_txnid)
|
|
|
|
|
#define IS_SPILLED(txn, p) ((p)->mp_txnid == (txn)->mt_txnid)
|
|
|
|
|
#define IS_SHADOWED(txn, p) ((p)->mp_txnid > (txn)->mt_txnid)
|
|
|
|
|
#define IS_VALID(txn, p) ((p)->mp_txnid <= (txn)->mt_front)
|
|
|
|
|
#define IS_MODIFIABLE(txn, p) ((p)->mp_txnid == (txn)->mt_front)
|
2022-11-26 16:58:10 +03:00
|
|
|
|
uint64_t mp_txnid; /* txnid which created page, maybe zero in legacy DB */
|
2022-07-07 15:48:24 +03:00
|
|
|
|
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
|
2022-07-08 21:48:08 +03:00
|
|
|
|
#define P_BRANCH 0x01u /* branch page */
|
|
|
|
|
#define P_LEAF 0x02u /* leaf page */
|
|
|
|
|
#define P_OVERFLOW 0x04u /* overflow page */
|
|
|
|
|
#define P_META 0x08u /* meta page */
|
|
|
|
|
#define P_LEGACY_DIRTY 0x10u /* legacy P_DIRTY flag prior to v0.10 958fd5b9 */
|
2022-07-07 15:48:24 +03:00
|
|
|
|
#define P_BAD P_LEGACY_DIRTY /* explicit flag for invalid/bad page */
|
2022-07-08 21:48:08 +03:00
|
|
|
|
#define P_LEAF2 0x20u /* for MDBX_DUPFIXED records */
|
|
|
|
|
#define P_SUBP 0x40u /* for MDBX_DUPSORT sub-pages */
|
|
|
|
|
#define P_SPILLED 0x2000u /* spilled in parent txn */
|
|
|
|
|
#define P_LOOSE 0x4000u /* page was dirtied then freed, can be reused */
|
|
|
|
|
#define P_FROZEN 0x8000u /* used for retire page with known status */
|
2022-07-07 02:25:35 +03:00
|
|
|
|
#define P_ILL_BITS \
|
|
|
|
|
((uint16_t) ~(P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_SPILLED))
|
2017-05-23 14:44:53 +03:00
|
|
|
|
uint16_t mp_flags;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
union {
|
2021-01-30 21:02:04 +03:00
|
|
|
|
uint32_t mp_pages; /* number of overflow pages */
|
2020-07-30 14:52:27 +03:00
|
|
|
|
__anonymous_struct_extension__ struct {
|
2017-05-23 21:04:23 +03:00
|
|
|
|
indx_t mp_lower; /* lower bound of free space */
|
|
|
|
|
indx_t mp_upper; /* upper bound of free space */
|
|
|
|
|
};
|
|
|
|
|
};
|
2017-05-30 16:22:42 +03:00
|
|
|
|
pgno_t mp_pgno; /* page number */
|
2017-05-24 21:43:29 +03:00
|
|
|
|
|
2020-07-30 14:52:27 +03:00
|
|
|
|
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
|
|
|
|
(!defined(__cplusplus) && defined(_MSC_VER))
|
|
|
|
|
indx_t mp_ptrs[] /* dynamic size */;
|
|
|
|
|
#endif /* C99 */
|
2017-05-23 21:04:23 +03:00
|
|
|
|
} MDBX_page;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2022-07-08 21:48:08 +03:00
|
|
|
|
/* Low byte of mp_flags: keeps the page-type bits (0x01..0x40) and drops the
 * upper state bits (P_SPILLED, P_LOOSE, P_FROZEN). */
#define PAGETYPE_WHOLE(p) ((uint8_t)((p)->mp_flags))
|
2022-07-07 15:48:24 +03:00
|
|
|
|
|
2023-04-24 20:59:18 +03:00
|
|
|
|
/* Drop legacy P_DIRTY flag for sub-pages for compatibility,
|
|
|
|
|
* for assertions only. */
|
2022-07-07 15:48:24 +03:00
|
|
|
|
#define PAGETYPE_COMPAT(p) \
|
|
|
|
|
(unlikely(PAGETYPE_WHOLE(p) & P_SUBP) \
|
|
|
|
|
? PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY) \
|
|
|
|
|
: PAGETYPE_WHOLE(p))
|
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Size of the page header, excluding dynamic data at the end */
|
2022-09-29 16:18:10 +03:00
|
|
|
|
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2022-11-29 01:10:44 +03:00
|
|
|
|
/* Pointer displacement without casting to char* to avoid pointer-aliasing */
|
|
|
|
|
/* Both operands are converted to intptr_t, summed as integers (so `disp` may
 * be negative), and the sum is converted back to `void *`. */
#define ptr_disp(ptr, disp) ((void *)((intptr_t)(ptr) + (intptr_t)(disp)))
|
|
|
|
|
|
|
|
|
|
/* Pointer distance as signed number of bytes */
|
|
|
|
|
/* Integer difference `more - less` of the two addresses after converting each
 * to intptr_t; negative when `more` lies below `less`. */
#define ptr_dist(more, less) ((intptr_t)(more) - (intptr_t)(less))
|
|
|
|
|
|
|
|
|
|
#define mp_next(mp) \
|
|
|
|
|
(*(MDBX_page **)ptr_disp((mp)->mp_ptrs, sizeof(void *) - sizeof(uint32_t)))
|
2022-11-26 16:58:10 +03:00
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
#pragma pack(pop)
|
|
|
|
|
|
2022-11-06 16:35:06 +03:00
|
|
|
|
typedef struct profgc_stat {
|
|
|
|
|
/* Monotonic "wall-clock" time
|
|
|
|
|
* spent on reading and searching within GC */
|
|
|
|
|
uint64_t rtime_monotonic;
|
|
|
|
|
/* User-mode CPU time spent
|
2022-12-03 15:35:27 +03:00
|
|
|
|
* on preparing pages retrieved from GC, including paging-in from disk. */
|
|
|
|
|
uint64_t xtime_cpu;
|
2022-11-06 16:35:06 +03:00
|
|
|
|
/* Number of read/search iterations within GC during page allocation */
|
|
|
|
|
uint32_t rsteps;
|
|
|
|
|
/* Number of requests to allocate page sequences,
|
|
|
|
|
* i.e. when more than one page is requested at once */
|
|
|
|
|
uint32_t xpages;
|
|
|
|
|
/* Slow path execution count */
|
|
|
|
|
uint32_t spe_counter;
|
|
|
|
|
/* page faults (hard page faults) */
|
|
|
|
|
uint32_t majflt;
|
|
|
|
|
} profgc_stat_t;
|
|
|
|
|
|
2021-04-27 18:02:11 +03:00
|
|
|
|
/* Statistics of page operations overall of all (running, completed and aborted)
|
|
|
|
|
* transactions */
|
2022-11-06 16:35:06 +03:00
|
|
|
|
typedef struct pgop_stat {
|
2021-04-27 18:02:11 +03:00
|
|
|
|
MDBX_atomic_uint64_t newly; /* Quantity of a new pages added */
|
|
|
|
|
MDBX_atomic_uint64_t cow; /* Quantity of pages copied for update */
|
|
|
|
|
MDBX_atomic_uint64_t clone; /* Quantity of parent's dirty pages clones
|
|
|
|
|
for nested transactions */
|
|
|
|
|
MDBX_atomic_uint64_t split; /* Page splits */
|
|
|
|
|
MDBX_atomic_uint64_t merge; /* Page merges */
|
|
|
|
|
MDBX_atomic_uint64_t spill; /* Quantity of spilled dirty pages */
|
|
|
|
|
MDBX_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */
|
|
|
|
|
MDBX_atomic_uint64_t
|
|
|
|
|
wops; /* Number of explicit write operations (not a pages) to a disk */
|
2022-09-25 12:47:31 +03:00
|
|
|
|
MDBX_atomic_uint64_t
|
|
|
|
|
msync; /* Number of explicit msync/flush-to-disk operations */
|
|
|
|
|
MDBX_atomic_uint64_t
|
|
|
|
|
fsync; /* Number of explicit fsync/flush-to-disk operations */
|
2022-11-06 16:35:06 +03:00
|
|
|
|
|
2022-12-04 20:04:13 +03:00
|
|
|
|
MDBX_atomic_uint64_t prefault; /* Number of prefault write operations */
|
2022-12-05 10:41:05 +03:00
|
|
|
|
MDBX_atomic_uint64_t mincore; /* Number of mincore() calls */
|
2022-12-04 20:04:13 +03:00
|
|
|
|
|
mdbx: переработка контроля "некогерентности" для уменьшения накладных расходов.
Существует проблема https://libmdbx.dqdkfa.ru/dead-github/issues/269,
которая проявляется только при специфической неупорядоченности внутри
ядра ОС, когда страницы, записанные в файл отображенный в память,
становятся видны в памяти посредством работы unified page cache:
- если записанная последней мета-страница "обгоняет" ранее записанные,
т.е. когда записанное в файл позже становится видимым в отображении
раньше, чем записанное ранее.
Теперь, вместо постоянной полной сверки записываемых страниц,
выполняется легковесная проверка при старте транзакций, с переключением
в режим "как раньше" при обнаружении проблемы.
В результате, в некоторых сценариях возвращается 5-10%
производительности, а в отдельных синтетических тестах до 30%.
2022-12-25 19:56:50 +03:00
|
|
|
|
MDBX_atomic_uint32_t
|
|
|
|
|
incoherence; /* number of https://libmdbx.dqdkfa.ru/dead-github/issues/269
|
|
|
|
|
caught */
|
|
|
|
|
MDBX_atomic_uint32_t reserved;
|
|
|
|
|
|
2022-11-06 16:35:06 +03:00
|
|
|
|
/* Statistics for GC profiling.
|
|
|
|
|
* Logically this data might be worth moving into a separate structure,
|
|
|
|
|
* but the difference would be purely cosmetic. */
|
|
|
|
|
struct {
|
|
|
|
|
/* Costs of maintaining user data */
|
|
|
|
|
profgc_stat_t work;
|
|
|
|
|
/* Costs of maintaining and updating the GC itself */
|
|
|
|
|
profgc_stat_t self;
|
|
|
|
|
/* Number of GC update iterations,
|
|
|
|
|
* more than 1 if there were retries/restarts */
|
|
|
|
|
uint32_t wloops;
|
|
|
|
|
/* Iterations of merging (coalescing) GC records */
|
|
|
|
|
uint32_t coalescences;
|
|
|
|
|
/* Wipes of steady commit points in MDBX_UTTERLY_NOSYNC mode */
|
|
|
|
|
uint32_t wipes;
|
|
|
|
|
/* Flushes of data to disk outside of MDBX_UTTERLY_NOSYNC mode */
|
|
|
|
|
uint32_t flushes;
|
|
|
|
|
/* Attempts to kick lagging (stalling) readers */
|
|
|
|
|
uint32_t kicks;
|
|
|
|
|
} gc_prof;
|
|
|
|
|
} pgop_stat_t;
|
2021-04-27 18:02:11 +03:00
|
|
|
|
|
2019-11-11 12:48:31 +03:00
|
|
|
|
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
|
|
|
|
|
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
typedef void osal_ipclock_t;
|
2019-11-11 12:48:31 +03:00
|
|
|
|
#elif MDBX_LOCKING == MDBX_LOCKING_SYSV
|
|
|
|
|
|
|
|
|
|
#define MDBX_CLOCK_SIGN UINT32_C(0xF18D)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
typedef mdbx_pid_t osal_ipclock_t;
|
2019-11-11 12:48:31 +03:00
|
|
|
|
#ifndef EOWNERDEAD
|
|
|
|
|
#define EOWNERDEAD MDBX_RESULT_TRUE
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
|
2019-11-10 23:20:23 +03:00
|
|
|
|
MDBX_LOCKING == MDBX_LOCKING_POSIX2008
|
|
|
|
|
#define MDBX_CLOCK_SIGN UINT32_C(0x8017)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
typedef pthread_mutex_t osal_ipclock_t;
|
2019-11-10 23:20:23 +03:00
|
|
|
|
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
|
|
|
|
|
#define MDBX_CLOCK_SIGN UINT32_C(0xFC29)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
typedef sem_t osal_ipclock_t;
|
2019-11-10 23:20:23 +03:00
|
|
|
|
#else
|
|
|
|
|
#error "FIXME"
|
2019-11-11 12:48:31 +03:00
|
|
|
|
#endif /* MDBX_LOCKING */
|
2019-11-10 23:20:23 +03:00
|
|
|
|
|
2020-09-13 18:06:14 +03:00
|
|
|
|
#if MDBX_LOCKING > MDBX_LOCKING_SYSV && !defined(__cplusplus)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc);
|
|
|
|
|
MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc);
|
2019-11-10 23:20:23 +03:00
|
|
|
|
#endif /* MDBX_LOCKING */
|
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* Reader Lock Table
|
|
|
|
|
*
|
|
|
|
|
* Readers don't acquire any locks for their data access. Instead, they
|
|
|
|
|
* simply record their transaction ID in the reader table. The reader
|
|
|
|
|
* mutex is needed just to find an empty slot in the reader table. The
|
|
|
|
|
* slot's address is saved in thread-specific data so that subsequent
|
|
|
|
|
* read transactions started by the same thread need no further locking to
|
|
|
|
|
* proceed.
|
|
|
|
|
*
|
|
|
|
|
* If MDBX_NOTLS is set, the slot address is not saved in thread-specific data.
|
|
|
|
|
* No reader table is used if the database is on a read-only filesystem.
|
|
|
|
|
*
|
|
|
|
|
* Since the database uses multi-version concurrency control, readers don't
|
|
|
|
|
* actually need any locking. This table is used to keep track of which
|
|
|
|
|
* readers are using data from which old transactions, so that we'll know
|
|
|
|
|
* when a particular old transaction is no longer in use. Old transactions
|
|
|
|
|
* that have discarded any data pages can then have those pages reclaimed
|
|
|
|
|
* for use by a later write transaction.
|
|
|
|
|
*
|
|
|
|
|
* The lock table is constructed such that reader slots are aligned with the
|
|
|
|
|
* processor's cache line size. Any slot is only ever used by one thread.
|
|
|
|
|
* This alignment guarantees that there will be no contention or cache
|
|
|
|
|
* thrashing as threads update their own slot info, and also eliminates
|
|
|
|
|
* any need for locking when accessing a slot.
|
|
|
|
|
*
|
|
|
|
|
* A writer thread will scan every slot in the table to determine the oldest
|
|
|
|
|
* outstanding reader transaction. Any freed pages older than this will be
|
|
|
|
|
* reclaimed by the writer. The writer doesn't use any locks when scanning
|
|
|
|
|
* this table. This means that there's no guarantee that the writer will
|
|
|
|
|
* see the most up-to-date reader info, but that's not required for correct
|
|
|
|
|
* operation - all we need is to know the upper bound on the oldest reader,
|
|
|
|
|
* we don't care at all about the newest reader. So the only consequence of
|
|
|
|
|
* reading stale information here is that old pages might hang around a
|
|
|
|
|
* while longer before being reclaimed. That's actually good anyway, because
|
|
|
|
|
* the longer we delay reclaiming old pages, the more likely it is that a
|
|
|
|
|
* string of contiguous pages can be found after coalescing old pages from
|
|
|
|
|
* many old transactions together. */
|
|
|
|
|
|
|
|
|
|
/* The actual reader record, with cacheline padding. */
|
|
|
|
|
typedef struct MDBX_reader {
|
|
|
|
|
/* Current Transaction ID when this transaction began, or (txnid_t)-1.
|
|
|
|
|
* Multiple readers that start at the same time will probably have the
|
|
|
|
|
* same ID here. Again, it's not important to exclude them from
|
|
|
|
|
* anything; all we need to know is which version of the DB they
|
|
|
|
|
* started from so we can avoid overwriting any data used in that
|
|
|
|
|
* particular version. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint64_t /* txnid_t */ mr_txnid;
|
2019-08-23 03:36:56 +03:00
|
|
|
|
|
|
|
|
|
/* The information we store in a single slot of the reader table.
|
|
|
|
|
* In addition to a transaction ID, we also record the process and
|
|
|
|
|
* thread ID that owns a slot, so that we can detect stale information,
|
|
|
|
|
* e.g. threads or processes that went away without cleaning up.
|
|
|
|
|
*
|
|
|
|
|
* NOTE: We currently don't check for stale records.
|
|
|
|
|
* We simply re-init the table when we know that we're the only process
|
|
|
|
|
* opening the lock file. */
|
|
|
|
|
|
|
|
|
|
/* The thread ID of the thread owning this txn. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint64_t mr_tid;
|
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* The process ID of the process owning this reader txn. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint32_t mr_pid;
|
2019-09-23 15:32:29 +03:00
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* The number of pages used in the reader's MVCC snapshot,
|
|
|
|
|
* i.e. the value of meta->mm_geo.next and txn->mt_next_pgno */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
atomic_pgno_t mr_snapshot_pages_used;
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* Number of retired pages at the time this reader starts transaction. So,
|
|
|
|
|
* at any time the difference mm_pages_retired - mr_snapshot_pages_retired
|
|
|
|
|
* will give the number of pages which this reader is restraining from reuse. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint64_t mr_snapshot_pages_retired;
|
2019-08-23 03:36:56 +03:00
|
|
|
|
} MDBX_reader;
|
|
|
|
|
|
2017-03-16 18:09:27 +03:00
|
|
|
|
/* The header for the reader table (a memory-mapped lock file). */
|
|
|
|
|
typedef struct MDBX_lockinfo {
|
2017-05-30 16:22:42 +03:00
|
|
|
|
/* Stamp identifying this as an MDBX file.
|
|
|
|
|
* It must be set to MDBX_MAGIC with MDBX_LOCK_VERSION. */
|
|
|
|
|
uint64_t mti_magic_and_version;
|
|
|
|
|
|
2017-05-24 01:42:10 +03:00
|
|
|
|
/* Format of this lock file. Must be set to MDBX_LOCK_FORMAT. */
|
2017-05-30 16:22:42 +03:00
|
|
|
|
uint32_t mti_os_and_format;
|
|
|
|
|
|
2017-04-21 19:00:33 +03:00
|
|
|
|
/* Flags with which the environment was opened. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint32_t mti_envmode;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* Threshold of un-synced-with-disk pages for auto-sync feature,
|
|
|
|
|
* zero means no-threshold, i.e. auto-sync is disabled. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
atomic_pgno_t mti_autosync_threshold;
|
2019-09-11 19:12:57 +03:00
|
|
|
|
|
2019-09-29 23:51:43 +03:00
|
|
|
|
/* Low 32 bits of the txnid with which meta-pages were synced,
|
|
|
|
|
* i.e. for sync-polling in the MDBX_NOMETASYNC mode. */
|
2022-12-03 14:55:38 +03:00
|
|
|
|
#define MDBX_NOMETASYNC_LAZY_UNK (UINT32_MAX / 3)
|
|
|
|
|
#define MDBX_NOMETASYNC_LAZY_FD (MDBX_NOMETASYNC_LAZY_UNK + UINT32_MAX / 8)
|
|
|
|
|
#define MDBX_NOMETASYNC_LAZY_WRITEMAP \
|
|
|
|
|
(MDBX_NOMETASYNC_LAZY_UNK - UINT32_MAX / 8)
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint32_t mti_meta_sync_txnid;
|
2019-09-11 19:12:57 +03:00
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* Period for timed auto-sync feature, i.e. at every steady checkpoint
|
|
|
|
|
* the mti_unsynced_timeout is set to the current_time + mti_autosync_period.
|
|
|
|
|
* The time value is represented in a suitable system-dependent form, for
|
|
|
|
|
* example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC).
|
|
|
|
|
* Zero means timed auto-sync is disabled. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint64_t mti_autosync_period;
|
2019-08-23 03:36:56 +03:00
|
|
|
|
|
2021-04-17 00:13:51 +03:00
|
|
|
|
/* Marker to distinguish uniqueness of DB/CLK. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
2019-08-31 00:55:15 +03:00
|
|
|
|
|
2022-11-04 19:43:48 +03:00
|
|
|
|
/* Paired counter of processes that have mlock()ed part of mmapped DB.
|
|
|
|
|
* The (mti_mlcnt[0] - mti_mlcnt[1]) > 0 means at least one process
|
2023-01-10 14:16:08 +03:00
|
|
|
|
* lock at least one page, so therefore madvise() could return EINVAL. */
|
2022-11-04 19:43:48 +03:00
|
|
|
|
MDBX_atomic_uint32_t mti_mlcnt[2];
|
2022-10-24 01:02:38 +03:00
|
|
|
|
|
2022-01-31 23:29:03 +03:00
|
|
|
|
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
2019-11-10 23:20:23 +03:00
|
|
|
|
|
2022-01-31 23:29:03 +03:00
|
|
|
|
/* Statistics of costly ops of all (running, completed and aborted)
|
|
|
|
|
* transactions */
|
2022-11-06 16:35:06 +03:00
|
|
|
|
pgop_stat_t mti_pgop_stat;
|
2021-04-27 18:02:11 +03:00
|
|
|
|
|
2022-01-31 23:29:03 +03:00
|
|
|
|
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
2021-04-27 18:02:11 +03:00
|
|
|
|
|
2020-09-21 23:51:47 -04:00
|
|
|
|
/* Write transaction lock. */
|
2019-11-10 23:20:23 +03:00
|
|
|
|
#if MDBX_LOCKING > 0
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_ipclock_t mti_wlock;
|
2019-11-11 12:48:31 +03:00
|
|
|
|
#endif /* MDBX_LOCKING > 0 */
|
2018-03-22 20:34:09 +03:00
|
|
|
|
|
2021-01-30 02:28:12 +03:00
|
|
|
|
atomic_txnid_t mti_oldest_reader;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2022-10-11 13:11:12 +03:00
|
|
|
|
/* Timestamp of entering an out-of-sync state. Value is represented in a
|
|
|
|
|
* suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
|
|
|
|
|
* or clock_gettime(CLOCK_MONOTONIC). */
|
|
|
|
|
MDBX_atomic_uint64_t mti_eoos_timestamp;
|
2017-06-14 23:33:13 +03:00
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* Number of un-synced-with-disk pages for auto-sync feature. */
|
2022-10-11 13:11:12 +03:00
|
|
|
|
MDBX_atomic_uint64_t mti_unsynced_pages;
|
2019-08-28 04:57:07 +03:00
|
|
|
|
|
2019-09-11 19:12:57 +03:00
|
|
|
|
/* Timestamp of the last readers check. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
2019-09-11 19:12:57 +03:00
|
|
|
|
|
2022-11-04 19:43:48 +03:00
|
|
|
|
/* Number of page which was discarded last time by madvise(DONTNEED). */
|
2022-10-11 13:11:12 +03:00
|
|
|
|
atomic_pgno_t mti_discarded_tail;
|
|
|
|
|
|
2021-04-17 00:13:51 +03:00
|
|
|
|
/* Shared anchor for tracking readahead edge and enabled/disabled status. */
|
|
|
|
|
pgno_t mti_readahead_anchor;
|
|
|
|
|
|
2022-12-05 10:41:05 +03:00
|
|
|
|
/* Shared cache for mincore() results */
|
|
|
|
|
struct {
|
|
|
|
|
pgno_t begin[4];
|
|
|
|
|
uint64_t mask[4];
|
|
|
|
|
} mti_mincore_cache;
|
|
|
|
|
|
2022-01-31 23:29:03 +03:00
|
|
|
|
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
2018-03-22 20:34:09 +03:00
|
|
|
|
|
2019-11-06 23:53:53 +03:00
|
|
|
|
/* Readers registration lock. */
|
2019-11-10 23:20:23 +03:00
|
|
|
|
#if MDBX_LOCKING > 0
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_ipclock_t mti_rlock;
|
2019-11-11 12:48:31 +03:00
|
|
|
|
#endif /* MDBX_LOCKING > 0 */
|
2017-06-30 00:20:33 +03:00
|
|
|
|
|
2019-08-23 03:36:56 +03:00
|
|
|
|
/* The number of slots that have been used in the reader table.
|
|
|
|
|
* This always records the maximum count, it is not decremented
|
|
|
|
|
* when readers release their slots. */
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint32_t mti_numreaders;
|
|
|
|
|
MDBX_atomic_uint32_t mti_readers_refresh_flag;
|
2017-06-14 23:33:13 +03:00
|
|
|
|
|
2020-07-30 14:52:27 +03:00
|
|
|
|
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
|
|
|
|
(!defined(__cplusplus) && defined(_MSC_VER))
|
2022-01-31 23:29:03 +03:00
|
|
|
|
MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
|
|
|
|
|
MDBX_reader mti_readers[] /* dynamic size */;
|
2020-07-30 14:52:27 +03:00
|
|
|
|
#endif /* C99 */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
} MDBX_lockinfo;
|
|
|
|
|
|
2017-05-30 16:22:42 +03:00
|
|
|
|
/* Lockfile format signature: version, features and field layout */
|
|
|
|
|
#define MDBX_LOCK_FORMAT \
|
2019-11-10 23:20:23 +03:00
|
|
|
|
(MDBX_CLOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 + \
|
2019-08-23 03:36:56 +03:00
|
|
|
|
(unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 + \
|
|
|
|
|
(unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 + \
|
2019-10-22 22:31:06 +03:00
|
|
|
|
(unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 37 + \
|
|
|
|
|
(unsigned)offsetof(MDBX_lockinfo, mti_readers) * 29)
|
2017-05-30 16:22:42 +03:00
|
|
|
|
|
2021-01-20 13:05:32 +03:00
|
|
|
|
#define MDBX_DATA_MAGIC \
|
|
|
|
|
((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION)
|
2021-10-21 15:17:18 +03:00
|
|
|
|
|
|
|
|
|
#define MDBX_DATA_MAGIC_LEGACY_COMPAT \
|
|
|
|
|
((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2)
|
|
|
|
|
|
|
|
|
|
#define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255)
|
2017-05-30 16:22:42 +03:00
|
|
|
|
|
|
|
|
|
#define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION)
|
|
|
|
|
|
2019-09-02 13:23:39 +03:00
|
|
|
|
/* The maximum size of a database page.
|
|
|
|
|
*
|
|
|
|
|
* It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper.
|
|
|
|
|
*
|
|
|
|
|
* MDBX will use database pages < OS pages if needed.
|
|
|
|
|
* That causes more I/O in write transactions: The OS must
|
|
|
|
|
* know (read) the whole page before writing a partial page.
|
|
|
|
|
*
|
|
|
|
|
* Note that we don't currently support Huge pages. On Linux,
|
|
|
|
|
* regular data files cannot use Huge pages, and in general
|
|
|
|
|
* Huge pages aren't actually pageable. We rely on the OS
|
|
|
|
|
* demand-pager to read our data and page it out when memory
|
|
|
|
|
* pressure from other processes is high. So until OSs have
|
|
|
|
|
* actual paging support for Huge pages, they're not viable. */
|
2019-10-29 18:54:03 +03:00
|
|
|
|
#define MAX_PAGESIZE MDBX_MAX_PAGESIZE
|
|
|
|
|
#define MIN_PAGESIZE MDBX_MIN_PAGESIZE
|
2019-09-02 13:23:39 +03:00
|
|
|
|
|
|
|
|
|
#define MIN_MAPSIZE (MIN_PAGESIZE * MIN_PAGENO)
|
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
|
|
|
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
|
|
|
|
|
#else
|
2020-10-09 00:14:59 +03:00
|
|
|
|
#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
|
2019-09-02 13:23:39 +03:00
|
|
|
|
#endif
|
2022-01-21 02:14:36 +03:00
|
|
|
|
#define MAX_MAPSIZE64 ((MAX_PAGENO + 1) * (uint64_t)MAX_PAGESIZE)
|
2019-09-02 13:23:39 +03:00
|
|
|
|
|
|
|
|
|
#if MDBX_WORDBITS >= 64
|
|
|
|
|
#define MAX_MAPSIZE MAX_MAPSIZE64
|
2021-04-21 00:13:51 +03:00
|
|
|
|
#define MDBX_PGL_LIMIT ((size_t)MAX_PAGENO)
|
2019-09-02 13:23:39 +03:00
|
|
|
|
#else
|
|
|
|
|
#define MAX_MAPSIZE MAX_MAPSIZE32
|
2020-12-01 20:20:29 +03:00
|
|
|
|
#define MDBX_PGL_LIMIT (MAX_MAPSIZE32 / MIN_PAGESIZE)
|
2019-09-02 13:23:39 +03:00
|
|
|
|
#endif /* MDBX_WORDBITS */
|
|
|
|
|
|
2021-07-11 02:44:19 +03:00
|
|
|
|
#define MDBX_READERS_LIMIT 32767
|
2022-12-06 22:20:00 +03:00
|
|
|
|
#define MDBX_RADIXSORT_THRESHOLD 142
|
2022-12-08 15:35:41 +03:00
|
|
|
|
#define MDBX_GOLD_RATIO_DBL 1.6180339887498948482
|
2021-04-20 22:14:13 +03:00
|
|
|
|
|
2017-05-24 13:59:50 +03:00
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
|
|
2021-01-20 13:05:32 +03:00
|
|
|
|
/* A PNL is a Page Number List, a sorted array of IDs.
|
|
|
|
|
* The first element of the array is a counter for how many actual page-numbers
|
|
|
|
|
* are in the list. By default PNLs are sorted in descending order, this allow
|
|
|
|
|
* cut off a page with lowest pgno (at the tail) just truncating the list. The
|
|
|
|
|
* sort order of PNLs is controlled by the MDBX_PNL_ASCENDING build option. */
|
2017-07-26 09:31:22 +03:00
|
|
|
|
typedef pgno_t *MDBX_PNL;
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2017-07-24 00:54:10 +03:00
|
|
|
|
#if MDBX_PNL_ASCENDING
|
|
|
|
|
#define MDBX_PNL_ORDERED(first, last) ((first) < (last))
|
|
|
|
|
#define MDBX_PNL_DISORDERED(first, last) ((first) >= (last))
|
|
|
|
|
#else
|
|
|
|
|
#define MDBX_PNL_ORDERED(first, last) ((first) > (last))
|
|
|
|
|
#define MDBX_PNL_DISORDERED(first, last) ((first) <= (last))
|
|
|
|
|
#endif
|
|
|
|
|
|
2019-10-08 19:57:17 +03:00
|
|
|
|
/* List of txnid, only for MDBX_txn.tw.lifo_reclaimed */
|
2017-06-05 14:02:44 +03:00
|
|
|
|
typedef txnid_t *MDBX_TXL;
|
|
|
|
|
|
2018-09-01 19:36:45 +03:00
|
|
|
|
/* A Dirty-Page list item is a pgno/pointer pair. */
|
2020-12-02 14:17:57 +03:00
|
|
|
|
typedef struct MDBX_dp {
|
|
|
|
|
MDBX_page *ptr;
|
2023-01-01 01:26:55 +03:00
|
|
|
|
pgno_t pgno, npages;
|
2020-12-02 14:17:57 +03:00
|
|
|
|
} MDBX_dp;
|
|
|
|
|
|
|
|
|
|
/* A DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
|
|
|
|
typedef struct MDBX_dpl {
|
2022-09-29 16:18:10 +03:00
|
|
|
|
size_t sorted;
|
|
|
|
|
size_t length;
|
|
|
|
|
size_t pages_including_loose; /* number of pages, but not an entries. */
|
|
|
|
|
size_t detent; /* allocated size excluding the MDBX_DPL_RESERVE_GAP */
|
2020-12-02 14:17:57 +03:00
|
|
|
|
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
|
|
|
|
(!defined(__cplusplus) && defined(_MSC_VER))
|
|
|
|
|
MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
|
|
|
|
|
#endif
|
|
|
|
|
} MDBX_dpl;
|
2018-09-01 19:36:45 +03:00
|
|
|
|
|
2019-11-24 19:04:21 +03:00
|
|
|
|
/* PNL sizes */
|
2022-12-08 15:35:41 +03:00
|
|
|
|
#define MDBX_PNL_GRANULATE_LOG2 10
|
|
|
|
|
#define MDBX_PNL_GRANULATE (1 << MDBX_PNL_GRANULATE_LOG2)
|
2018-09-01 19:36:45 +03:00
|
|
|
|
#define MDBX_PNL_INITIAL \
|
|
|
|
|
(MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))
|
2020-06-04 20:09:02 +03:00
|
|
|
|
|
2018-09-01 19:36:45 +03:00
|
|
|
|
#define MDBX_TXL_GRANULATE 32
|
|
|
|
|
#define MDBX_TXL_INITIAL \
|
|
|
|
|
(MDBX_TXL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
|
|
|
|
#define MDBX_TXL_MAX \
|
2022-11-26 01:04:21 +03:00
|
|
|
|
((1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t))
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2017-07-26 09:31:22 +03:00
|
|
|
|
/* Lvalue for the slot located immediately before the list pointer `pl`. */
#define MDBX_PNL_ALLOCLEN(pl) (*((pl) - 1))
|
2022-09-29 16:18:10 +03:00
|
|
|
|
/* Reads slot 0 of the list `pl` and widens the value to size_t. */
#define MDBX_PNL_GETSIZE(pl) ((size_t)(*(pl)))
|
|
|
|
|
/* Stores `size` into slot 0 of the page-number list `pl`.
 *
 * `size` is evaluated exactly once into a local, which is asserted to be below
 * INT_MAX and then narrowed to pgno_t. The argument is parenthesized and the
 * local avoids a reserved `__`-prefixed name, so a caller expression can never
 * collide with it. The do/while(0) wrapper makes the macro behave as a single
 * statement (safe inside an unbraced if/else). */
#define MDBX_PNL_SETSIZE(pl, size)                                             \
  do {                                                                         \
    const size_t pnl_new_size_ = (size);                                       \
    assert(pnl_new_size_ < INT_MAX);                                           \
    (pl)[0] = (pgno_t)pnl_new_size_;                                           \
  } while (0)
|
2018-09-01 19:36:45 +03:00
|
|
|
|
#define MDBX_PNL_FIRST(pl) ((pl)[1])
|
2022-09-29 16:18:10 +03:00
|
|
|
|
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
|
2018-09-01 19:36:45 +03:00
|
|
|
|
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
|
2022-09-29 16:18:10 +03:00
|
|
|
|
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])
|
2018-09-01 19:36:45 +03:00
|
|
|
|
|
2019-10-13 10:26:28 +03:00
|
|
|
|
#if MDBX_PNL_ASCENDING
|
2022-11-25 18:04:43 +03:00
|
|
|
|
#define MDBX_PNL_EDGE(pl) ((pl) + 1)
|
2019-10-13 10:26:28 +03:00
|
|
|
|
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
|
|
|
|
|
#define MDBX_PNL_MOST(pl) MDBX_PNL_LAST(pl)
|
|
|
|
|
#else
|
2022-11-25 18:04:43 +03:00
|
|
|
|
#define MDBX_PNL_EDGE(pl) ((pl) + MDBX_PNL_GETSIZE(pl))
|
2019-10-13 10:26:28 +03:00
|
|
|
|
#define MDBX_PNL_LEAST(pl) MDBX_PNL_LAST(pl)
|
|
|
|
|
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
|
|
|
|
|
#endif
|
|
|
|
|
|
2022-09-29 16:18:10 +03:00
|
|
|
|
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
|
|
|
|
|
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
|
/* Internal structures */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2017-04-26 18:12:48 +03:00
|
|
|
|
/* Auxiliary DB info.
|
|
|
|
|
* The information here is mostly static/read-only. There is
|
|
|
|
|
* only a single copy of this record in the environment. */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
typedef struct MDBX_dbx {
|
2020-05-15 08:59:03 +03:00
|
|
|
|
MDBX_val md_name; /* name of the database */
|
|
|
|
|
MDBX_cmp_func *md_cmp; /* function for comparing keys */
|
|
|
|
|
MDBX_cmp_func *md_dcmp; /* function for comparing data items */
|
|
|
|
|
size_t md_klen_min, md_klen_max; /* min/max key length for the database */
|
|
|
|
|
size_t md_vlen_min,
|
|
|
|
|
md_vlen_max; /* min/max value/data length for the database */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
} MDBX_dbx;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2022-08-20 00:28:32 +03:00
|
|
|
|
typedef struct troika {
|
2022-08-17 15:10:05 +03:00
|
|
|
|
uint8_t fsm, recent, prefer_steady, tail_and_flags;
|
2022-11-07 00:14:24 +03:00
|
|
|
|
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
|
|
|
|
|
uint32_t unused_pad;
|
|
|
|
|
#endif
|
2023-04-24 20:59:18 +03:00
|
|
|
|
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7u)
|
|
|
|
|
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64u)
|
|
|
|
|
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128u)
|
|
|
|
|
#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3u)
|
2022-08-17 15:10:05 +03:00
|
|
|
|
txnid_t txnid[NUM_METAS];
|
2022-08-20 00:28:32 +03:00
|
|
|
|
} meta_troika_t;
|
2022-08-17 15:10:05 +03:00
|
|
|
|
|
2017-04-26 18:12:48 +03:00
|
|
|
|
/* A database transaction.
|
|
|
|
|
* Every operation requires a transaction handle. */
|
2017-05-23 21:36:09 +03:00
|
|
|
|
struct MDBX_txn {
|
2017-06-21 01:19:04 +03:00
|
|
|
|
#define MDBX_MT_SIGNATURE UINT32_C(0x93D53A31)
|
2021-05-10 15:59:50 +03:00
|
|
|
|
uint32_t mt_signature;
|
2019-09-17 23:52:51 +03:00
|
|
|
|
|
|
|
|
|
/* Transaction Flags */
|
2019-10-22 22:31:06 +03:00
|
|
|
|
/* mdbx_txn_begin() flags */
|
2020-08-03 12:56:57 +03:00
|
|
|
|
#define MDBX_TXN_RO_BEGIN_FLAGS (MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE)
|
|
|
|
|
#define MDBX_TXN_RW_BEGIN_FLAGS \
|
|
|
|
|
(MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
/* Additional flag for sync_locked() */
|
2020-07-05 15:22:41 +03:00
|
|
|
|
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
|
|
|
|
|
2022-12-08 12:58:56 +03:00
|
|
|
|
#define MDBX_TXN_DRAINED_GC 0x20 /* GC was depleted up to oldest reader */
|
|
|
|
|
|
2020-07-05 15:22:41 +03:00
|
|
|
|
#define TXN_FLAGS \
|
|
|
|
|
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
2022-12-08 12:58:56 +03:00
|
|
|
|
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_DRAINED_GC)
|
2020-07-05 15:22:41 +03:00
|
|
|
|
|
2020-08-03 12:56:57 +03:00
|
|
|
|
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
|
|
|
|
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
|
|
|
|
MDBX_SHRINK_ALLOWED)
|
2022-05-02 10:35:40 +03:00
|
|
|
|
#error "Oops, some txn flags overlapped or wrong"
|
2020-07-05 15:22:41 +03:00
|
|
|
|
#endif
|
2021-05-10 15:59:50 +03:00
|
|
|
|
uint32_t mt_flags;
|
2023-11-03 11:28:13 +03:00
|
|
|
|
unsigned mt_numdbs;
|
|
|
|
|
size_t mt_owner; /* thread ID that owns this transaction */
|
2021-05-10 15:59:50 +03:00
|
|
|
|
|
|
|
|
|
MDBX_txn *mt_parent; /* parent of a nested txn */
|
|
|
|
|
/* Nested txn under this txn, set together with flag MDBX_TXN_HAS_CHILD */
|
|
|
|
|
MDBX_txn *mt_child;
|
|
|
|
|
MDBX_geo mt_geo;
|
|
|
|
|
/* next unallocated page */
|
|
|
|
|
#define mt_next_pgno mt_geo.next
|
|
|
|
|
/* corresponding to the current size of datafile */
|
|
|
|
|
#define mt_end_pgno mt_geo.now
|
2020-07-05 15:22:41 +03:00
|
|
|
|
|
2022-06-20 20:16:54 +03:00
|
|
|
|
/* The ID of this transaction. IDs are integers incrementing from
|
|
|
|
|
* INITIAL_TXNID. Only committed write transactions increment the ID. If a
|
|
|
|
|
* transaction aborts, the ID may be re-used by the next writer. */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
txnid_t mt_txnid;
|
2021-04-14 01:41:42 +03:00
|
|
|
|
txnid_t mt_front;
|
|
|
|
|
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_env *mt_env; /* the DB environment */
|
|
|
|
|
/* Array of MDBX_db records for each known DB */
|
|
|
|
|
MDBX_db *mt_dbs;
|
2019-10-17 14:07:47 +03:00
|
|
|
|
|
mdbx: переработка инициализации, проверки и импорта dbi-хендлов в транзакциях.
Ранее инициализация в транзакциях структур данных, связанных с
dbi-хендлами и subDb, выполнялась непосредственно при запуске
транзакций. Что в сценариях с большим кол-вом dbi-дексприторов (например
libfpta) порождало заметные накладные расходы, которые расли линейно от
общего кол-ва открытых subDb, а не от реально используемых в транзакции.
При использовании одной-двух сотен хендлов, при старте каждой транзакции
могли копироваться и/или обнуляться десятки килобайт. Теперь этот
недостаток устранен.
Изменена схема инициализации, валидации и импорта хендлов открытых после
старта транзакции:
1) Инициализация теперь выполняется отложенна, а при старте транзации
обнуляется только массив с однобайтовыми статустами dbi-хендлов.
При этом доступнва опция сборки `MDBX_ENABLE_DBI_SPARSE`, при активации
которой используется битовая карты, что снижает объем инициализации
при старте транзакции в 8 раз (CHAR_BIT).
2) Переработана валидация dbi-хендлов на входах API, с уменьшением кол-ва
проверок и ветвлений до теоретического минимума.
3) Переработ импорт dbi-хендов открытых после старта транзакци, теперь
при этом не захватывается мьютекс.
2023-11-05 22:10:29 +03:00
|
|
|
|
#if MDBX_ENABLE_DBI_SPARSE
|
|
|
|
|
unsigned *mt_dbi_sparse;
|
|
|
|
|
#endif /* MDBX_ENABLE_DBI_SPARSE */
|
|
|
|
|
|
2023-11-03 11:30:54 +03:00
|
|
|
|
/* Non-shared DBI state flags inside transaction */
|
mdbx: переработка инициализации, проверки и импорта dbi-хендлов в транзакциях.
Ранее инициализация в транзакциях структур данных, связанных с
dbi-хендлами и subDb, выполнялась непосредственно при запуске
транзакций. Что в сценариях с большим кол-вом dbi-дексприторов (например
libfpta) порождало заметные накладные расходы, которые расли линейно от
общего кол-ва открытых subDb, а не от реально используемых в транзакции.
При использовании одной-двух сотен хендлов, при старте каждой транзакции
могли копироваться и/или обнуляться десятки килобайт. Теперь этот
недостаток устранен.
Изменена схема инициализации, валидации и импорта хендлов открытых после
старта транзакции:
1) Инициализация теперь выполняется отложенна, а при старте транзации
обнуляется только массив с однобайтовыми статустами dbi-хендлов.
При этом доступнва опция сборки `MDBX_ENABLE_DBI_SPARSE`, при активации
которой используется битовая карты, что снижает объем инициализации
при старте транзакции в 8 раз (CHAR_BIT).
2) Переработана валидация dbi-хендлов на входах API, с уменьшением кол-ва
проверок и ветвлений до теоретического минимума.
3) Переработ импорт dbi-хендов открытых после старта транзакци, теперь
при этом не захватывается мьютекс.
2023-11-05 22:10:29 +03:00
|
|
|
|
#define DBI_DIRTY 0x01 /* DB was written in this txn */
|
|
|
|
|
#define DBI_STALE 0x02 /* Named-DB record is older than txnID */
|
|
|
|
|
#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */
|
|
|
|
|
#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */
|
|
|
|
|
#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */
|
|
|
|
|
#define DBI_OLDEN 0x40 /* Handle was closed/reopened outside txn */
|
|
|
|
|
#define DBI_LINDO 0x80 /* Lazy initialization done for DBI-slot */
|
2023-11-03 11:28:13 +03:00
|
|
|
|
/* Array of non-shared txn's flags of DBI */
|
2023-11-03 11:30:54 +03:00
|
|
|
|
uint8_t *mt_dbi_state;
|
2023-11-03 11:28:13 +03:00
|
|
|
|
|
|
|
|
|
/* Array of sequence numbers for each DB handle. */
|
mdbx: переработка инициализации, проверки и импорта dbi-хендлов в транзакциях.
Ранее инициализация в транзакциях структур данных, связанных с
dbi-хендлами и subDb, выполнялась непосредственно при запуске
транзакций. Что в сценариях с большим кол-вом dbi-дексприторов (например
libfpta) порождало заметные накладные расходы, которые расли линейно от
общего кол-ва открытых subDb, а не от реально используемых в транзакции.
При использовании одной-двух сотен хендлов, при старте каждой транзакции
могли копироваться и/или обнуляться десятки килобайт. Теперь этот
недостаток устранен.
Изменена схема инициализации, валидации и импорта хендлов открытых после
старта транзакции:
1) Инициализация теперь выполняется отложенна, а при старте транзации
обнуляется только массив с однобайтовыми статустами dbi-хендлов.
При этом доступнва опция сборки `MDBX_ENABLE_DBI_SPARSE`, при активации
которой используется битовая карты, что снижает объем инициализации
при старте транзакции в 8 раз (CHAR_BIT).
2) Переработана валидация dbi-хендлов на входах API, с уменьшением кол-ва
проверок и ветвлений до теоретического минимума.
3) Переработ импорт dbi-хендов открытых после старта транзакци, теперь
при этом не захватывается мьютекс.
2023-11-05 22:10:29 +03:00
|
|
|
|
uint32_t *mt_dbi_seqs;
|
2023-11-03 11:28:13 +03:00
|
|
|
|
MDBX_cursor **mt_cursors;
|
|
|
|
|
|
2020-07-23 19:24:21 +03:00
|
|
|
|
MDBX_canary mt_canary;
|
2020-09-29 14:41:44 +03:00
|
|
|
|
void *mt_userctx; /* User-settable context */
|
2019-10-17 14:07:47 +03:00
|
|
|
|
|
2019-10-08 19:57:17 +03:00
|
|
|
|
union {
|
|
|
|
|
struct {
|
|
|
|
|
/* For read txns: This thread/txn's reader table slot, or NULL. */
|
|
|
|
|
MDBX_reader *reader;
|
|
|
|
|
} to;
|
|
|
|
|
struct {
|
2022-08-20 00:28:32 +03:00
|
|
|
|
meta_troika_t troika;
|
2020-11-09 01:38:46 +03:00
|
|
|
|
/* In write txns, array of cursors for each DB */
|
2022-12-09 13:20:38 +03:00
|
|
|
|
MDBX_PNL relist; /* Reclaimed GC pages */
|
2022-11-06 16:35:06 +03:00
|
|
|
|
txnid_t last_reclaimed; /* ID of last used record */
|
2021-01-18 17:34:54 +03:00
|
|
|
|
#if MDBX_ENABLE_REFUND
|
2019-10-17 10:08:49 +03:00
|
|
|
|
pgno_t loose_refund_wl /* FIXME: describe */;
|
2021-01-18 17:34:54 +03:00
|
|
|
|
#endif /* MDBX_ENABLE_REFUND */
|
2022-09-29 16:18:10 +03:00
|
|
|
|
/* a sequence to spilling dirty page with LRU policy */
|
|
|
|
|
unsigned dirtylru;
|
2019-10-17 10:08:49 +03:00
|
|
|
|
/* dirtylist room: Dirty array size - dirty pages visible to this txn.
|
|
|
|
|
* Includes ancestor txns' dirty pages not hidden by other txns'
|
|
|
|
|
* dirty/spilled pages. Thus commit(nested txn) has room to merge
|
|
|
|
|
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
|
2022-09-29 16:18:10 +03:00
|
|
|
|
size_t dirtyroom;
|
2019-10-17 10:08:49 +03:00
|
|
|
|
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
|
2020-12-02 14:17:57 +03:00
|
|
|
|
MDBX_dpl *dirtylist;
|
2019-10-08 19:57:17 +03:00
|
|
|
|
/* The list of reclaimed txns from GC */
|
|
|
|
|
MDBX_TXL lifo_reclaimed;
|
|
|
|
|
/* The list of pages that became unused during this transaction. */
|
|
|
|
|
MDBX_PNL retired_pages;
|
|
|
|
|
/* The list of loose pages that became unused and may be reused
|
2019-10-17 14:07:47 +03:00
|
|
|
|
* in this transaction, linked through `mp_next`. */
|
2019-10-08 19:57:17 +03:00
|
|
|
|
MDBX_page *loose_pages;
|
|
|
|
|
/* Number of loose pages (tw.loose_pages) */
|
2022-09-29 16:18:10 +03:00
|
|
|
|
size_t loose_count;
|
2022-11-13 20:59:31 +03:00
|
|
|
|
union {
|
|
|
|
|
struct {
|
|
|
|
|
size_t least_removed;
|
|
|
|
|
/* The sorted list of dirty pages we temporarily wrote to disk
|
|
|
|
|
* because the dirty list was full. page numbers in here are
|
|
|
|
|
* shifted left by 1, deleted slots have the LSB set. */
|
|
|
|
|
MDBX_PNL list;
|
|
|
|
|
} spilled;
|
|
|
|
|
size_t writemap_dirty_npages;
|
2022-12-14 11:43:22 +03:00
|
|
|
|
size_t writemap_spilled_npages;
|
2022-11-13 20:59:31 +03:00
|
|
|
|
};
|
2019-10-08 19:57:17 +03:00
|
|
|
|
} tw;
|
|
|
|
|
};
|
2017-03-16 18:09:27 +03:00
|
|
|
|
};
|
|
|
|
|
|
2019-10-22 00:31:15 +03:00
|
|
|
|
/* Number of slots in a cursor's page stack (dimension of the mc_pg[] and
 * mc_ki[] arrays in struct MDBX_cursor); larger on 64-bit builds. */
#if MDBX_WORDBITS >= 64
#define CURSOR_STACK 32
#else
#define CURSOR_STACK 24
#endif
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2017-05-24 01:42:10 +03:00
|
|
|
|
struct MDBX_xcursor;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Cursors are used for all DB operations.
|
|
|
|
|
* A cursor holds a path of (page pointer, key index) from the DB
|
2017-05-24 01:42:10 +03:00
|
|
|
|
* root to a position in the DB, plus other state. MDBX_DUPSORT
|
2017-05-23 14:44:53 +03:00
|
|
|
|
* cursors include an xcursor to the current data item. Write txns
|
|
|
|
|
* track their cursors and keep them up to date when data moves.
|
|
|
|
|
* Exception: An xcursor's pointer to a P_SUBP page can be stale.
|
|
|
|
|
* (A node with F_DUPDATA but no F_SUBDATA contains a subpage). */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
struct MDBX_cursor {
|
2020-09-15 02:05:25 +03:00
|
|
|
|
#define MDBX_MC_LIVE UINT32_C(0xFE05D5B1)
|
2017-06-21 01:19:04 +03:00
|
|
|
|
#define MDBX_MC_READY4CLOSE UINT32_C(0x2817A047)
|
|
|
|
|
#define MDBX_MC_WAIT4EOT UINT32_C(0x90E297A7)
|
|
|
|
|
uint32_t mc_signature;
|
2017-07-03 09:56:46 +03:00
|
|
|
|
/* The database handle this cursor operates on */
|
|
|
|
|
MDBX_dbi mc_dbi;
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Next cursor on this DB in this txn */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_cursor *mc_next;
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Backup of the original cursor if this cursor is a shadow */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_cursor *mc_backup;
|
|
|
|
|
/* Context used for databases with MDBX_DUPSORT, otherwise NULL */
|
|
|
|
|
struct MDBX_xcursor *mc_xcursor;
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* The transaction that owns this cursor */
|
2017-05-23 21:36:09 +03:00
|
|
|
|
MDBX_txn *mc_txn;
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* The database record for this cursor */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_db *mc_db;
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* The database auxiliary record for this cursor */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_dbx *mc_dbx;
|
2023-11-03 11:30:54 +03:00
|
|
|
|
/* The mt_dbi_state[] for this DBI */
|
|
|
|
|
uint8_t *mc_dbi_state;
|
2022-06-30 21:38:32 +03:00
|
|
|
|
uint8_t mc_snum; /* number of pushed pages */
|
|
|
|
|
uint8_t mc_top; /* index of top page, normally mc_snum-1 */
|
2020-05-15 08:59:03 +03:00
|
|
|
|
|
|
|
|
|
/* Cursor state flags. */
|
|
|
|
|
#define C_INITIALIZED 0x01 /* cursor has been initialized and is valid */
|
|
|
|
|
#define C_EOF 0x02 /* No more data */
|
|
|
|
|
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
|
|
|
|
#define C_DEL 0x08 /* last op was a cursor_del */
|
|
|
|
|
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
2022-11-22 01:11:46 +03:00
|
|
|
|
#define C_GCU \
|
|
|
|
|
0x20 /* Происходит подготовка к обновлению GC, поэтому \
|
|
|
|
|
* можно брать страницы из GC даже для FREE_DBI */
|
2022-11-06 16:35:06 +03:00
|
|
|
|
uint8_t mc_flags;
|
2020-05-15 08:59:03 +03:00
|
|
|
|
|
2020-09-21 23:51:47 -04:00
|
|
|
|
/* Cursor checking flags. */
|
2022-06-30 21:38:32 +03:00
|
|
|
|
#define CC_BRANCH 0x01 /* same as P_BRANCH for CHECK_LEAF_TYPE() */
|
|
|
|
|
#define CC_LEAF 0x02 /* same as P_LEAF for CHECK_LEAF_TYPE() */
|
2022-07-04 21:22:39 +03:00
|
|
|
|
#define CC_OVERFLOW 0x04 /* same as P_OVERFLOW for CHECK_LEAF_TYPE() */
|
|
|
|
|
#define CC_UPDATING 0x08 /* update/rebalance pending */
|
2022-06-30 21:38:32 +03:00
|
|
|
|
#define CC_SKIPORD 0x10 /* don't check keys ordering */
|
|
|
|
|
#define CC_LEAF2 0x20 /* same as P_LEAF2 for CHECK_LEAF_TYPE() */
|
|
|
|
|
#define CC_RETIRING 0x40 /* refs to child pages may be invalid */
|
|
|
|
|
#define CC_PAGECHECK 0x80 /* perform page checking, see MDBX_VALIDATION */
|
2022-11-06 16:35:06 +03:00
|
|
|
|
uint8_t mc_checking;
|
2020-05-15 08:59:03 +03:00
|
|
|
|
|
2017-05-23 21:04:23 +03:00
|
|
|
|
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
|
|
|
|
|
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
|
2017-03-16 18:09:27 +03:00
|
|
|
|
};
|
|
|
|
|
|
2022-06-30 21:38:32 +03:00
|
|
|
|
/* True when the page-type bits of `mp` (PAGETYPE_WHOLE) agree with the
 * CC_BRANCH/CC_LEAF/CC_OVERFLOW/CC_LEAF2 bits recorded in mc->mc_checking,
 * i.e. the XOR of the two has none of those bits set. */
#define CHECK_LEAF_TYPE(mc, mp)                                                \
  (((PAGETYPE_WHOLE(mp) ^ (mc)->mc_checking) &                                 \
    (CC_BRANCH | CC_LEAF | CC_OVERFLOW | CC_LEAF2)) == 0)
|
2022-06-30 21:38:32 +03:00
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* Context for sorted-dup records.
|
|
|
|
|
* We could have gone to a fully recursive design, with arbitrarily
|
|
|
|
|
* deep nesting of sub-databases. But for now we only handle these
|
|
|
|
|
* levels - main DB, optional sub-DB, sorted-duplicate DB. */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
typedef struct MDBX_xcursor {
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* A sub-cursor for traversing the Dup DB */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_cursor mx_cursor;
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* The database record for this Dup DB */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_db mx_db;
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* The auxiliary DB record for this Dup DB */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
MDBX_dbx mx_dbx;
|
|
|
|
|
} MDBX_xcursor;
|
2017-03-16 18:09:27 +03:00
|
|
|
|
|
2018-08-29 19:15:59 +03:00
|
|
|
|
typedef struct MDBX_cursor_couple {
|
|
|
|
|
MDBX_cursor outer;
|
2020-09-29 20:58:09 +03:00
|
|
|
|
void *mc_userctx; /* User-settable context */
|
2018-08-29 19:15:59 +03:00
|
|
|
|
MDBX_xcursor inner;
|
|
|
|
|
} MDBX_cursor_couple;
|
|
|
|
|
|
2017-05-23 14:44:53 +03:00
|
|
|
|
/* The database environment. */
|
2017-05-24 01:42:10 +03:00
|
|
|
|
struct MDBX_env {
|
2021-04-27 01:03:33 +03:00
|
|
|
|
/* ----------------------------------------------------- mostly static part */
|
2017-06-21 01:19:04 +03:00
|
|
|
|
#define MDBX_ME_SIGNATURE UINT32_C(0x9A899641)
|
2021-01-30 02:28:12 +03:00
|
|
|
|
MDBX_atomic_uint32_t me_signature;
|
2019-10-22 22:31:06 +03:00
|
|
|
|
/* Failed to update the meta page. Probably an I/O error. */
|
2017-06-21 01:19:04 +03:00
|
|
|
|
#define MDBX_FATAL_ERROR UINT32_C(0x80000000)
|
2019-10-22 22:31:06 +03:00
|
|
|
|
/* Some fields are initialized. */
|
2017-06-21 01:19:04 +03:00
|
|
|
|
#define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
|
2019-10-22 22:31:06 +03:00
|
|
|
|
/* me_txkey is set */
|
2017-06-21 01:19:04 +03:00
|
|
|
|
#define MDBX_ENV_TXKEY UINT32_C(0x10000000)
|
2020-08-01 19:13:17 +03:00
|
|
|
|
/* Legacy MDBX_MAPASYNC (prior v0.9) */
|
|
|
|
|
#define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000)
|
2022-07-24 21:20:22 +03:00
|
|
|
|
/* Legacy MDBX_COALESCE (prior v0.12) */
|
2022-06-22 18:33:00 +03:00
|
|
|
|
#define MDBX_DEPRECATED_COALESCE UINT32_C(0x2000000)
|
2020-07-05 15:22:41 +03:00
|
|
|
|
#define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY)
|
2020-04-20 17:00:41 +03:00
|
|
|
|
uint32_t me_flags;
|
2023-11-03 11:28:13 +03:00
|
|
|
|
unsigned me_psize; /* DB page size, initialized from me_os_psize */
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_mmap_t me_dxb_mmap; /* The main data file */
|
2022-11-29 01:10:44 +03:00
|
|
|
|
#define me_map me_dxb_mmap.base
|
2020-04-20 17:00:41 +03:00
|
|
|
|
#define me_lazy_fd me_dxb_mmap.fd
|
2022-09-25 12:47:31 +03:00
|
|
|
|
mdbx_filehandle_t me_dsync_fd, me_fd4meta;
|
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
2022-12-03 14:55:38 +03:00
|
|
|
|
#define me_overlapped_fd me_ioring.overlapped_fd
|
|
|
|
|
HANDLE me_data_lock_event;
|
2022-09-25 12:47:31 +03:00
|
|
|
|
#endif /* Windows */
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_mmap_t me_lck_mmap; /* The lock file */
|
2020-04-20 17:00:41 +03:00
|
|
|
|
#define me_lfd me_lck_mmap.fd
|
2021-04-28 03:44:54 +03:00
|
|
|
|
struct MDBX_lockinfo *me_lck;
|
2020-04-20 17:00:41 +03:00
|
|
|
|
|
2022-09-30 14:06:55 +03:00
|
|
|
|
unsigned me_leaf_nodemax; /* max size of a leaf-node */
|
|
|
|
|
unsigned me_branch_nodemax; /* max size of a branch-node */
|
2022-10-24 01:02:38 +03:00
|
|
|
|
atomic_pgno_t me_mlocked_pgno;
|
|
|
|
|
uint8_t me_psize2log; /* log2 of DB page size */
|
2020-09-19 00:18:55 +03:00
|
|
|
|
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
2021-05-06 02:05:33 +03:00
|
|
|
|
uint16_t me_merge_threshold,
|
|
|
|
|
me_merge_threshold_gc; /* pages emptier than this are candidates for
|
|
|
|
|
merging */
|
2022-08-11 01:03:15 +03:00
|
|
|
|
unsigned me_os_psize; /* OS page size, from osal_syspagesize() */
|
2021-04-27 01:03:33 +03:00
|
|
|
|
unsigned me_maxreaders; /* size of the reader table */
|
2018-06-30 11:58:57 +03:00
|
|
|
|
MDBX_dbi me_maxdbs; /* size of the DB table */
|
2019-09-23 15:32:29 +03:00
|
|
|
|
uint32_t me_pid; /* process ID of this env */
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_thread_key_t me_txkey; /* thread-key for readers */
|
2022-08-09 18:27:43 +03:00
|
|
|
|
pathchar_t *me_pathname; /* path to the DB files */
|
2018-06-30 11:58:57 +03:00
|
|
|
|
void *me_pbuf; /* scratch area for DUPSORT put() */
|
2021-06-26 18:54:00 +03:00
|
|
|
|
MDBX_txn *me_txn0; /* preallocated write transaction */
|
2019-11-06 23:53:53 +03:00
|
|
|
|
|
2023-11-03 11:30:54 +03:00
|
|
|
|
MDBX_dbx *me_dbxs; /* array of static DB info */
|
|
|
|
|
uint16_t *me_db_flags; /* array of flags from MDBX_db.md_flags */
|
|
|
|
|
MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */
|
2021-04-28 03:44:54 +03:00
|
|
|
|
unsigned
|
2022-12-07 20:02:23 +03:00
|
|
|
|
me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */
|
|
|
|
|
unsigned me_maxgc_per_branch;
|
|
|
|
|
uint32_t me_live_reader; /* have liveness lock in reader table */
|
|
|
|
|
void *me_userctx; /* User-settable context */
|
2020-09-29 19:24:57 +03:00
|
|
|
|
MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */
|
2023-01-03 20:20:03 +03:00
|
|
|
|
size_t me_madv_threshold;
|
2021-04-28 03:44:54 +03:00
|
|
|
|
|
2020-11-21 16:21:57 +03:00
|
|
|
|
struct {
|
|
|
|
|
unsigned dp_reserve_limit;
|
2020-11-21 17:53:17 +03:00
|
|
|
|
unsigned rp_augment_limit;
|
2020-12-02 15:18:54 +03:00
|
|
|
|
unsigned dp_limit;
|
|
|
|
|
unsigned dp_initial;
|
2021-01-22 23:52:03 +03:00
|
|
|
|
uint8_t dp_loose_limit;
|
2021-01-22 17:25:44 +03:00
|
|
|
|
uint8_t spill_max_denominator;
|
|
|
|
|
uint8_t spill_min_denominator;
|
2021-01-22 18:18:52 +03:00
|
|
|
|
uint8_t spill_parent4child_denominator;
|
2021-05-06 02:05:33 +03:00
|
|
|
|
unsigned merge_threshold_16dot16_percent;
|
2022-12-03 14:55:38 +03:00
|
|
|
|
#if !(defined(_WIN32) || defined(_WIN64))
|
|
|
|
|
unsigned writethrough_threshold;
|
|
|
|
|
#endif /* Windows */
|
2022-12-12 01:20:22 +03:00
|
|
|
|
bool prefault_write;
|
2021-04-27 23:08:52 +03:00
|
|
|
|
union {
|
|
|
|
|
unsigned all;
|
|
|
|
|
/* tracks options with non-auto values but tuned by user */
|
|
|
|
|
struct {
|
|
|
|
|
unsigned dp_limit : 1;
|
2022-12-08 15:35:41 +03:00
|
|
|
|
unsigned rp_augment_limit : 1;
|
2022-12-12 01:20:22 +03:00
|
|
|
|
unsigned prefault_write : 1;
|
2021-04-27 23:08:52 +03:00
|
|
|
|
} non_auto;
|
|
|
|
|
} flags;
|
2020-11-21 16:21:57 +03:00
|
|
|
|
} me_options;
|
2021-04-27 01:03:33 +03:00
|
|
|
|
|
|
|
|
|
/* struct me_dbgeo used for accepting db-geo params from user for the new
|
|
|
|
|
* database creation, i.e. when mdbx_env_set_geometry() was called before
|
|
|
|
|
* mdbx_env_open(). */
|
|
|
|
|
struct {
|
|
|
|
|
size_t lower; /* minimal size of datafile */
|
|
|
|
|
size_t upper; /* maximal size of datafile */
|
|
|
|
|
size_t now; /* current size of datafile */
|
|
|
|
|
size_t grow; /* step to grow datafile */
|
|
|
|
|
size_t shrink; /* threshold to shrink datafile */
|
|
|
|
|
} me_dbgeo;
|
|
|
|
|
|
|
|
|
|
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
|
|
|
|
|
union {
|
|
|
|
|
key_t key;
|
|
|
|
|
int semid;
|
|
|
|
|
} me_sysv_ipc;
|
|
|
|
|
#endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */
|
2022-12-12 01:20:22 +03:00
|
|
|
|
bool me_incore;
|
2023-11-03 11:28:13 +03:00
|
|
|
|
bool me_prefault_write;
|
2021-04-27 01:03:33 +03:00
|
|
|
|
|
|
|
|
|
MDBX_env *me_lcklist_next;
|
|
|
|
|
|
|
|
|
|
/* --------------------------------------------------- mostly volatile part */
|
|
|
|
|
|
|
|
|
|
MDBX_txn *me_txn; /* current write transaction */
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_fastmutex_t me_dbi_lock;
|
2023-11-03 11:28:13 +03:00
|
|
|
|
unsigned me_numdbs; /* number of DBs opened */
|
2021-04-27 01:03:33 +03:00
|
|
|
|
|
|
|
|
|
unsigned me_dp_reserve_len;
|
2023-11-03 11:28:13 +03:00
|
|
|
|
MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */
|
|
|
|
|
|
2021-04-27 01:03:33 +03:00
|
|
|
|
/* PNL of pages that became unused in a write txn */
|
|
|
|
|
MDBX_PNL me_retired_pages;
|
2022-09-25 12:47:31 +03:00
|
|
|
|
osal_ioring_t me_ioring;
|
2021-04-27 01:03:33 +03:00
|
|
|
|
|
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_srwlock_t me_remap_guard;
|
2021-04-27 01:03:33 +03:00
|
|
|
|
/* Workaround for LockFileEx and WriteFile multithread bug */
|
|
|
|
|
CRITICAL_SECTION me_windowsbug_lock;
|
2023-02-09 17:19:25 +03:00
|
|
|
|
char *me_pathname_char; /* cache of multi-byte representation of pathname
|
|
|
|
|
to the DB files */
|
2021-04-27 01:03:33 +03:00
|
|
|
|
#else
|
2022-08-11 01:03:15 +03:00
|
|
|
|
osal_fastmutex_t me_remap_guard;
|
2021-04-27 01:03:33 +03:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------- debugging */
|
|
|
|
|
|
2017-06-14 23:33:13 +03:00
|
|
|
|
#if MDBX_DEBUG
|
|
|
|
|
MDBX_assert_func *me_assert_func; /* Callback for assertion failures */
|
|
|
|
|
#endif
|
2019-10-10 22:11:28 +03:00
|
|
|
|
#ifdef MDBX_USE_VALGRIND
|
2017-03-16 18:09:27 +03:00
|
|
|
|
int me_valgrind_handle;
|
2019-10-10 22:11:28 +03:00
|
|
|
|
#endif
|
|
|
|
|
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
|
|
|
|
|
pgno_t me_poison_edge;
|
2019-12-29 01:19:31 +03:00
|
|
|
|
#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
|
2021-04-28 03:44:54 +03:00
|
|
|
|
|
2021-04-29 19:50:25 +03:00
|
|
|
|
#ifndef xMDBX_DEBUG_SPILLING
|
|
|
|
|
#define xMDBX_DEBUG_SPILLING 0
|
2021-04-28 18:03:35 +03:00
|
|
|
|
#endif
|
2021-04-29 19:50:25 +03:00
|
|
|
|
#if xMDBX_DEBUG_SPILLING == 2
|
2022-10-08 15:02:45 +03:00
|
|
|
|
size_t debug_dirtied_est, debug_dirtied_act;
|
2021-04-29 19:50:25 +03:00
|
|
|
|
#endif /* xMDBX_DEBUG_SPILLING */
|
2021-04-28 18:03:35 +03:00
|
|
|
|
|
2021-04-28 03:44:54 +03:00
|
|
|
|
/* ------------------------------------------------- stub for lck-less mode */
|
2021-05-08 22:43:16 +03:00
|
|
|
|
MDBX_atomic_uint64_t
|
|
|
|
|
x_lckless_stub[(sizeof(MDBX_lockinfo) + MDBX_CACHELINE_SIZE - 1) /
|
|
|
|
|
sizeof(MDBX_atomic_uint64_t)];
|
2017-03-16 18:09:27 +03:00
|
|
|
|
};
|
|
|
|
|
|
2020-09-13 18:06:14 +03:00
|
|
|
|
#ifndef __cplusplus
|
2017-05-24 13:59:50 +03:00
|
|
|
|
/*----------------------------------------------------------------------------*/
|
2019-11-13 22:12:09 +03:00
|
|
|
|
/* Cache coherence and mmap invalidation */
|
|
|
|
|
|
|
|
|
|
/* Expands to a full memory barrier when MDBX_CPU_WRITEBACK_INCOHERENT is
 * set, otherwise only to a compiler barrier. */
#if MDBX_CPU_WRITEBACK_INCOHERENT
#define osal_flush_incoherent_cpu_writeback() osal_memory_barrier()
#else
#define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier()
#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
|
|
|
|
|
|
2021-05-11 20:14:09 +03:00
|
|
|
|
/* Makes the range [addr, addr + nbytes) of a memory mapping coherent on
 * platforms that need it; compiles to a no-op otherwise.
 *
 *  - MDBX_MMAP_INCOHERENT_FILE_WRITE: msync(MS_SYNC | MS_INVALIDATE) over the
 *    page-aligned span covering the range; a failure is only asserted.
 *  - MDBX_MMAP_INCOHERENT_CPU_CACHE: cacheflush() of the data cache over the
 *    range (requires DCACHE, otherwise the build is rejected).
 *
 * `pagesize` is used as an alignment mask (-pagesize), so it is expected to
 * be a power of two -- NOTE(review): not checked here, confirm at callers. */
MDBX_MAYBE_UNUSED static __inline void
osal_flush_incoherent_mmap(const void *addr, size_t nbytes,
                           const intptr_t pagesize) {
#if MDBX_MMAP_INCOHERENT_FILE_WRITE
  /* Round the start down and the end up to page boundaries. */
  char *const begin = (char *)(-pagesize & (intptr_t)addr);
  char *const end =
      (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1));
  int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0;
  eASSERT(nullptr, err == 0);
  (void)err;
#else
  (void)pagesize;
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */

#if MDBX_MMAP_INCOHERENT_CPU_CACHE
#ifdef DCACHE
  /* MIPS has cache coherency issues.
   * Note: for any nbytes >= on-chip cache size, entire is flushed. */
  cacheflush((void *)addr, nbytes, DCACHE);
#else
#error "Oops, cacheflush() not available"
#endif /* DCACHE */
#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */

#if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE
  /* Nothing to do on coherent platforms; silence unused-parameter warnings. */
  (void)addr;
  (void)nbytes;
#endif
}
|
|
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------------*/
|
2018-09-19 00:21:42 +03:00
|
|
|
|
/* Internal prototypes */
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2022-08-11 01:03:15 +03:00
|
|
|
|
MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, int rlocked,
|
|
|
|
|
int *dead);
|
|
|
|
|
MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin,
|
|
|
|
|
MDBX_reader *end);
|
|
|
|
|
MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key);
|
2018-03-28 15:57:16 +03:00
|
|
|
|
|
2022-08-11 01:03:15 +03:00
|
|
|
|
MDBX_INTERNAL_FUNC void global_ctor(void);
|
2022-11-01 19:31:25 +03:00
|
|
|
|
MDBX_INTERNAL_FUNC void osal_ctor(void);
|
2022-08-11 01:03:15 +03:00
|
|
|
|
MDBX_INTERNAL_FUNC void global_dtor(void);
|
2022-11-01 19:31:25 +03:00
|
|
|
|
MDBX_INTERNAL_FUNC void osal_dtor(void);
|
2022-08-11 01:03:15 +03:00
|
|
|
|
MDBX_INTERNAL_FUNC void thread_dtor(void *ptr);
|
2017-04-05 18:33:19 +03:00
|
|
|
|
|
2020-09-13 18:06:14 +03:00
|
|
|
|
#endif /* !__cplusplus */
|
|
|
|
|
|
2017-04-21 16:02:27 +03:00
|
|
|
|
/* True when `rc` is neither MDBX_RESULT_TRUE nor MDBX_RESULT_FALSE.
 * Note: `rc` is evaluated twice, so it must be free of side effects. */
#define MDBX_IS_ERROR(rc)                                                      \
  ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)

/* Internal error codes, not exposed outside libmdbx */
#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2019-11-18 00:13:27 +03:00
|
|
|
|
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc)                                                               \
  (((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi)
|
|
|
|
|
|
2021-03-07 16:27:12 +03:00
|
|
|
|
/* Key size which fits in a DKBUF (debug key buffer). */
#define DKBUF_MAX 511
/* Declares the local dump buffer `_kbuf`, split into two halves of
 * (DKBUF_MAX * 2 + 1) bytes each: the first is used by DKEY(), the second
 * by DVAL(). */
#define DKBUF char _kbuf[DKBUF_MAX * 4 + 2]
#define DKEY(x) mdbx_dump_val(x, _kbuf, DKBUF_MAX * 2 + 1)
#define DVAL(x) mdbx_dump_val(x, _kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1)

/* Debug-only variants: real dumping when MDBX_DEBUG is enabled, otherwise
 * no buffer is declared and the dump expressions yield the string "-". */
#if MDBX_DEBUG
#define DKBUF_DEBUG DKBUF
#define DKEY_DEBUG(x) DKEY(x)
#define DVAL_DEBUG(x) DVAL(x)
#else
#define DKBUF_DEBUG ((void)(0))
#define DKEY_DEBUG(x) ("-")
#define DVAL_DEBUG(x) ("-")
#endif
|
|
|
|
|
|
|
|
|
|
/* An invalid page number.
 * Mainly used to denote an empty tree. */
#define P_INVALID (~(pgno_t)0)

/* Test if the flags f are set in a flag word w. */
#define F_ISSET(w, f) (((w) & (f)) == (f))

/* Round n up to an even number. */
#define EVEN(n) (((n) + 1UL) & -2L) /* sign-extending -2 to match n+1U */

/* Default size of memory map.
 * This is certainly too small for any actual applications. Apps should
 * always set the size explicitly using mdbx_env_set_geometry(). */
#define DEFAULT_MAPSIZE MEGABYTE

/* Number of slots in the reader table.
 * This value was chosen somewhat arbitrarily. The 61 is a prime number,
 * and such readers plus a couple mutexes fit into single 4KB page.
 * Applications should set the table size using mdbx_env_set_maxreaders(). */
#define DEFAULT_READERS 61
|
|
|
|
|
|
|
|
|
|
/* Test if a page is a leaf page */
#define IS_LEAF(p) (((p)->mp_flags & P_LEAF) != 0)
/* Test if a page is a LEAF2 page */
#define IS_LEAF2(p) unlikely(((p)->mp_flags & P_LEAF2) != 0)
/* Test if a page is a branch page */
#define IS_BRANCH(p) (((p)->mp_flags & P_BRANCH) != 0)
/* Test if a page is an overflow page */
#define IS_OVERFLOW(p) unlikely(((p)->mp_flags & P_OVERFLOW) != 0)
/* Test if a page is a sub page */
#define IS_SUBP(p) (((p)->mp_flags & P_SUBP) != 0)
|
|
|
|
|
|
2017-05-24 13:59:50 +03:00
|
|
|
|
/* Header for a single key/data pair within a page.
 * Used in pages of type P_BRANCH and P_LEAF without P_LEAF2.
 * We guarantee 2-byte alignment for 'MDBX_node's.
 *
 * Leaf node flags describe node contents. F_BIGDATA says the node's
 * data part is the page number of an overflow page with actual data.
 * F_DUPDATA and F_SUBDATA can be combined giving duplicate data in
 * a sub-page/sub-database, and named databases (just F_SUBDATA).
 *
 * The fixed part consists of one 32-bit word, two bytes and one 16-bit word;
 * the member order is mirrored between little- and big-endian builds. */
typedef struct MDBX_node {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Two names for the same 32-bit slot.
   * NOTE(review): presumably mn_dsize is the data size and mn_pgno32 is a
   * page number, depending on the node kind -- confirm against the users. */
  union {
    uint32_t mn_dsize;
    uint32_t mn_pgno32;
  };
  uint8_t mn_flags; /* see mdbx_node flags */
  uint8_t mn_extra;
  uint16_t mn_ksize; /* key size */
#else
  uint16_t mn_ksize; /* key size */
  uint8_t mn_extra;
  uint8_t mn_flags; /* see mdbx_node flags */
  /* Same 32-bit slot as in the little-endian layout, placed last here. */
  union {
    uint32_t mn_pgno32;
    uint32_t mn_dsize;
  };
#endif /* __BYTE_ORDER__ */

  /* mdbx_node Flags */
#define F_BIGDATA 0x01 /* data put on overflow page */
#define F_SUBDATA 0x02 /* data is a sub-database */
#define F_DUPDATA 0x04 /* data has duplicates */

  /* valid flags for mdbx_node_add() */
#define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND)

  /* The flexible array member is declared only where the language allows it:
   * C99 and newer, or MSVC compiling C. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
    (!defined(__cplusplus) && defined(_MSC_VER))
  uint8_t mn_data[] /* key and data are appended here */;
#endif /* C99 */
} MDBX_node;
|
|
|
|
|
|
2020-07-05 15:22:41 +03:00
|
|
|
|
/* Database flags describing key/value ordering and layout.
 * NOTE(review): judging by the name, these are the flags kept together with
 * the database itself -- confirm against the code that stores them. */
#define DB_PERSISTENT_FLAGS                                                    \
  (MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED |          \
   MDBX_INTEGERDUP | MDBX_REVERSEDUP)

/* mdbx_dbi_open() flags: the persistent set plus the open-time modifiers
 * MDBX_CREATE and MDBX_DB_ACCEDE. */
#define DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE)
|
2020-07-05 15:22:41 +03:00
|
|
|
|
|
mdbx: переработка инициализации, проверки и импорта dbi-хендлов в транзакциях.
Ранее инициализация в транзакциях структур данных, связанных с
dbi-хендлами и subDb, выполнялась непосредственно при запуске
транзакций. Что в сценариях с большим кол-вом dbi-дескрипторов (например
libfpta) порождало заметные накладные расходы, которые росли линейно от
общего кол-ва открытых subDb, а не от реально используемых в транзакции.
При использовании одной-двух сотен хендлов, при старте каждой транзакции
могли копироваться и/или обнуляться десятки килобайт. Теперь этот
недостаток устранен.
Изменена схема инициализации, валидации и импорта хендлов, открытых после
старта транзакции:
1) Инициализация теперь выполняется отложенно, а при старте транзакции
обнуляется только массив с однобайтовыми статусами dbi-хендлов.
При этом доступна опция сборки `MDBX_ENABLE_DBI_SPARSE`, при активации
которой используется битовая карта, что снижает объем инициализации
при старте транзакции в 8 раз (CHAR_BIT).
2) Переработана валидация dbi-хендлов на входах API, с уменьшением кол-ва
проверок и ветвлений до теоретического минимума.
3) Переработан импорт dbi-хендлов, открытых после старта транзакции, теперь
при этом не захватывается мьютекс.
2023-11-05 22:10:29 +03:00
|
|
|
|
/* Internal per-handle state bits. DB_VALID occupies the top bit of a 16-bit
 * value and DB_POISON is every bit below it, so the two never overlap. */
#define DB_VALID 0x8000u  /* DB handle is valid, for me_db_flags */
#define DB_POISON 0x7fffu /* update pending */
#define DB_INTERNAL_FLAGS DB_VALID

/* Sanity checks: internal flags must not collide with user-visible ones, and
 * every persistent flag must also be a usable one.
 * NOTE(review): the preprocessor replaces any identifier that is not a macro
 * with 0, so if the MDBX_* flags are enum constants rather than macros these
 * two checks can never fire -- confirm how the flags are declared. */
#if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS
#error "Oops, some flags overlapped or wrong"
#endif
#if DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS
#error "Oops, some flags overlapped or wrong"
#endif
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2022-10-20 19:00:29 +03:00
|
|
|
|
/* Max length of iov-vector passed to writev() call, used for auxiliary writes.
 * Defaults to 64 and is lowered to IOV_MAX when the platform defines a
 * smaller limit. */
#define MDBX_AUXILARY_IOV_MAX 64
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
#undef MDBX_AUXILARY_IOV_MAX
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
#endif /* MDBX_AUXILARY_IOV_MAX */
|
2017-05-24 13:59:50 +03:00
|
|
|
|
|
2019-10-22 00:31:15 +03:00
|
|
|
|
/* Three-way comparison yielding an int:
 *
 *                  / -1, a <  b
 *   CMP2INT(a,b) = |  0, a == b
 *                  |  1, a >  b
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects. */
#define CMP2INT(a, b) (((a) != (b)) ? (((a) < (b)) ? -1 : 1) : 0)
|
2017-06-05 17:16:21 +03:00
|
|
|
|
|
2021-05-11 20:14:09 +03:00
|
|
|
|
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
|
2022-01-21 02:14:36 +03:00
|
|
|
|
int64pgno(int64_t i64) {
|
|
|
|
|
if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1))
|
|
|
|
|
return (pgno_t)i64;
|
|
|
|
|
return (i64 < (int64_t)MIN_PAGENO) ? MIN_PAGENO : MAX_PAGENO;
|
2017-07-26 10:28:09 +03:00
|
|
|
|
}
|
2017-07-26 10:19:05 +03:00
|
|
|
|
|
2021-05-11 20:14:09 +03:00
|
|
|
|
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
|
2022-01-21 02:14:36 +03:00
|
|
|
|
pgno_add(size_t base, size_t augend) {
|
|
|
|
|
assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO);
|
2023-02-11 00:25:14 +03:00
|
|
|
|
return int64pgno((int64_t)base + (int64_t)augend);
|
2022-01-21 02:14:36 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
|
|
|
|
|
pgno_sub(size_t base, size_t subtrahend) {
|
|
|
|
|
assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 &&
|
|
|
|
|
subtrahend < MAX_PAGENO);
|
2023-02-11 00:25:14 +03:00
|
|
|
|
return int64pgno((int64_t)base - (int64_t)subtrahend);
|
2017-07-24 00:54:10 +03:00
|
|
|
|
}
|
|
|
|
|
|
2021-05-11 20:14:09 +03:00
|
|
|
|
/* Returns true when `x` has at most one bit set.
 * Note that zero also passes this test: (0 & (0 - 1)) == 0. Callers that
 * must reject zero have to check for it separately. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline bool
is_powerof2(size_t x) {
  return (x & (x - 1)) == 0;
}
|
|
|
|
|
|
2021-05-11 20:14:09 +03:00
|
|
|
|
/* Rounds `value` down to a multiple of `granularity` by clearing the low
 * bits. `granularity` must satisfy is_powerof2(), which is asserted in debug
 * builds. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t
floor_powerof2(size_t value, size_t granularity) {
  assert(is_powerof2(granularity));
  return value & ~(granularity - 1);
}
|
|
|
|
|
|
2021-05-11 20:14:09 +03:00
|
|
|
|
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __always_inline size_t
|
2020-04-07 03:21:32 +03:00
|
|
|
|
ceil_powerof2(size_t value, size_t granularity) {
|
|
|
|
|
return floor_powerof2(value + granularity - 1, granularity);
|
2020-02-02 20:41:04 +03:00
|
|
|
|
}
|
2020-07-08 02:26:46 +03:00
|
|
|
|
|
2021-05-11 20:14:09 +03:00
|
|
|
|
/* Returns the zero-based index of the single set bit of `value_uintptr`,
 * i.e. log2 of a power of two.
 *
 * Debug builds assert that the argument is non-zero, below INT32_MAX and a
 * power of two; only the low 32 bits are examined afterwards.
 * Three implementations are selected at compile time:
 *  - __builtin_ctz() when the compiler provides it;
 *  - _BitScanForward() for MSVC;
 *  - otherwise a 32-entry de Bruijn multiplication lookup table. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static unsigned
log2n_powerof2(size_t value_uintptr) {
  assert(value_uintptr > 0 && value_uintptr < INT32_MAX &&
         is_powerof2(value_uintptr));
  /* x & -x isolates the lowest set bit; for a power of two that is x itself */
  assert((value_uintptr & -(intptr_t)value_uintptr) == value_uintptr);
  const uint32_t value_uint32 = (uint32_t)value_uintptr;
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctz)
  STATIC_ASSERT(sizeof(value_uint32) <= sizeof(unsigned));
  return __builtin_ctz(value_uint32);
#elif defined(_MSC_VER)
  unsigned long index;
  STATIC_ASSERT(sizeof(value_uint32) <= sizeof(long));
  _BitScanForward(&index, value_uint32);
  return index;
#else
  static const uint8_t debruijn_ctz32[32] = {
      0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  /* the top five bits of the 32-bit product select the table entry */
  return debruijn_ctz32[(uint32_t)(value_uint32 * 0x077CB531ul) >> 27];
#endif
}
|
|
|
|
|
|
2020-07-08 02:26:46 +03:00
|
|
|
|
/* Only a subset of the mdbx_env flags can be changed
 * at runtime. Changing other flags requires closing the
 * environment and re-opening it with the new flags. */
#define ENV_CHANGEABLE_FLAGS                                                   \
  (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC |             \
   MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE |           \
   MDBX_VALIDATION)

/* The remaining environment flags, i.e. the ones not listed as changeable
 * above. */
#define ENV_CHANGELESS_FLAGS                                                   \
  (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
   MDBX_LIFORECLAIM | MDBX_EXCLUSIVE)

/* Union of both sets above. */
#define ENV_USABLE_FLAGS (ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS)
|
|
|
|
|
|
2021-05-08 10:59:15 +03:00
|
|
|
|
#if !defined(__cplusplus) || CONSTEXPR_ENUM_FLAGS_OPERATIONS
|
2021-05-11 20:14:09 +03:00
|
|
|
|
MDBX_MAYBE_UNUSED static void static_checks(void) {
|
2020-07-08 02:26:46 +03:00
|
|
|
|
STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI,
|
|
|
|
|
"Oops, MDBX_MAX_DBI or CORE_DBS?");
|
2020-08-22 20:19:46 +03:00
|
|
|
|
STATIC_ASSERT_MSG((unsigned)(MDBX_DB_ACCEDE | MDBX_CREATE) ==
|
2020-07-08 02:26:46 +03:00
|
|
|
|
((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) &
|
|
|
|
|
(ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)),
|
|
|
|
|
"Oops, some flags overlapped or wrong");
|
|
|
|
|
STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0,
|
|
|
|
|
"Oops, some flags overlapped or wrong");
|
|
|
|
|
}
|
2020-08-22 20:19:46 +03:00
|
|
|
|
#endif /* Disabled for MSVC 19.0 (VisualStudio 2015) */
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
2021-07-16 14:59:37 +03:00
|
|
|
|
|
|
|
|
|
/* Logging wrappers around ASAN_POISON_MEMORY_REGION() and
 * ASAN_UNPOISON_MEMORY_REGION(): each one first emits a TRACE record with the
 * address, the size and the source line of the call site, then forwards to
 * the underlying macro.
 * `addr` and `size` appear twice in each expansion, so pass expressions
 * without side effects. */
#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size)                             \
  do {                                                                         \
    TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr),               \
          (size_t)(size), __LINE__);                                           \
    ASAN_POISON_MEMORY_REGION(addr, size);                                     \
  } while (0)

#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size)                           \
  do {                                                                         \
    TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr),             \
          (size_t)(size), __LINE__);                                           \
    ASAN_UNPOISON_MEMORY_REGION(addr, size);                                   \
  } while (0)
|
2023-04-24 20:59:18 +03:00
|
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
|
|
/** \brief Page types reported while traversing the b-tree.
 * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func
 *
 * The enumerators are numbered implicitly, starting from zero in the order
 * listed. */
enum MDBX_page_type_t {
  MDBX_page_broken,
  MDBX_page_large,
  MDBX_page_branch,
  MDBX_page_leaf,
  MDBX_page_dupfixed_leaf,
  MDBX_subpage_leaf,
  MDBX_subpage_dupfixed_leaf,
  MDBX_subpage_broken,
};
typedef enum MDBX_page_type_t MDBX_page_type_t;
|
|
|
|
|
|
|
|
|
|
typedef struct MDBX_walk_sdb {
|
|
|
|
|
MDBX_val name;
|
|
|
|
|
struct MDBX_db *internal, *nested;
|
|
|
|
|
} MDBX_walk_sdb_t;
|
|
|
|
|
|
|
|
|
|
/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */
|
|
|
|
|
typedef int
|
|
|
|
|
MDBX_pgvisitor_func(const size_t pgno, const unsigned number, void *const ctx,
|
|
|
|
|
const int deep, const MDBX_walk_sdb_t *subdb,
|
|
|
|
|
const size_t page_size, const MDBX_page_type_t page_type,
|
|
|
|
|
const MDBX_error_t err, const size_t nentries,
|
|
|
|
|
const size_t payload_bytes, const size_t header_bytes,
|
|
|
|
|
const size_t unused_bytes);
|